/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
/**
 * @file qpu_instr.h
 *
 * Definitions of the unpacked form of QPU instructions.  Assembly and
 * disassembly will use this for talking about instructions, with qpu_encode.c
 * and qpu_decode.c handling the pack and unpack of the actual 64-bit QPU
 * instruction.
 */
32 
#ifndef QPU_INSTR_H
#define QPU_INSTR_H

#include <stdbool.h>
#include <stdint.h>
#include "util/macros.h"
39 
/*
 * Opaque here: this header only ever passes the device description by
 * pointer, so a forward declaration is sufficient.
 */
struct v3d_device_info;
41 
/**
 * Unpacked signal flags of a QPU instruction.
 *
 * Every member is a one-bit boolean, so a zero-initialized struct has no
 * signal set.  v3d_qpu_sig_pack() and v3d_qpu_sig_unpack() take this struct
 * together with a 32-bit packed signal value.
 */
struct v3d_qpu_sig {
        bool thrsw:1;
        bool ldunif:1;
        bool ldunifa:1;
        bool ldunifrf:1;
        bool ldunifarf:1;
        bool ldtmu:1;
        bool ldvary:1;
        bool ldvpm:1;
        bool ldtlb:1;
        bool ldtlbu:1;
        bool small_imm:1;
        bool ucb:1;
        bool rotate:1;
        bool wrtmuc:1;
};
58 
/**
 * Condition selector used by the ac/mc members of struct v3d_qpu_flags.
 *
 * NONE is explicitly 0 so that a zero-initialized instruction carries no
 * condition.
 */
enum v3d_qpu_cond {
        V3D_QPU_COND_NONE = 0,
        V3D_QPU_COND_IFA,
        V3D_QPU_COND_IFB,
        V3D_QPU_COND_IFNA,
        V3D_QPU_COND_IFNB,
};
66 
/**
 * Selector used by the apf/mpf members of struct v3d_qpu_flags.
 *
 * NONE is explicitly 0 so that a zero-initialized instruction selects
 * nothing.
 */
enum v3d_qpu_pf {
        V3D_QPU_PF_NONE = 0,
        V3D_QPU_PF_PUSHZ,
        V3D_QPU_PF_PUSHN,
        V3D_QPU_PF_PUSHC,
};
73 
/**
 * Selector used by the auf/muf members of struct v3d_qpu_flags.
 *
 * NONE is explicitly 0 so that a zero-initialized instruction selects
 * nothing.  The remaining values come in groups of four (AND, ANDN, NORN,
 * NOR) for each of the Z, N and C suffixes.
 */
enum v3d_qpu_uf {
        V3D_QPU_UF_NONE = 0,
        V3D_QPU_UF_ANDZ,
        V3D_QPU_UF_ANDNZ,
        V3D_QPU_UF_NORNZ,
        V3D_QPU_UF_NORZ,
        V3D_QPU_UF_ANDN,
        V3D_QPU_UF_ANDNN,
        V3D_QPU_UF_NORNN,
        V3D_QPU_UF_NORN,
        V3D_QPU_UF_ANDC,
        V3D_QPU_UF_ANDNC,
        V3D_QPU_UF_NORNC,
        V3D_QPU_UF_NORC,
};
89 
/**
 * Write addresses with explicitly assigned numeric values.
 *
 * The numbering is not contiguous: nothing is defined for 25-31, 47-54 or
 * above 55.  v3d_qpu_magic_waddr_name() and the v3d_qpu_magic_waddr_is_*()
 * predicates take values of this type.
 */
enum v3d_qpu_waddr {
        V3D_QPU_WADDR_R0 = 0,
        V3D_QPU_WADDR_R1 = 1,
        V3D_QPU_WADDR_R2 = 2,
        V3D_QPU_WADDR_R3 = 3,
        V3D_QPU_WADDR_R4 = 4,
        V3D_QPU_WADDR_R5 = 5,
        /* 6 is reserved, but note 3.2.2.8: "Result Writes" */
        V3D_QPU_WADDR_NOP = 6,
        V3D_QPU_WADDR_TLB = 7,
        V3D_QPU_WADDR_TLBU = 8,
        V3D_QPU_WADDR_TMU = 9,
        V3D_QPU_WADDR_TMUL = 10,
        V3D_QPU_WADDR_TMUD = 11,
        V3D_QPU_WADDR_TMUA = 12,
        V3D_QPU_WADDR_TMUAU = 13,
        V3D_QPU_WADDR_VPM = 14,
        V3D_QPU_WADDR_VPMU = 15,
        V3D_QPU_WADDR_SYNC = 16,
        V3D_QPU_WADDR_SYNCU = 17,
        V3D_QPU_WADDR_SYNCB = 18,
        V3D_QPU_WADDR_RECIP = 19,
        V3D_QPU_WADDR_RSQRT = 20,
        V3D_QPU_WADDR_EXP = 21,
        V3D_QPU_WADDR_LOG = 22,
        V3D_QPU_WADDR_SIN = 23,
        V3D_QPU_WADDR_RSQRT2 = 24,
        /* 25-31 are not defined here. */
        V3D_QPU_WADDR_TMUC = 32,
        V3D_QPU_WADDR_TMUS = 33,
        V3D_QPU_WADDR_TMUT = 34,
        V3D_QPU_WADDR_TMUR = 35,
        V3D_QPU_WADDR_TMUI = 36,
        V3D_QPU_WADDR_TMUB = 37,
        V3D_QPU_WADDR_TMUDREF = 38,
        V3D_QPU_WADDR_TMUOFF = 39,
        V3D_QPU_WADDR_TMUSCM = 40,
        V3D_QPU_WADDR_TMUSF = 41,
        V3D_QPU_WADDR_TMUSLOD = 42,
        V3D_QPU_WADDR_TMUHS = 43,
        V3D_QPU_WADDR_TMUHSCM = 44,
        V3D_QPU_WADDR_TMUHSF = 45,
        V3D_QPU_WADDR_TMUHSLOD = 46,
        /* 47-54 are not defined here. */
        V3D_QPU_WADDR_R5REP = 55,
};
134 
135 struct v3d_qpu_flags {
136         enum v3d_qpu_cond ac, mc;
137         enum v3d_qpu_pf apf, mpf;
138         enum v3d_qpu_uf auf, muf;
139 };
140 
/**
 * Operation selector for the "add" half of struct v3d_qpu_alu_instr.
 *
 * Values are assigned sequentially starting at 0 (V3D_QPU_A_FADD).
 * NOTE(review): the header says nothing about how these relate to the
 * packed opcode field; do not assume they match — confirm in the
 * pack/unpack implementation.
 */
enum v3d_qpu_add_op {
        V3D_QPU_A_FADD = 0,
        V3D_QPU_A_FADDNF,
        V3D_QPU_A_VFPACK,
        V3D_QPU_A_ADD,
        V3D_QPU_A_SUB,
        V3D_QPU_A_FSUB,
        V3D_QPU_A_MIN,
        V3D_QPU_A_MAX,
        V3D_QPU_A_UMIN,
        V3D_QPU_A_UMAX,
        V3D_QPU_A_SHL,
        V3D_QPU_A_SHR,
        V3D_QPU_A_ASR,
        V3D_QPU_A_ROR,
        V3D_QPU_A_FMIN,
        V3D_QPU_A_FMAX,
        V3D_QPU_A_VFMIN,
        V3D_QPU_A_AND,
        V3D_QPU_A_OR,
        V3D_QPU_A_XOR,
        V3D_QPU_A_VADD,
        V3D_QPU_A_VSUB,
        V3D_QPU_A_NOT,
        V3D_QPU_A_NEG,
        V3D_QPU_A_FLAPUSH,
        V3D_QPU_A_FLBPUSH,
        V3D_QPU_A_FLPOP,
        V3D_QPU_A_RECIP,
        V3D_QPU_A_SETMSF,
        V3D_QPU_A_SETREVF,
        V3D_QPU_A_NOP,
        V3D_QPU_A_TIDX,
        V3D_QPU_A_EIDX,
        V3D_QPU_A_LR,
        V3D_QPU_A_VFLA,
        V3D_QPU_A_VFLNA,
        V3D_QPU_A_VFLB,
        V3D_QPU_A_VFLNB,
        V3D_QPU_A_FXCD,
        V3D_QPU_A_XCD,
        V3D_QPU_A_FYCD,
        V3D_QPU_A_YCD,
        V3D_QPU_A_MSF,
        V3D_QPU_A_REVF,
        V3D_QPU_A_VDWWT,
        V3D_QPU_A_IID,
        V3D_QPU_A_SAMPID,
        V3D_QPU_A_BARRIERID,
        V3D_QPU_A_TMUWT,
        V3D_QPU_A_VPMSETUP,
        V3D_QPU_A_VPMWT,
        V3D_QPU_A_LDVPMV_IN,
        V3D_QPU_A_LDVPMV_OUT,
        V3D_QPU_A_LDVPMD_IN,
        V3D_QPU_A_LDVPMD_OUT,
        V3D_QPU_A_LDVPMP,
        V3D_QPU_A_RSQRT,
        V3D_QPU_A_EXP,
        V3D_QPU_A_LOG,
        V3D_QPU_A_SIN,
        V3D_QPU_A_RSQRT2,
        V3D_QPU_A_LDVPMG_IN,
        V3D_QPU_A_LDVPMG_OUT,
        V3D_QPU_A_FCMP,
        V3D_QPU_A_VFMAX,
        V3D_QPU_A_FROUND,
        V3D_QPU_A_FTOIN,
        V3D_QPU_A_FTRUNC,
        V3D_QPU_A_FTOIZ,
        V3D_QPU_A_FFLOOR,
        V3D_QPU_A_FTOUZ,
        V3D_QPU_A_FCEIL,
        V3D_QPU_A_FTOC,
        V3D_QPU_A_FDX,
        V3D_QPU_A_FDY,
        V3D_QPU_A_STVPMV,
        V3D_QPU_A_STVPMD,
        V3D_QPU_A_STVPMP,
        V3D_QPU_A_ITOF,
        V3D_QPU_A_CLZ,
        V3D_QPU_A_UTOF,
};
224 
/**
 * Operation selector for the "mul" half of struct v3d_qpu_alu_instr.
 *
 * Values are assigned sequentially starting at 0 (V3D_QPU_M_ADD).
 */
enum v3d_qpu_mul_op {
        V3D_QPU_M_ADD = 0,
        V3D_QPU_M_SUB,
        V3D_QPU_M_UMUL24,
        V3D_QPU_M_VFMUL,
        V3D_QPU_M_SMUL24,
        V3D_QPU_M_MULTOP,
        V3D_QPU_M_FMOV,
        V3D_QPU_M_MOV,
        V3D_QPU_M_NOP,
        V3D_QPU_M_FMUL,
};
237 
/**
 * Output packing mode stored in the output_pack member of each half of
 * struct v3d_qpu_alu_instr.
 */
enum v3d_qpu_output_pack {
        /** No packing; explicitly 0 so it is the zero-initialized default. */
        V3D_QPU_PACK_NONE = 0,
        /**
         * Convert to 16-bit float, put in low 16 bits of destination leaving
         * high unmodified.
         */
        V3D_QPU_PACK_L,
        /**
         * Convert to 16-bit float, put in high 16 bits of destination leaving
         * low unmodified.
         */
        V3D_QPU_PACK_H,
};
251 
/**
 * Input unpacking mode stored in the a_unpack/b_unpack members of each half
 * of struct v3d_qpu_alu_instr.
 */
enum v3d_qpu_input_unpack {
        /**
         * No-op input unpacking.  Note that this enum's value doesn't match
         * the packed QPU instruction value of the field (we use 0 so that the
         * default on new instruction creation is no-op).
         */
        V3D_QPU_UNPACK_NONE = 0,
        /** Absolute value.  Only available for some operations. */
        V3D_QPU_UNPACK_ABS,
        /** Convert low 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_L,
        /** Convert high 16 bits from 16-bit float to 32-bit float. */
        V3D_QPU_UNPACK_H,

        /** Convert to 16f and replicate it to the high bits. */
        V3D_QPU_UNPACK_REPLICATE_32F_16,

        /** Replicate low 16 bits to high */
        V3D_QPU_UNPACK_REPLICATE_L_16,

        /** Replicate high 16 bits to low */
        V3D_QPU_UNPACK_REPLICATE_H_16,

        /** Swap high and low 16 bits */
        V3D_QPU_UNPACK_SWAP_16,
};
278 
/**
 * Input source selector used by the a/b members of each half of
 * struct v3d_qpu_alu_instr.
 *
 * Values are assigned sequentially starting at 0: R0-R5 are 0-5, followed
 * by A (6) and B (7).
 */
enum v3d_qpu_mux {
        V3D_QPU_MUX_R0 = 0,
        V3D_QPU_MUX_R1,
        V3D_QPU_MUX_R2,
        V3D_QPU_MUX_R3,
        V3D_QPU_MUX_R4,
        V3D_QPU_MUX_R5,
        V3D_QPU_MUX_A,
        V3D_QPU_MUX_B,
};
289 
290 struct v3d_qpu_alu_instr {
291         struct {
292                 enum v3d_qpu_add_op op;
293                 enum v3d_qpu_mux a, b;
294                 uint8_t waddr;
295                 bool magic_write;
296                 enum v3d_qpu_output_pack output_pack;
297                 enum v3d_qpu_input_unpack a_unpack;
298                 enum v3d_qpu_input_unpack b_unpack;
299         } add;
300 
301         struct {
302                 enum v3d_qpu_mul_op op;
303                 enum v3d_qpu_mux a, b;
304                 uint8_t waddr;
305                 bool magic_write;
306                 enum v3d_qpu_output_pack output_pack;
307                 enum v3d_qpu_input_unpack a_unpack;
308                 enum v3d_qpu_input_unpack b_unpack;
309         } mul;
310 };
311 
/**
 * Branch condition stored in the cond member of struct v3d_qpu_branch_instr.
 *
 * ALWAYS is explicitly 0, so it is the zero-initialized default.
 */
enum v3d_qpu_branch_cond {
        V3D_QPU_BRANCH_COND_ALWAYS = 0,
        V3D_QPU_BRANCH_COND_A0,
        V3D_QPU_BRANCH_COND_NA0,
        V3D_QPU_BRANCH_COND_ALLA,
        V3D_QPU_BRANCH_COND_ANYNA,
        V3D_QPU_BRANCH_COND_ANYA,
        V3D_QPU_BRANCH_COND_ALLNA,
};
321 
/**
 * Multisample-flag handling mode stored in the msfign member of
 * struct v3d_qpu_branch_instr.
 */
enum v3d_qpu_msfign {
        /** Ignore multisample flags when determining branch condition. */
        V3D_QPU_MSFIGN_NONE = 0,
        /**
         * If no multisample flags are set in the lane (a pixel in the FS, a
         * vertex in the VS), ignore the lane's condition when computing the
         * branch condition.
         */
        V3D_QPU_MSFIGN_P,
        /**
         * If no multisample flags are set in a 2x2 quad in the FS, ignore the
         * quad's a/b conditions.
         */
        V3D_QPU_MSFIGN_Q,
};
337 
/**
 * Destination computation mode, used by both the bdi and bdu members of
 * struct v3d_qpu_branch_instr.
 */
enum v3d_qpu_branch_dest {
        V3D_QPU_BRANCH_DEST_ABS = 0,
        V3D_QPU_BRANCH_DEST_REL,
        V3D_QPU_BRANCH_DEST_LINK_REG,
        V3D_QPU_BRANCH_DEST_REGFILE,
};
344 
345 struct v3d_qpu_branch_instr {
346         enum v3d_qpu_branch_cond cond;
347         enum v3d_qpu_msfign msfign;
348 
349         /** Selects how to compute the new IP if the branch is taken. */
350         enum v3d_qpu_branch_dest bdi;
351 
352         /**
353          * Selects how to compute the new uniforms pointer if the branch is
354          * taken.  (ABS/REL implicitly load a uniform and use that)
355          */
356         enum v3d_qpu_branch_dest bdu;
357 
358         /**
359          * If set, then udest determines how the uniform stream will branch,
360          * otherwise the uniform stream is left as is.
361          */
362         bool ub;
363 
364         uint8_t raddr_a;
365 
366         uint32_t offset;
367 };
368 
/**
 * Instruction kind stored in the type member of struct v3d_qpu_instr.
 *
 * ALU is explicitly 0, so it is the zero-initialized default.
 */
enum v3d_qpu_instr_type {
        V3D_QPU_INSTR_TYPE_ALU = 0,
        V3D_QPU_INSTR_TYPE_BRANCH,
};
373 
374 struct v3d_qpu_instr {
375         enum v3d_qpu_instr_type type;
376 
377         struct v3d_qpu_sig sig;
378         uint8_t sig_addr;
379         bool sig_magic; /* If the signal writes to a magic address */
380         uint8_t raddr_a;
381         uint8_t raddr_b;
382         struct v3d_qpu_flags flags;
383 
384         union {
385                 struct v3d_qpu_alu_instr alu;
386                 struct v3d_qpu_branch_instr branch;
387         };
388 };
389 
/*
 * Name lookups: each function takes one enum value and returns a pointer to
 * a constant string.
 * NOTE(review): the result for a value that has no name (NULL vs. a
 * placeholder string) is not visible from this header — confirm in the
 * implementation before dereferencing unconditionally.
 */
const char *v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr);
const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op);
const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op);
const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond);
const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf);
const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf);
const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack);
const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack);
const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond);
const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign);
400 
401 enum v3d_qpu_cond v3d_qpu_cond_invert(enum v3d_qpu_cond cond) ATTRIBUTE_CONST;
402 
403 bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op);
404 bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op);
405 int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op);
406 int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op);
407 
/*
 * Pack/unpack entry points.
 *
 * Every "pack" function reads its unpacked input (through a const pointer or
 * by value) and writes the packed result through the final pointer argument;
 * every "unpack" function takes the packed value by value and writes the
 * unpacked result through the final pointer argument.  All of them take the
 * device description first and return bool.
 * NOTE(review): presumably true means success and false means the value
 * cannot be represented on the given device, in which case the output may
 * be left unwritten — confirm in the implementation.
 */

/* Signals: struct v3d_qpu_sig <-> 32-bit packed value. */
bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                      const struct v3d_qpu_sig *sig,
                      uint32_t *packed_sig);
bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                        uint32_t packed_sig,
                        struct v3d_qpu_sig *sig);

/* Flags: struct v3d_qpu_flags <-> 32-bit packed value. */
bool v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_flags *cond,
                        uint32_t *packed_cond);
bool v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                          uint32_t packed_cond,
                          struct v3d_qpu_flags *cond);

/* Small immediates: 32-bit value <-> 32-bit packed value. */
bool v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
                            uint32_t value,
                            uint32_t *packed_small_immediate);
bool v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
                              uint32_t packed_small_immediate,
                              uint32_t *small_immediate);

/* Whole instructions: struct v3d_qpu_instr <-> 64-bit packed value. */
bool v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_instr *instr,
                        uint64_t *packed_instr);
bool v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                          uint64_t packed_instr,
                          struct v3d_qpu_instr *instr);
442 
443 bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
444 bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
445 bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
446 bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
447 bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
448 bool v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST;
449 bool v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
450 bool v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
451 bool v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
452 bool v3d_qpu_writes_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
453 bool v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
454 bool v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
455                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
456 bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
457                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
458 bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
459                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
460 bool v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
461 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
462 bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
463 bool v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
464 bool v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
465 bool v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
466 bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
467 bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
468 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
469                                 const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
470 bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
471 bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
472 
473 #endif
474