1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <string.h>
25 #include "util/macros.h"
26 
27 #include "broadcom/common/v3d_device_info.h"
28 #include "qpu_instr.h"
29 
/* Generic bit-field helpers.  They are only defined here if nothing included
 * earlier has already provided QPU_MASK.
 */
#ifndef QPU_MASK
/* 64-bit mask with bits low..high (inclusive) set. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts value up by field_SHIFT, asserts that nothing landed outside
 * field_MASK, and yields the positioned 64-bit value.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts field from word, shifted down to bit 0 and cast to uint32_t. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Yields inst with the bits of field replaced by value; all other bits are
 * kept as they were.
 */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
45 
/* Bit-field layout of the packed 64-bit QPU instruction word.  A field FOO is
 * described by FOO_SHIFT (its lowest bit) and FOO_MASK (its inclusive bit
 * range, built with QPU_MASK(high, low)), which is the naming the
 * QPU_GET_FIELD/QPU_SET_FIELD helpers expect.  Single-bit flags are given as
 * a bare mask.  The *_BRANCH_* fields overlap the bit ranges of the ALU
 * fields -- presumably because branch instructions reuse those bits; confirm
 * against the hardware spec.
 */
#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
105 
/* Shorthand designated initializers, each setting one boolean member of
 * struct v3d_qpu_sig.  They keep the rows of the signal tables short.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
120 
/* Signal table used when devinfo->ver is below 40.  The array index is the
 * packed signal value; the entry is the set of signals it stands for.
 * Indices without an initializer are zero-filled, and v3d_qpu_sig_unpack()
 * treats every all-zero entry other than index 0 as reserved.  The size of
 * this array is also the bound used when indexing the other signal tables.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
153 
/* Signal table used when devinfo->ver is exactly 40.  The array index is the
 * packed signal value; indices without an initializer are zero-filled and
 * are rejected as reserved by v3d_qpu_sig_unpack().
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
182 
/* Signal table used when devinfo->ver is 41 or later.  The array index is
 * the packed signal value; indices without an initializer are zero-filled
 * and are rejected as reserved by v3d_qpu_sig_unpack().
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM,            LDTMU, },
};
214 
215 bool
v3d_qpu_sig_unpack(const struct v3d_device_info * devinfo,uint32_t packed_sig,struct v3d_qpu_sig * sig)216 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
217                    uint32_t packed_sig,
218                    struct v3d_qpu_sig *sig)
219 {
220         if (packed_sig >= ARRAY_SIZE(v33_sig_map))
221                 return false;
222 
223         if (devinfo->ver >= 41)
224                 *sig = v41_sig_map[packed_sig];
225         else if (devinfo->ver == 40)
226                 *sig = v40_sig_map[packed_sig];
227         else
228                 *sig = v33_sig_map[packed_sig];
229 
230         /* Signals with zeroed unpacked contents after element 0 are reserved. */
231         return (packed_sig == 0 ||
232                 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
233 }
234 
235 bool
v3d_qpu_sig_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig,uint32_t * packed_sig)236 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
237                  const struct v3d_qpu_sig *sig,
238                  uint32_t *packed_sig)
239 {
240         static const struct v3d_qpu_sig *map;
241 
242         if (devinfo->ver >= 41)
243                 map = v41_sig_map;
244         else if (devinfo->ver == 40)
245                 map = v40_sig_map;
246         else
247                 map = v33_sig_map;
248 
249         for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
250                 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
251                         *packed_sig = i;
252                         return true;
253                 }
254         }
255 
256         return false;
257 }
/* Returns the object representation of a float reinterpreted as an
 * unsigned integer (no numeric conversion takes place).
 */
static inline unsigned
fui(float f)
{
        unsigned bits = 0;

        memcpy(&bits, &f, sizeof(f));
        return bits;
}
265 
/* Values of the small immediates, indexed by their packed encoding:
 * the integers 0..15, then -16..-1 (stored converted to uint32_t), then the
 * 32-bit patterns noted below as powers of two from 2^-8 up to 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
292 
293 bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info * devinfo,uint32_t packed_small_immediate,uint32_t * small_immediate)294 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
295                          uint32_t packed_small_immediate,
296                          uint32_t *small_immediate)
297 {
298         if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
299                 return false;
300 
301         *small_immediate = small_immediates[packed_small_immediate];
302         return true;
303 }
304 
305 bool
v3d_qpu_small_imm_pack(const struct v3d_device_info * devinfo,uint32_t value,uint32_t * packed_small_immediate)306 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
307                        uint32_t value,
308                        uint32_t *packed_small_immediate)
309 {
310         STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
311 
312         for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
313                 if (small_immediates[i] == value) {
314                         *packed_small_immediate = i;
315                         return true;
316                 }
317         }
318 
319         return false;
320 }
321 
/* Decodes the packed condition/flags field into its separate components.
 *
 * All six members of *cond are first reset to their NONE values; then the
 * ones selected by the encoding are filled in.  The branches are tested in
 * order, so each one only sees values the earlier ones did not claim:
 *
 *   0            nothing set
 *   0x01-0x03    apf = bits 1:0
 *   0x04-0x0f    auf = value - 4 + V3D_QPU_UF_ANDZ
 *   0x10         rejected (returns false)
 *   0x11-0x13    mpf = bits 1:0
 *   0x14-0x1f    muf = low nibble - 4 + V3D_QPU_UF_ANDZ
 *   0x20-0x2f    ac = bits 3:2 + V3D_QPU_COND_IFA, mpf = bits 1:0
 *   0x30-0x3f    mc = bits 3:2 + V3D_QPU_COND_IFA, apf = bits 1:0
 *   >= 0x40      mc = cond_map[bits 5:4]; then ac = cond_map[bits 1:0] if
 *                bits 3:2 are zero, otherwise auf from the low nibble
 *
 * devinfo is not used.  Returns false only for 0x10.
 *
 * NOTE(review): for the mc+ac combination v3d_qpu_flags_pack() shifts ac
 * into bits 3:2, whereas this decoder reads ac from bits 1:0 and interprets
 * non-zero bits 3:2 as auf.  The two look inconsistent whenever ac is not
 * V3D_QPU_COND_IFA -- confirm against the spec.
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* Two-bit condition code -> enum, used by the >= 0x40 encodings. */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* Everything below 0x40 was handled above, so this test is
                 * always true when it is reached.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
370 
/* Encodes the separate condition/flag components into the packed field.
 *
 * The function first works out which of the six members of *cond are not at
 * their NONE value, then looks for a row of flags_table listing exactly that
 * combination.  The row supplies the base bits; the member values are ORed
 * in on top.  Returns false, leaving *packed_cond untouched, when the
 * combination is not in the table.  devinfo is not used.
 *
 * NOTE(review): for the MC | AC row this writes ac into bits 3:2, whereas
 * v3d_qpu_flags_unpack() reads ac from bits 1:0 for encodings with bit 6
 * set.  The two look inconsistent whenever ac is not V3D_QPU_COND_IFA --
 * confirm against the spec.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
/* One "present" bit per member of struct v3d_qpu_flags.  These macros are
 * not #undef'd, so they stay defined for the rest of the file.
 */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* Supported combinations of present members and the fixed bits that
         * select each encoding.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                /* Only an exact match of the present set is accepted. */
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* ORed in unconditionally; presumably V3D_QPU_PF_NONE is 0
                 * so an absent member contributes nothing -- TODO confirm.
                 */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        /* With bit 6 set mc goes in bits 5:4, otherwise in
                         * bits 3:2.
                         */
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
444 
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field (and for some rows the mux_a field) as
 * well.
 */
/* Bitmask with one bit set for every mux value in [bot, top] inclusive.
 * Both arguments are parenthesized so that expressions can be passed
 * without being re-associated by operator precedence.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
/* Accepts any of the eight mux values 0-7. */
#define ANYMUX MUX_MASK(0, 7)
451 
/* One row of an opcode decode table.  Rows are written with positional
 * initializers, so the member order must not change.
 */
struct opcode_desc {
        /* Inclusive range of opcode field values this row matches. */
        uint8_t opcode_first;
        uint8_t opcode_last;
        /* Bitmasks of accepted mux values: bit n set means mux value n
         * matches.
         */
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        /* Operation selected by this row (a V3D_QPU_A_* or V3D_QPU_M_*
         * value in the tables of this file).
         */
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};
461 
/* Decode table for the add ALU.  Each row gives an inclusive opcode range,
 * the accepted mux_b and mux_a values, the resulting V3D_QPU_A_* operation
 * and, optionally, a V3D version.  lookup_opcode() returns the first row
 * that matches, so the order matters wherever rows overlap.
 *
 * NOTE(review): lookup_opcode() does not look at the ver member, so of two
 * rows with identical match criteria (e.g. VDWWT for 33 and IID for 40) only
 * the first can ever be returned -- confirm whether that is intended.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_PATCHID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
554 
/* Decode table for V3D_QPU_M_* operations, in the same row format as the
 * add ALU table: inclusive opcode range, accepted mux_b values, accepted
 * mux_a values, operation and optional version.  Opcode 15 is split between
 * FMOV, NOP and MOV by its mux_b value.
 */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
568 
569 static const struct opcode_desc *
lookup_opcode(const struct opcode_desc * opcodes,size_t num_opcodes,uint32_t opcode,uint32_t mux_a,uint32_t mux_b)570 lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
571               uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
572 {
573         for (int i = 0; i < num_opcodes; i++) {
574                 const struct opcode_desc *op_desc = &opcodes[i];
575 
576                 if (opcode < op_desc->opcode_first ||
577                     opcode > op_desc->opcode_last)
578                         continue;
579 
580                 if (!(op_desc->mux_b_mask & (1 << mux_b)))
581                         continue;
582 
583                 if (!(op_desc->mux_a_mask & (1 << mux_a)))
584                         continue;
585 
586                 return op_desc;
587         }
588 
589         return NULL;
590 }
591 
592 static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)593 v3d_qpu_float32_unpack_unpack(uint32_t packed,
594                               enum v3d_qpu_input_unpack *unpacked)
595 {
596         switch (packed) {
597         case 0:
598                 *unpacked = V3D_QPU_UNPACK_ABS;
599                 return true;
600         case 1:
601                 *unpacked = V3D_QPU_UNPACK_NONE;
602                 return true;
603         case 2:
604                 *unpacked = V3D_QPU_UNPACK_L;
605                 return true;
606         case 3:
607                 *unpacked = V3D_QPU_UNPACK_H;
608                 return true;
609         default:
610                 return false;
611         }
612 }
613 
614 static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)615 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
616                             uint32_t *packed)
617 {
618         switch (unpacked) {
619         case V3D_QPU_UNPACK_ABS:
620                 *packed = 0;
621                 return true;
622         case V3D_QPU_UNPACK_NONE:
623                 *packed = 1;
624                 return true;
625         case V3D_QPU_UNPACK_L:
626                 *packed = 2;
627                 return true;
628         case V3D_QPU_UNPACK_H:
629                 *packed = 3;
630                 return true;
631         default:
632                 return false;
633         }
634 }
635 
636 static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)637 v3d_qpu_float16_unpack_unpack(uint32_t packed,
638                               enum v3d_qpu_input_unpack *unpacked)
639 {
640         switch (packed) {
641         case 0:
642                 *unpacked = V3D_QPU_UNPACK_NONE;
643                 return true;
644         case 1:
645                 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
646                 return true;
647         case 2:
648                 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
649                 return true;
650         case 3:
651                 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
652                 return true;
653         case 4:
654                 *unpacked = V3D_QPU_UNPACK_SWAP_16;
655                 return true;
656         default:
657                 return false;
658         }
659 }
660 
661 static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)662 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
663                             uint32_t *packed)
664 {
665         switch (unpacked) {
666         case V3D_QPU_UNPACK_NONE:
667                 *packed = 0;
668                 return true;
669         case V3D_QPU_UNPACK_REPLICATE_32F_16:
670                 *packed = 1;
671                 return true;
672         case V3D_QPU_UNPACK_REPLICATE_L_16:
673                 *packed = 2;
674                 return true;
675         case V3D_QPU_UNPACK_REPLICATE_H_16:
676                 *packed = 3;
677                 return true;
678         case V3D_QPU_UNPACK_SWAP_16:
679                 *packed = 4;
680                 return true;
681         default:
682                 return false;
683         }
684 }
685 
686 static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)687 v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
688                           uint32_t *packed)
689 {
690         switch (unpacked) {
691         case V3D_QPU_PACK_NONE:
692                 *packed = 0;
693                 return true;
694         case V3D_QPU_PACK_L:
695                 *packed = 1;
696                 return true;
697         case V3D_QPU_PACK_H:
698                 *packed = 2;
699                 return true;
700         default:
701                 return false;
702         }
703 }
704 
705 static bool
v3d_qpu_add_unpack(const struct v3d_device_info * devinfo,uint64_t packed_inst,struct v3d_qpu_instr * instr)706 v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
707                    struct v3d_qpu_instr *instr)
708 {
709         uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
710         uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
711         uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
712         uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
713 
714         uint32_t map_op = op;
715         /* Some big clusters of opcodes are replicated with unpack
716          * flags
717          */
718         if (map_op >= 249 && map_op <= 251)
719                 map_op = (map_op - 249 + 245);
720         if (map_op >= 253 && map_op <= 255)
721                 map_op = (map_op - 253 + 245);
722 
723         const struct opcode_desc *desc =
724                 lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
725                               map_op, mux_a, mux_b);
726         if (!desc)
727                 return false;
728 
729         instr->alu.add.op = desc->op;
730 
731         /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
732          * operands.
733          */
734         if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
735                 if (instr->alu.add.op == V3D_QPU_A_FMIN)
736                         instr->alu.add.op = V3D_QPU_A_FMAX;
737                 if (instr->alu.add.op == V3D_QPU_A_FADD)
738                         instr->alu.add.op = V3D_QPU_A_FADDNF;
739         }
740 
741         /* Some QPU ops require a bit more than just basic opcode and mux a/b
742          * comparisons to distinguish them.
743          */
744         switch (instr->alu.add.op) {
745         case V3D_QPU_A_STVPMV:
746         case V3D_QPU_A_STVPMD:
747         case V3D_QPU_A_STVPMP:
748                 switch (waddr) {
749                 case 0:
750                         instr->alu.add.op = V3D_QPU_A_STVPMV;
751                         break;
752                 case 1:
753                         instr->alu.add.op = V3D_QPU_A_STVPMD;
754                         break;
755                 case 2:
756                         instr->alu.add.op = V3D_QPU_A_STVPMP;
757                         break;
758                 default:
759                         return false;
760                 }
761                 break;
762         default:
763                 break;
764         }
765 
766         switch (instr->alu.add.op) {
767         case V3D_QPU_A_FADD:
768         case V3D_QPU_A_FADDNF:
769         case V3D_QPU_A_FSUB:
770         case V3D_QPU_A_FMIN:
771         case V3D_QPU_A_FMAX:
772         case V3D_QPU_A_FCMP:
773                 instr->alu.add.output_pack = (op >> 4) & 0x3;
774 
775                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
776                                                    &instr->alu.add.a_unpack)) {
777                         return false;
778                 }
779 
780                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
781                                                    &instr->alu.add.b_unpack)) {
782                         return false;
783                 }
784                 break;
785 
786         case V3D_QPU_A_FFLOOR:
787         case V3D_QPU_A_FROUND:
788         case V3D_QPU_A_FTRUNC:
789         case V3D_QPU_A_FCEIL:
790         case V3D_QPU_A_FDX:
791         case V3D_QPU_A_FDY:
792                 instr->alu.add.output_pack = mux_b & 0x3;
793 
794                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
795                                                    &instr->alu.add.a_unpack)) {
796                         return false;
797                 }
798                 break;
799 
800         case V3D_QPU_A_FTOIN:
801         case V3D_QPU_A_FTOIZ:
802         case V3D_QPU_A_FTOUZ:
803         case V3D_QPU_A_FTOC:
804                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
805 
806                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
807                                                    &instr->alu.add.a_unpack)) {
808                         return false;
809                 }
810                 break;
811 
812         case V3D_QPU_A_VFMIN:
813         case V3D_QPU_A_VFMAX:
814                 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
815                                                    &instr->alu.add.a_unpack)) {
816                         return false;
817                 }
818 
819                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
820                 instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
821                 break;
822 
823         default:
824                 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
825                 instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
826                 instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
827                 break;
828         }
829 
830         instr->alu.add.a = mux_a;
831         instr->alu.add.b = mux_b;
832         instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
833 
834         instr->alu.add.magic_write = false;
835         if (packed_inst & VC5_QPU_MA) {
836                 switch (instr->alu.add.op) {
837                 case V3D_QPU_A_LDVPMV_IN:
838                         instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
839                         break;
840                 case V3D_QPU_A_LDVPMD_IN:
841                         instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
842                         break;
843                 case V3D_QPU_A_LDVPMG_IN:
844                         instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
845                         break;
846                 default:
847                         instr->alu.add.magic_write = true;
848                         break;
849                 }
850         }
851 
852         return true;
853 }
854 
855 static bool
v3d_qpu_mul_unpack(const struct v3d_device_info * devinfo,uint64_t packed_inst,struct v3d_qpu_instr * instr)856 v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
857                    struct v3d_qpu_instr *instr)
858 {
859         uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
860         uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
861         uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);
862 
863         {
864                 const struct opcode_desc *desc =
865                         lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
866                                       op, mux_a, mux_b);
867                 if (!desc)
868                         return false;
869 
870                 instr->alu.mul.op = desc->op;
871         }
872 
873         switch (instr->alu.mul.op) {
874         case V3D_QPU_M_FMUL:
875                 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
876 
877                 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
878                                                    &instr->alu.mul.a_unpack)) {
879                         return false;
880                 }
881 
882                 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
883                                                    &instr->alu.mul.b_unpack)) {
884                         return false;
885                 }
886 
887                 break;
888 
889         case V3D_QPU_M_FMOV:
890                 instr->alu.mul.output_pack = (((op & 1) << 1) +
891                                               ((mux_b >> 2) & 1));
892 
893                 if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
894                                                    &instr->alu.mul.a_unpack)) {
895                         return false;
896                 }
897 
898                 break;
899 
900         case V3D_QPU_M_VFMUL:
901                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
902 
903                 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
904                                                    &instr->alu.mul.a_unpack)) {
905                         return false;
906                 }
907 
908                 instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
909 
910                 break;
911 
912         default:
913                 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
914                 instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
915                 instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
916                 break;
917         }
918 
919         instr->alu.mul.a = mux_a;
920         instr->alu.mul.b = mux_b;
921         instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
922         instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;
923 
924         return true;
925 }
926 
927 static bool
v3d_qpu_add_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)928 v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
929                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
930 {
931         uint32_t waddr = instr->alu.add.waddr;
932         uint32_t mux_a = instr->alu.add.a;
933         uint32_t mux_b = instr->alu.add.b;
934         int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
935         const struct opcode_desc *desc;
936 
937         int opcode;
938         for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
939              desc++) {
940                 if (desc->op == instr->alu.add.op)
941                         break;
942         }
943         if (desc == &add_ops[ARRAY_SIZE(add_ops)])
944                 return false;
945 
946         opcode = desc->opcode_first;
947 
948         /* If an operation doesn't use an arg, its mux values may be used to
949          * identify the operation type.
950          */
951         if (nsrc < 2)
952                 mux_b = ffs(desc->mux_b_mask) - 1;
953 
954         if (nsrc < 1)
955                 mux_a = ffs(desc->mux_a_mask) - 1;
956 
957         bool no_magic_write = false;
958 
959         switch (instr->alu.add.op) {
960         case V3D_QPU_A_STVPMV:
961                 waddr = 0;
962                 no_magic_write = true;
963                 break;
964         case V3D_QPU_A_STVPMD:
965                 waddr = 1;
966                 no_magic_write = true;
967                 break;
968         case V3D_QPU_A_STVPMP:
969                 waddr = 2;
970                 no_magic_write = true;
971                 break;
972 
973         case V3D_QPU_A_LDVPMV_IN:
974         case V3D_QPU_A_LDVPMD_IN:
975         case V3D_QPU_A_LDVPMP:
976         case V3D_QPU_A_LDVPMG_IN:
977                 assert(!instr->alu.add.magic_write);
978                 break;
979 
980         case V3D_QPU_A_LDVPMV_OUT:
981         case V3D_QPU_A_LDVPMD_OUT:
982         case V3D_QPU_A_LDVPMG_OUT:
983                 assert(!instr->alu.add.magic_write);
984                 *packed_instr |= VC5_QPU_MA;
985                 break;
986 
987         default:
988                 break;
989         }
990 
991         switch (instr->alu.add.op) {
992         case V3D_QPU_A_FADD:
993         case V3D_QPU_A_FADDNF:
994         case V3D_QPU_A_FSUB:
995         case V3D_QPU_A_FMIN:
996         case V3D_QPU_A_FMAX:
997         case V3D_QPU_A_FCMP: {
998                 uint32_t output_pack;
999                 uint32_t a_unpack;
1000                 uint32_t b_unpack;
1001 
1002                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1003                                                &output_pack)) {
1004                         return false;
1005                 }
1006                 opcode |= output_pack << 4;
1007 
1008                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
1009                                                  &a_unpack)) {
1010                         return false;
1011                 }
1012 
1013                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
1014                                                  &b_unpack)) {
1015                         return false;
1016                 }
1017 
1018                 /* These operations with commutative operands are
1019                  * distinguished by which order their operands come in.
1020                  */
1021                 bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
1022                 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
1023                       instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
1024                     ((instr->alu.add.op == V3D_QPU_A_FMAX ||
1025                       instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
1026                         uint32_t temp;
1027 
1028                         temp = a_unpack;
1029                         a_unpack = b_unpack;
1030                         b_unpack = temp;
1031 
1032                         temp = mux_a;
1033                         mux_a = mux_b;
1034                         mux_b = temp;
1035                 }
1036 
1037                 opcode |= a_unpack << 2;
1038                 opcode |= b_unpack << 0;
1039                 break;
1040         }
1041 
1042         case V3D_QPU_A_FFLOOR:
1043         case V3D_QPU_A_FROUND:
1044         case V3D_QPU_A_FTRUNC:
1045         case V3D_QPU_A_FCEIL:
1046         case V3D_QPU_A_FDX:
1047         case V3D_QPU_A_FDY: {
1048                 uint32_t packed;
1049 
1050                 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
1051                                                &packed)) {
1052                         return false;
1053                 }
1054                 mux_b |= packed;
1055 
1056                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
1057                                                  &packed)) {
1058                         return false;
1059                 }
1060                 if (packed == 0)
1061                         return false;
1062                 opcode |= packed << 2;
1063                 break;
1064         }
1065 
1066         case V3D_QPU_A_FTOIN:
1067         case V3D_QPU_A_FTOIZ:
1068         case V3D_QPU_A_FTOUZ:
1069         case V3D_QPU_A_FTOC:
1070                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
1071                         return false;
1072 
1073                 uint32_t packed;
1074                 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
1075                                                  &packed)) {
1076                         return false;
1077                 }
1078                 if (packed == 0)
1079                         return false;
1080                 opcode |= packed << 2;
1081 
1082                 break;
1083 
1084         case V3D_QPU_A_VFMIN:
1085         case V3D_QPU_A_VFMAX:
1086                 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1087                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
1088                         return false;
1089                 }
1090 
1091                 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
1092                                                  &packed)) {
1093                         return false;
1094                 }
1095                 opcode |= packed;
1096                 break;
1097 
1098         default:
1099                 if (instr->alu.add.op != V3D_QPU_A_NOP &&
1100                     (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
1101                      instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
1102                      instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
1103                         return false;
1104                 }
1105                 break;
1106         }
1107 
1108         *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
1109         *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
1110         *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
1111         *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
1112         if (instr->alu.add.magic_write && !no_magic_write)
1113                 *packed_instr |= VC5_QPU_MA;
1114 
1115         return true;
1116 }
1117 
1118 static bool
v3d_qpu_mul_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1119 v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
1120                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
1121 {
1122         uint32_t mux_a = instr->alu.mul.a;
1123         uint32_t mux_b = instr->alu.mul.b;
1124         int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
1125         const struct opcode_desc *desc;
1126 
1127         for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
1128              desc++) {
1129                 if (desc->op == instr->alu.mul.op)
1130                         break;
1131         }
1132         if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
1133                 return false;
1134 
1135         uint32_t opcode = desc->opcode_first;
1136 
1137         /* Some opcodes have a single valid value for their mux a/b, so set
1138          * that here.  If mux a/b determine packing, it will be set below.
1139          */
1140         if (nsrc < 2)
1141                 mux_b = ffs(desc->mux_b_mask) - 1;
1142 
1143         if (nsrc < 1)
1144                 mux_a = ffs(desc->mux_a_mask) - 1;
1145 
1146         switch (instr->alu.mul.op) {
1147         case V3D_QPU_M_FMUL: {
1148                 uint32_t packed;
1149 
1150                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
1151                                                &packed)) {
1152                         return false;
1153                 }
1154                 /* No need for a +1 because desc->opcode_first has a 1 in this
1155                  * field.
1156                  */
1157                 opcode += packed << 4;
1158 
1159                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
1160                                                  &packed)) {
1161                         return false;
1162                 }
1163                 opcode |= packed << 2;
1164 
1165                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
1166                                                  &packed)) {
1167                         return false;
1168                 }
1169                 opcode |= packed << 0;
1170                 break;
1171         }
1172 
1173         case V3D_QPU_M_FMOV: {
1174                 uint32_t packed;
1175 
1176                 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
1177                                                &packed)) {
1178                         return false;
1179                 }
1180                 opcode |= (packed >> 1) & 1;
1181                 mux_b = (packed & 1) << 2;
1182 
1183                 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
1184                                                  &packed)) {
1185                         return false;
1186                 }
1187                 mux_b |= packed;
1188                 break;
1189         }
1190 
1191         case V3D_QPU_M_VFMUL: {
1192                 uint32_t packed;
1193 
1194                 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
1195                         return false;
1196 
1197                 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
1198                                                  &packed)) {
1199                         return false;
1200                 }
1201                 if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
1202                         opcode = 8;
1203                 else
1204                         opcode |= (packed + 4) & 7;
1205 
1206                 if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
1207                         return false;
1208 
1209                 break;
1210         }
1211 
1212         default:
1213                 break;
1214         }
1215 
1216         *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
1217         *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);
1218 
1219         *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
1220         *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
1221         if (instr->alu.mul.magic_write)
1222                 *packed_instr |= VC5_QPU_MM;
1223 
1224         return true;
1225 }
1226 
1227 static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1228 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1229                          uint64_t packed_instr,
1230                          struct v3d_qpu_instr *instr)
1231 {
1232         instr->type = V3D_QPU_INSTR_TYPE_ALU;
1233 
1234         if (!v3d_qpu_sig_unpack(devinfo,
1235                                 QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
1236                                 &instr->sig))
1237                 return false;
1238 
1239         uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
1240         if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1241                 instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
1242                 instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
1243 
1244                 instr->flags.ac = V3D_QPU_COND_NONE;
1245                 instr->flags.mc = V3D_QPU_COND_NONE;
1246                 instr->flags.apf = V3D_QPU_PF_NONE;
1247                 instr->flags.mpf = V3D_QPU_PF_NONE;
1248                 instr->flags.auf = V3D_QPU_UF_NONE;
1249                 instr->flags.muf = V3D_QPU_UF_NONE;
1250         } else {
1251                 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
1252                         return false;
1253         }
1254 
1255         instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
1256         instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
1257 
1258         if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1259                 return false;
1260 
1261         if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1262                 return false;
1263 
1264         return true;
1265 }
1266 
1267 static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1268 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1269                             uint64_t packed_instr,
1270                             struct v3d_qpu_instr *instr)
1271 {
1272         instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1273 
1274         uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
1275         if (cond == 0)
1276                 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1277         else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1278                  V3D_QPU_BRANCH_COND_ALLNA)
1279                 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1280         else
1281                 return false;
1282 
1283         uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
1284         if (msfign == 3)
1285                 return false;
1286         instr->branch.msfign = msfign;
1287 
1288         instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
1289 
1290         instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
1291         if (instr->branch.ub) {
1292                 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1293                                                   VC5_QPU_BRANCH_BDU);
1294         }
1295 
1296         instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1297                                               VC5_QPU_RADDR_A);
1298 
1299         instr->branch.offset = 0;
1300 
1301         instr->branch.offset +=
1302                 QPU_GET_FIELD(packed_instr,
1303                               VC5_QPU_BRANCH_ADDR_LOW) << 3;
1304 
1305         instr->branch.offset +=
1306                 QPU_GET_FIELD(packed_instr,
1307                               VC5_QPU_BRANCH_ADDR_HIGH) << 24;
1308 
1309         return true;
1310 }
1311 
1312 bool
v3d_qpu_instr_unpack(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1313 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1314                      uint64_t packed_instr,
1315                      struct v3d_qpu_instr *instr)
1316 {
1317         if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
1318                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1319         } else {
1320                 uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
1321 
1322                 if ((sig & 24) == 16) {
1323                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1324                                                            instr);
1325                 } else {
1326                         return false;
1327                 }
1328         }
1329 }
1330 
1331 static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1332 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
1333                        const struct v3d_qpu_instr *instr,
1334                        uint64_t *packed_instr)
1335 {
1336         uint32_t sig;
1337         if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
1338                 return false;
1339         *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
1340 
1341         if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
1342                 *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
1343                 *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
1344 
1345                 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
1346                         return false;
1347                 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
1348                         return false;
1349 
1350                 uint32_t flags;
1351                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1352                         if (instr->flags.ac != V3D_QPU_COND_NONE ||
1353                             instr->flags.mc != V3D_QPU_COND_NONE ||
1354                             instr->flags.apf != V3D_QPU_PF_NONE ||
1355                             instr->flags.mpf != V3D_QPU_PF_NONE ||
1356                             instr->flags.auf != V3D_QPU_UF_NONE ||
1357                             instr->flags.muf != V3D_QPU_UF_NONE) {
1358                                 return false;
1359                         }
1360 
1361                         flags = instr->sig_addr;
1362                         if (instr->sig_magic)
1363                                 flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
1364                 } else {
1365                         if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
1366                                 return false;
1367                 }
1368 
1369                 *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
1370         } else {
1371                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
1372                         return false;
1373         }
1374 
1375         return true;
1376 }
1377 
1378 static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1379 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1380                           const struct v3d_qpu_instr *instr,
1381                           uint64_t *packed_instr)
1382 {
1383         *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);
1384 
1385         if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1386                 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1387                                                     V3D_QPU_BRANCH_COND_A0),
1388                                                VC5_QPU_BRANCH_COND);
1389         }
1390 
1391         *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1392                                        VC5_QPU_BRANCH_MSFIGN);
1393 
1394         *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1395                                        VC5_QPU_BRANCH_BDI);
1396 
1397         if (instr->branch.ub) {
1398                 *packed_instr |= VC5_QPU_BRANCH_UB;
1399                 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1400                                                VC5_QPU_BRANCH_BDU);
1401         }
1402 
1403         switch (instr->branch.bdi) {
1404         case V3D_QPU_BRANCH_DEST_ABS:
1405         case V3D_QPU_BRANCH_DEST_REL:
1406                 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1407                                                VC5_QPU_BRANCH_MSFIGN);
1408 
1409                 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1410                                                 ~0xff000000) >> 3,
1411                                                VC5_QPU_BRANCH_ADDR_LOW);
1412 
1413                 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1414                                                VC5_QPU_BRANCH_ADDR_HIGH);
1415 
1416         case V3D_QPU_BRANCH_DEST_REGFILE:
1417                 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1418                                                VC5_QPU_RADDR_A);
1419                 break;
1420 
1421         default:
1422                 break;
1423         }
1424 
1425         return true;
1426 }
1427 
1428 bool
v3d_qpu_instr_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1429 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1430                    const struct v3d_qpu_instr *instr,
1431                    uint64_t *packed_instr)
1432 {
1433         *packed_instr = 0;
1434 
1435         switch (instr->type) {
1436         case V3D_QPU_INSTR_TYPE_ALU:
1437                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1438         case V3D_QPU_INSTR_TYPE_BRANCH:
1439                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1440         default:
1441                 return false;
1442         }
1443 }
1444