1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <string.h>
25 #include "util/macros.h"
26 #include "util/bitscan.h"
27 
28 #include "broadcom/common/v3d_device_info.h"
29 #include "qpu_instr.h"
30 
#ifndef QPU_MASK
/* Generic helpers for building, extracting, and inserting bitfields of the
 * 64-bit QPU instruction word.  Guarded so that multiple files defining the
 * same helpers don't clash.
 */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
46 
/* Bit positions of the fields of the 64-bit instruction word.  Masks are
 * built with QPU_MASK from the (high, low) bit numbers.  Several ranges
 * overlap because ALU and branch instructions use different layouts of the
 * same word (e.g. BRANCH_ADDR_LOW overlaps the ALU SIG/COND fields).
 */
#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
106 
/* Shorthand designated-initializer helpers used to keep the signal map
 * tables below compact and readable.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
121 
/* Maps the 5-bit packed signal field to its unpacked flags for V3D 3.3
 * (used for all versions below 4.0 by v3d_qpu_sig_unpack()).  Indices with
 * an all-zero entry other than [0] are reserved encodings; the unpack
 * helper relies on that to reject them.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
154 
/* Maps the 5-bit packed signal field to its unpacked flags for V3D 4.0.
 * Differences from v33: the LDVPM+LDTMU combinations are gone, and WRTMUC
 * encodings appear at [18]-[21].
 */
static const struct v3d_qpu_sig v40_sig_map[] = {
        /*      MISC    R3      R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {                LDTMU,         },
        [5]  = { THRSW,         LDTMU,         },
        [6]  = {                LDTMU,  LDUNIF },
        [7]  = { THRSW,         LDTMU,  LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        [18] = {                        WRTMUC },
        [19] = { THRSW,                 WRTMUC },
        [20] = {        LDVARY,         WRTMUC },
        [21] = { THRSW, LDVARY,         WRTMUC },
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        /* 24-30 reserved */
        [31] = { SMIMM,         LDTMU,         },
};
183 
/* Maps the 5-bit packed signal field to its unpacked flags for V3D 4.1+.
 * Adds the LDUNIFRF/LDUNIFA/LDUNIFARF encodings relative to v40.
 */
static const struct v3d_qpu_sig v41_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM,    LDVARY,        },
        [15] = { SMIMM,                   },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        /* 26-30 reserved ([24]/[25] below are valid LDUNIFA encodings) */
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        [31] = { SMIMM,            LDTMU, },
};
215 
216 bool
v3d_qpu_sig_unpack(const struct v3d_device_info * devinfo,uint32_t packed_sig,struct v3d_qpu_sig * sig)217 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
218                    uint32_t packed_sig,
219                    struct v3d_qpu_sig *sig)
220 {
221         if (packed_sig >= ARRAY_SIZE(v33_sig_map))
222                 return false;
223 
224         if (devinfo->ver >= 41)
225                 *sig = v41_sig_map[packed_sig];
226         else if (devinfo->ver == 40)
227                 *sig = v40_sig_map[packed_sig];
228         else
229                 *sig = v33_sig_map[packed_sig];
230 
231         /* Signals with zeroed unpacked contents after element 0 are reserved. */
232         return (packed_sig == 0 ||
233                 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
234 }
235 
236 bool
v3d_qpu_sig_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig,uint32_t * packed_sig)237 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
238                  const struct v3d_qpu_sig *sig,
239                  uint32_t *packed_sig)
240 {
241         static const struct v3d_qpu_sig *map;
242 
243         if (devinfo->ver >= 41)
244                 map = v41_sig_map;
245         else if (devinfo->ver == 40)
246                 map = v40_sig_map;
247         else
248                 map = v33_sig_map;
249 
250         for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
251                 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
252                         *packed_sig = i;
253                         return true;
254                 }
255         }
256 
257         return false;
258 }
static inline unsigned
fui(float f)
{
        /* Reinterpret the bits of a float as an unsigned integer.  Type
         * punning through a union is well-defined in C.
         */
        union {
                float f;
                unsigned ui;
        } bits = { .f = f };

        return bits.ui;
}
266 
/* Table of the values representable by the 6-bit small-immediate field, in
 * encoding order: integers 0..15, then -16..-1 (stored as their two's
 * complement uint32_t bit patterns), then the float bit patterns for the
 * powers of two 2^-8 .. 2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
293 
294 bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info * devinfo,uint32_t packed_small_immediate,uint32_t * small_immediate)295 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
296                          uint32_t packed_small_immediate,
297                          uint32_t *small_immediate)
298 {
299         if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
300                 return false;
301 
302         *small_immediate = small_immediates[packed_small_immediate];
303         return true;
304 }
305 
306 bool
v3d_qpu_small_imm_pack(const struct v3d_device_info * devinfo,uint32_t value,uint32_t * packed_small_immediate)307 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
308                        uint32_t value,
309                        uint32_t *packed_small_immediate)
310 {
311         STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
312 
313         for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
314                 if (small_immediates[i] == value) {
315                         *packed_small_immediate = i;
316                         return true;
317                 }
318         }
319 
320         return false;
321 }
322 
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* Decodes the 7-bit condition/flags field into the per-ALU condition
         * (ac/mc), pushed flag (apf/mpf), and update flag (auf/muf) settings.
         * Returns false only for the one explicitly rejected encoding; every
         * other value decodes (possibly to all-NONE).
         */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from "no conditions, no flag writes" and fill in whatever
         * the encoding specifies.
         */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 0b00000xx: add-ALU pushed flag. */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 0b000xxxx (>= 4): add-ALU update flag, biased by 4. */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* 0b0010000 is rejected as invalid. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0b10100xx: mul-ALU pushed flag. */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0b001xxxx: mul-ALU update flag, biased by 4. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0b010xxyy: add-ALU condition (xx) + mul pushed flag (yy). */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0b011xxyy: mul-ALU condition (xx) + add pushed flag (yy). */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* Bit 6 set: mul-ALU condition in bits 5:4, plus either an
                 * add-ALU condition (low nibble < 4) or an add-ALU update
                 * flag in the low four bits.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
371 
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
        /* Packs an unpacked condition/flags description back into the 7-bit
         * field.  Returns false for combinations the encoding cannot
         * express.
         */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* Each row is one encodable combination of present flag groups;
         * `bits` holds the fixed upper bits for that combination, and the
         * remaining low bits are filled in from the individual values
         * below.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Build the bitmask of which flag groups are actually set. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* Pushed flags occupy the low bits directly (at most one of
                 * apf/mpf is set in any encodable combination above).
                 */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Update flags are encoded biased by 4 in the low nibble. */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                /* The mul condition goes in bits 5:4 when bit 6 is set
                 * (MC|AC and MC|AUF rows), otherwise in bits 3:2.
                 */
                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
445 
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
/* Bitmask with bits [bot, top] (inclusive) set — used to say which mux
 * values an opcode_desc entry matches.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
452 
/* One row of the add_ops/mul_ops decode tables: a raw opcode range plus the
 * mux values it matches, and the V3D_QPU_A_*/V3D_QPU_M_* op it decodes to.
 */
struct opcode_desc {
        uint8_t opcode_first; /* first raw opcode value matched (inclusive) */
        uint8_t opcode_last;  /* last raw opcode value matched (inclusive) */
        uint8_t mux_b_mask;   /* bitmask of mux_b values that select this op */
        uint8_t mux_a_mask;   /* bitmask of mux_a values that select this op */
        uint8_t op;           /* decoded op (V3D_QPU_A_* or V3D_QPU_M_*) */
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};
462 
/* Decode table for the add-ALU opcode field.  Each entry matches a range
 * [opcode_first, opcode_last] of the raw opcode plus bitmasks of accepted
 * mux_a/mux_b values; a nonzero trailing `ver` restricts the entry to that
 * V3D version.  lookup_opcode() returns the first matching entry.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: 1-arg ops distinguished by mux_b. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: 0-arg ops distinguished by mux_b and mux_a. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },
        { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },
        { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
564 
/* Decode table for the mul-ALU opcode field, in the same format as
 * add_ops.
 */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
578 
579 static const struct opcode_desc *
lookup_opcode(const struct opcode_desc * opcodes,size_t num_opcodes,uint32_t opcode,uint32_t mux_a,uint32_t mux_b)580 lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
581               uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
582 {
583         for (int i = 0; i < num_opcodes; i++) {
584                 const struct opcode_desc *op_desc = &opcodes[i];
585 
586                 if (opcode < op_desc->opcode_first ||
587                     opcode > op_desc->opcode_last)
588                         continue;
589 
590                 if (!(op_desc->mux_b_mask & (1 << mux_b)))
591                         continue;
592 
593                 if (!(op_desc->mux_a_mask & (1 << mux_a)))
594                         continue;
595 
596                 return op_desc;
597         }
598 
599         return NULL;
600 }
601 
602 static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)603 v3d_qpu_float32_unpack_unpack(uint32_t packed,
604                               enum v3d_qpu_input_unpack *unpacked)
605 {
606         switch (packed) {
607         case 0:
608                 *unpacked = V3D_QPU_UNPACK_ABS;
609                 return true;
610         case 1:
611                 *unpacked = V3D_QPU_UNPACK_NONE;
612                 return true;
613         case 2:
614                 *unpacked = V3D_QPU_UNPACK_L;
615                 return true;
616         case 3:
617                 *unpacked = V3D_QPU_UNPACK_H;
618                 return true;
619         default:
620                 return false;
621         }
622 }
623 
624 static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)625 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
626                             uint32_t *packed)
627 {
628         switch (unpacked) {
629         case V3D_QPU_UNPACK_ABS:
630                 *packed = 0;
631                 return true;
632         case V3D_QPU_UNPACK_NONE:
633                 *packed = 1;
634                 return true;
635         case V3D_QPU_UNPACK_L:
636                 *packed = 2;
637                 return true;
638         case V3D_QPU_UNPACK_H:
639                 *packed = 3;
640                 return true;
641         default:
642                 return false;
643         }
644 }
645 
646 static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)647 v3d_qpu_float16_unpack_unpack(uint32_t packed,
648                               enum v3d_qpu_input_unpack *unpacked)
649 {
650         switch (packed) {
651         case 0:
652                 *unpacked = V3D_QPU_UNPACK_NONE;
653                 return true;
654         case 1:
655                 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
656                 return true;
657         case 2:
658                 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
659                 return true;
660         case 3:
661                 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
662                 return true;
663         case 4:
664                 *unpacked = V3D_QPU_UNPACK_SWAP_16;
665                 return true;
666         default:
667                 return false;
668         }
669 }
670 
671 static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)672 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
673                             uint32_t *packed)
674 {
675         switch (unpacked) {
676         case V3D_QPU_UNPACK_NONE:
677                 *packed = 0;
678                 return true;
679         case V3D_QPU_UNPACK_REPLICATE_32F_16:
680                 *packed = 1;
681                 return true;
682         case V3D_QPU_UNPACK_REPLICATE_L_16:
683                 *packed = 2;
684                 return true;
685         case V3D_QPU_UNPACK_REPLICATE_H_16:
686                 *packed = 3;
687                 return true;
688         case V3D_QPU_UNPACK_SWAP_16:
689                 *packed = 4;
690                 return true;
691         default:
692                 return false;
693         }
694 }
695 
696 static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)697 v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
698                           uint32_t *packed)
699 {
700         switch (unpacked) {
701         case V3D_QPU_PACK_NONE:
702                 *packed = 0;
703                 return true;
704         case V3D_QPU_PACK_L:
705                 *packed = 1;
706                 return true;
707         case V3D_QPU_PACK_H:
708                 *packed = 2;
709                 return true;
710         default:
711                 return false;
712         }
713 }
714 
/*
 * Decodes the add-ALU half of a packed 64-bit QPU instruction into
 * instr->alu.add (op, muxes, waddr, magic_write, and pack/unpack modes).
 *
 * Returns false when the opcode/mux combination doesn't match any entry in
 * add_ops or when an unpack field holds an invalid encoding.
 */
static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                /* The STVPM* variants share one opcode and are selected by
                 * the write address (see the matching encode in
                 * v3d_qpu_add_pack).
                 */
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        /* Decode output-pack and input-unpack modifier bits, whose location
         * in the opcode depends on the operation.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                /* VFPACK shares this layout for its input unpacks but has no
                 * output pack field.
                 */
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                /* Single-source ops: the output pack lives in the otherwise
                 * unused mux_b field.
                 */
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-integer conversions have no output pack, only an
                 * input unpack.
                 */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* Vector float16 ops use a 3-bit f16 unpack on operand a. */
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                /* No pack/unpack modifiers for any other op. */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        instr->alu.add.magic_write = false;
        if (packed_inst & VC5_QPU_MA) {
                /* For the LDVPM*_IN ops, the "magic" bit instead selects the
                 * _OUT variant rather than a magic write address.
                 */
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
868 
/*
 * Decodes the mul-ALU half of a packed 64-bit QPU instruction into
 * instr->alu.mul (op, muxes, waddr, magic_write, and pack/unpack modes).
 *
 * Returns false when the opcode/mux combination doesn't match any entry in
 * mul_ops or when an unpack field holds an invalid encoding.
 */
static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* The encoded pack field is biased by 1: the opcode base for
                 * FMUL already carries a 1 in this field (see the matching
                 * "+1" note in v3d_qpu_mul_pack).
                 */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* The output pack is split: high bit in op bit 0, low bit in
                 * mux_b bit 2; the a-operand unpack sits in mux_b bits 1:0.
                 */
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                /* The f16 unpack field is stored offset by 4 (mod 8) from
                 * the enum encoding (see the "+4" in v3d_qpu_mul_pack).
                 */
                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                /* No pack/unpack modifiers for any other mul op. */
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;

        return true;
}
940 
/*
 * Packs the add-ALU half of an instruction (opcode, muxes, waddr, and
 * pack/unpack modifier bits) into *packed_instr.
 *
 * Returns false if the op isn't in the add_ops table or if the requested
 * pack/unpack modes can't be encoded for that op.
 */
static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        /* Linear-search the opcode table for this op's descriptor. */
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        bool no_magic_write = false;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                /* The STVPM* variants share an opcode and are distinguished
                 * by waddr (see the matching decode in v3d_qpu_add_unpack).
                 */
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                /* The _OUT variants are encoded as the _IN opcode with the
                 * "magic" bit set.
                 */
                assert(!instr->alu.add.magic_write);
                *packed_instr |= VC5_QPU_MA;
                break;

        default:
                break;
        }

        /* Encode the op-specific output-pack and input-unpack fields. */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        /* Swap operands (and their unpacks) to select the
                         * intended FMIN/FMAX or FADD/FADDNF variant.
                         */
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* VFPACK has no encoding for ABS on either input. */
                if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Single-source ops carry the output pack in mux_b. */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* An unpack field of 0 here would collide with another
                 * opcode, so it's not encodable.
                 */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(1 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                /* Note: reuses the `packed` declared in the FTOIN case
                 * above; its scope is the whole switch body.
                 */
                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* All remaining ops (except NOP) must have no pack/unpack
                 * modifiers requested.
                 */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}
1157 
/*
 * Packs the mul-ALU half of an instruction (opcode, muxes, waddr, and
 * pack/unpack modifier bits) into *packed_instr.
 *
 * Returns false if the op isn't in the mul_ops table or if the requested
 * pack/unpack modes can't be encoded for that op.
 */
static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        /* Linear-search the opcode table for this op's descriptor. */
        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                /* The output pack is split: its high bit goes in op bit 0
                 * and its low bit in mux_b bit 2; the a-operand unpack takes
                 * mux_b bits 1:0 (mirrors v3d_qpu_mul_unpack).
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                /* SWAP_16 gets its own opcode; the other unpack modes are
                 * stored offset by 4 (mod 8), matching the "-4" decode in
                 * v3d_qpu_mul_unpack.
                 */
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= VC5_QPU_MM;

        return true;
}
1266 
1267 static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1268 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1269                          uint64_t packed_instr,
1270                          struct v3d_qpu_instr *instr)
1271 {
1272         instr->type = V3D_QPU_INSTR_TYPE_ALU;
1273 
1274         if (!v3d_qpu_sig_unpack(devinfo,
1275                                 QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
1276                                 &instr->sig))
1277                 return false;
1278 
1279         uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
1280         if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1281                 instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
1282                 instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;
1283 
1284                 instr->flags.ac = V3D_QPU_COND_NONE;
1285                 instr->flags.mc = V3D_QPU_COND_NONE;
1286                 instr->flags.apf = V3D_QPU_PF_NONE;
1287                 instr->flags.mpf = V3D_QPU_PF_NONE;
1288                 instr->flags.auf = V3D_QPU_UF_NONE;
1289                 instr->flags.muf = V3D_QPU_UF_NONE;
1290         } else {
1291                 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
1292                         return false;
1293         }
1294 
1295         instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
1296         instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
1297 
1298         if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1299                 return false;
1300 
1301         if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1302                 return false;
1303 
1304         return true;
1305 }
1306 
1307 static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1308 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1309                             uint64_t packed_instr,
1310                             struct v3d_qpu_instr *instr)
1311 {
1312         instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1313 
1314         uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
1315         if (cond == 0)
1316                 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1317         else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1318                  V3D_QPU_BRANCH_COND_ALLNA)
1319                 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1320         else
1321                 return false;
1322 
1323         uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
1324         if (msfign == 3)
1325                 return false;
1326         instr->branch.msfign = msfign;
1327 
1328         instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
1329 
1330         instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
1331         if (instr->branch.ub) {
1332                 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1333                                                   VC5_QPU_BRANCH_BDU);
1334         }
1335 
1336         instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1337                                               VC5_QPU_RADDR_A);
1338 
1339         instr->branch.offset = 0;
1340 
1341         instr->branch.offset +=
1342                 QPU_GET_FIELD(packed_instr,
1343                               VC5_QPU_BRANCH_ADDR_LOW) << 3;
1344 
1345         instr->branch.offset +=
1346                 QPU_GET_FIELD(packed_instr,
1347                               VC5_QPU_BRANCH_ADDR_HIGH) << 24;
1348 
1349         return true;
1350 }
1351 
1352 bool
v3d_qpu_instr_unpack(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)1353 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1354                      uint64_t packed_instr,
1355                      struct v3d_qpu_instr *instr)
1356 {
1357         if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
1358                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1359         } else {
1360                 uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
1361 
1362                 if ((sig & 24) == 16) {
1363                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1364                                                            instr);
1365                 } else {
1366                         return false;
1367                 }
1368         }
1369 }
1370 
1371 static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1372 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
1373                        const struct v3d_qpu_instr *instr,
1374                        uint64_t *packed_instr)
1375 {
1376         uint32_t sig;
1377         if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
1378                 return false;
1379         *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
1380 
1381         if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
1382                 *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
1383                 *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
1384 
1385                 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
1386                         return false;
1387                 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
1388                         return false;
1389 
1390                 uint32_t flags;
1391                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
1392                         if (instr->flags.ac != V3D_QPU_COND_NONE ||
1393                             instr->flags.mc != V3D_QPU_COND_NONE ||
1394                             instr->flags.apf != V3D_QPU_PF_NONE ||
1395                             instr->flags.mpf != V3D_QPU_PF_NONE ||
1396                             instr->flags.auf != V3D_QPU_UF_NONE ||
1397                             instr->flags.muf != V3D_QPU_UF_NONE) {
1398                                 return false;
1399                         }
1400 
1401                         flags = instr->sig_addr;
1402                         if (instr->sig_magic)
1403                                 flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
1404                 } else {
1405                         if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
1406                                 return false;
1407                 }
1408 
1409                 *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
1410         } else {
1411                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
1412                         return false;
1413         }
1414 
1415         return true;
1416 }
1417 
1418 static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1419 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1420                           const struct v3d_qpu_instr *instr,
1421                           uint64_t *packed_instr)
1422 {
1423         *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);
1424 
1425         if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1426                 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1427                                                     V3D_QPU_BRANCH_COND_A0),
1428                                                VC5_QPU_BRANCH_COND);
1429         }
1430 
1431         *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1432                                        VC5_QPU_BRANCH_MSFIGN);
1433 
1434         *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1435                                        VC5_QPU_BRANCH_BDI);
1436 
1437         if (instr->branch.ub) {
1438                 *packed_instr |= VC5_QPU_BRANCH_UB;
1439                 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1440                                                VC5_QPU_BRANCH_BDU);
1441         }
1442 
1443         switch (instr->branch.bdi) {
1444         case V3D_QPU_BRANCH_DEST_ABS:
1445         case V3D_QPU_BRANCH_DEST_REL:
1446                 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1447                                                VC5_QPU_BRANCH_MSFIGN);
1448 
1449                 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1450                                                 ~0xff000000) >> 3,
1451                                                VC5_QPU_BRANCH_ADDR_LOW);
1452 
1453                 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1454                                                VC5_QPU_BRANCH_ADDR_HIGH);
1455         default:
1456                 break;
1457         }
1458 
1459         if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
1460             instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
1461                 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1462                                                VC5_QPU_RADDR_A);
1463         }
1464 
1465         return true;
1466 }
1467 
1468 bool
v3d_qpu_instr_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)1469 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1470                    const struct v3d_qpu_instr *instr,
1471                    uint64_t *packed_instr)
1472 {
1473         *packed_instr = 0;
1474 
1475         switch (instr->type) {
1476         case V3D_QPU_INSTR_TYPE_ALU:
1477                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1478         case V3D_QPU_INSTR_TYPE_BRANCH:
1479                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1480         default:
1481                 return false;
1482         }
1483 }
1484