1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include "bit.h"
28 #include "bi_print.h"
29 #include "util/half_float.h"
30 #include "bifrost/disassemble.h"
31 
32 /* Instruction packing tests */
33 
34 static void
bit_test_single(struct panfrost_device * dev,bi_instruction * ins,uint32_t input[4],bool fma,enum bit_debug debug)35 bit_test_single(struct panfrost_device *dev,
36                 bi_instruction *ins,
37                 uint32_t input[4],
38                 bool fma, enum bit_debug debug)
39 {
40         /* First, simulate the instruction */
41         struct bit_state s = { 0 };
42         memcpy(s.r, input, 16);
43         bit_step(&s, ins, fma);
44 
45         /* Next, wrap it up and pack it */
46 
47         bi_instruction ldubo = {
48                 .type = BI_LOAD_UNIFORM,
49                 .segment = BI_SEGMENT_UBO,
50                 .src = {
51                         BIR_INDEX_CONSTANT,
52                         BIR_INDEX_ZERO
53                 },
54                 .src_types = {
55                         nir_type_uint32,
56                         nir_type_uint32,
57                 },
58                 .dest = BIR_INDEX_REGISTER | 0,
59                 .dest_type = nir_type_uint32,
60                 .vector_channels = 4,
61         };
62 
63         bi_instruction ldva = {
64                 .type = BI_LOAD_VAR_ADDRESS,
65                 .vector_channels = 3,
66                 .dest = BIR_INDEX_REGISTER | 32,
67                 .dest_type = nir_type_uint32,
68                 .format = nir_type_uint32,
69                 .src = {
70                         BIR_INDEX_CONSTANT,
71                         BIR_INDEX_REGISTER | 61,
72                         BIR_INDEX_REGISTER | 62,
73                         0,
74                 },
75                 .src_types = {
76                         nir_type_uint32,
77                         nir_type_uint32,
78                         nir_type_uint32,
79                         nir_type_uint32,
80                 }
81         };
82 
83         bi_instruction st = {
84                 .type = BI_STORE_VAR,
85                 .src = {
86                         BIR_INDEX_REGISTER | 0,
87                         ldva.dest, ldva.dest + 1, ldva.dest + 2,
88                 },
89                 .src_types = {
90                         nir_type_uint32,
91                         nir_type_uint32, nir_type_uint32, nir_type_uint32,
92                 },
93                 .vector_channels = 4
94         };
95 
96         bi_context *ctx = rzalloc(NULL, bi_context);
97         ctx->stage = MESA_SHADER_VERTEX;
98 
99         bi_block *blk = rzalloc(ctx, bi_block);
100         blk->scheduled = true;
101 
102         blk->base.predecessors = _mesa_set_create(blk,
103                         _mesa_hash_pointer,
104                         _mesa_key_pointer_equal);
105 
106         list_inithead(&ctx->blocks);
107         list_addtail(&blk->base.link, &ctx->blocks);
108         list_inithead(&blk->clauses);
109 
110         bi_clause *clauses[4] = {
111                 rzalloc(ctx, bi_clause),
112                 rzalloc(ctx, bi_clause),
113                 rzalloc(ctx, bi_clause),
114                 rzalloc(ctx, bi_clause)
115         };
116 
117         for (unsigned i = 0; i < 4; ++i) {
118                 clauses[i]->bundle_count = 1;
119                 list_addtail(&clauses[i]->link, &blk->clauses);
120                 clauses[i]->scoreboard_id = (i & 1);
121 
122                 if (i) {
123                         clauses[i]->dependencies = 1 << (~i & 1);
124                         clauses[i]->staging_barrier = true;
125                 }
126         }
127 
128         clauses[0]->bundles[0].add = &ldubo;
129         clauses[0]->message_type = BIFROST_MESSAGE_ATTRIBUTE;
130 
131         if (fma)
132                 clauses[1]->bundles[0].fma = ins;
133         else
134                 clauses[1]->bundles[0].add = ins;
135 
136         clauses[0]->constant_count = 1;
137         clauses[1]->constant_count = 1;
138         clauses[1]->constants[0] = ins->constant.u64;
139 
140         clauses[2]->bundles[0].add = &ldva;
141         clauses[3]->bundles[0].add = &st;
142 
143         clauses[2]->message_type = BIFROST_MESSAGE_ATTRIBUTE;
144         clauses[3]->message_type = BIFROST_MESSAGE_STORE;
145 
146         panfrost_program prog = { 0 };
147         util_dynarray_init(&prog.compiled, NULL);
148         bi_pack(ctx, &prog.compiled);
149 
150         bool succ = bit_vertex(dev, &prog, input, 16, NULL, 0,
151                         s.r, 16, debug);
152 
153         if (debug >= BIT_DEBUG_ALL || (!succ && debug >= BIT_DEBUG_FAIL)) {
154                 bi_print_shader(ctx, stderr);
155                 disassemble_bifrost(stderr, prog.compiled.data, prog.compiled.size, true);
156         }
157 
158         if (!succ)
159                 fprintf(stderr, "FAIL\n");
160 }
161 
162 /* Utilities for generating tests */
163 
/* Fills mem[0..3] with values derived from rand(): the low 8 bits of each
 * draw are re-centred by subtracting 127 and then divided by 16, giving
 * multiples of 1/16 between -7.9375 and 8.0 inclusive. */
static void
bit_generate_float4(float *mem)
{
        unsigned idx = 0;

        while (idx < 4) {
                int centred = (rand() & 255) - 127;

                mem[idx] = (float) centred / 16.0;
                ++idx;
        }
}
170 
/* Fills mem[0..7] with half-precision values: the low 8 bits of each rand()
 * draw, minus 127, divided by 16, converted via _mesa_float_to_half(). */
static void
bit_generate_half8(uint16_t *mem)
{
        unsigned idx = 0;

        while (idx < 8) {
                float centred = (float) (rand() & 255) - 127;

                mem[idx] = _mesa_float_to_half(centred / 16.0);
                ++idx;
        }
}
177 
178 static bi_instruction
bit_ins(enum bi_class C,unsigned argc,nir_alu_type base,unsigned size)179 bit_ins(enum bi_class C, unsigned argc, nir_alu_type base, unsigned size)
180 {
181         nir_alu_type T = base | size;
182 
183         bi_instruction ins = {
184                 .type = C,
185                 .dest = BIR_INDEX_REGISTER | 0,
186                 .dest_type = T,
187         };
188 
189         for (unsigned i = 0; i < argc; ++i) {
190                 ins.src[i] = BIR_INDEX_REGISTER | i;
191                 ins.src_types[i] = T;
192         }
193 
194         return ins;
195 }
196 
/* Iterates `swz` over every swizzle selector for an instruction with `args`
 * sources of `sz` bits: two selector bits per source (2^(2 * args) values)
 * when sz is 16, a single iteration (swz == 0) for any other size.
 * Every argument is parenthesized so that expressions can be passed safely. */
#define BIT_FOREACH_SWIZZLE(swz, args, sz) \
        for (unsigned swz = 0; swz < (((sz) == 16) ? (1u << (2 * (args))) : 1u); ++swz)
199 
200 static void
bit_apply_swizzle(bi_instruction * ins,unsigned swz,unsigned args,unsigned sz)201 bit_apply_swizzle(bi_instruction *ins, unsigned swz, unsigned args, unsigned sz)
202 {
203         unsigned slots_per_arg = (sz == 16) ? 4 : 1;
204         unsigned slots_per_chan = (sz == 16) ? 1 : 0;
205         unsigned mask = (sz == 16) ? 1 : 0;
206 
207         for (unsigned i = 0; i < args; ++i) {
208                 for (unsigned j = 0; j < (32 / sz); ++j) {
209                         ins->swizzle[i][j] = ((swz >> (slots_per_arg * i)) >> (slots_per_chan * j)) & mask;
210                 }
211         }
212 }
213 
214 /* Tests all 64 combinations of floating point modifiers for a given
215  * instruction / floating-type / test type */
216 
217 static void
bit_fmod_helper(struct panfrost_device * dev,enum bi_class c,unsigned size,bool fma,uint32_t * input,enum bit_debug debug,unsigned op)218 bit_fmod_helper(struct panfrost_device *dev,
219                 enum bi_class c, unsigned size, bool fma,
220                 uint32_t *input, enum bit_debug debug, unsigned op)
221 {
222         bi_instruction ins = bit_ins(c, 2, nir_type_float, size);
223 
224         bool fp16 = (size == 16);
225         bool has_outmods = fma || !fp16;
226 
227         for (unsigned outmod = 0; outmod < (has_outmods ? 4 : 1); ++outmod) {
228         BIT_FOREACH_SWIZZLE(swz, 2, size) {
229                 for (unsigned inmod = 0; inmod < 16; ++inmod) {
230                         ins.outmod = outmod;
231                         ins.op.minmax = op;
232                         ins.src_abs[0] = (inmod & 0x1);
233                         ins.src_abs[1] = (inmod & 0x2);
234                         ins.src_neg[0] = (inmod & 0x4);
235                         ins.src_neg[1] = (inmod & 0x8);
236                         bit_apply_swizzle(&ins, swz, 2, size);
237                         bit_test_single(dev, &ins, input, fma, debug);
238                 }
239         }
240         }
241 }
242 
243 static void
bit_fma_helper(struct panfrost_device * dev,unsigned size,uint32_t * input,enum bit_debug debug)244 bit_fma_helper(struct panfrost_device *dev,
245                 unsigned size, uint32_t *input, enum bit_debug debug)
246 {
247         bi_instruction ins = bit_ins(BI_FMA, 3, nir_type_float, size);
248 
249         for (unsigned outmod = 0; outmod < 4; ++outmod) {
250                 for (unsigned inmod = 0; inmod < 8; ++inmod) {
251                         ins.outmod = outmod;
252                         ins.src_neg[0] = (inmod & 0x1);
253                         ins.src_neg[1] = (inmod & 0x2);
254                         ins.src_neg[2] = (inmod & 0x4);
255                         bit_test_single(dev, &ins, input, true, debug);
256                 }
257         }
258 }
259 
260 static void
bit_fma_mscale_helper(struct panfrost_device * dev,uint32_t * input,enum bit_debug debug)261 bit_fma_mscale_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
262 {
263         bi_instruction ins = bit_ins(BI_FMA, 4, nir_type_float, 32);
264         ins.op.mscale = true;
265         ins.src_types[3] = nir_type_int32;
266         ins.src[2] = ins.src[3]; /* Not enough ports! */
267 
268         for (unsigned outmod = 0; outmod < 4; ++outmod) {
269                 for (unsigned inmod = 0; inmod < 8; ++inmod) {
270                         ins.outmod = outmod;
271                         ins.src_abs[0] = (inmod & 0x1);
272                         ins.src_neg[1] = (inmod & 0x2);
273                         ins.src_neg[2] = (inmod & 0x4);
274                         bit_test_single(dev, &ins, input, true, debug);
275                 }
276         }
277 }
278 
279 static void
bit_csel_helper(struct panfrost_device * dev,unsigned size,uint32_t * input,enum bit_debug debug)280 bit_csel_helper(struct panfrost_device *dev,
281                 unsigned size, uint32_t *input, enum bit_debug debug)
282 {
283         bi_instruction ins = bit_ins(BI_CSEL, 4, nir_type_uint, size);
284 
285         /* SCHEDULER: We can only read 3 registers at once. */
286         ins.src[2] = ins.src[0];
287 
288         for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
289                 ins.cond = cond;
290                 bit_test_single(dev, &ins, input, true, debug);
291         }
292 }
293 
294 static void
bit_special_helper(struct panfrost_device * dev,unsigned size,uint32_t * input,enum bit_debug debug)295 bit_special_helper(struct panfrost_device *dev,
296                 unsigned size, uint32_t *input, enum bit_debug debug)
297 {
298         bi_instruction ins = bit_ins(BI_SPECIAL_ADD, 2, nir_type_float, size);
299         uint32_t exp_input[4];
300 
301         for (enum bi_special_op op = BI_SPECIAL_FRCP; op <= BI_SPECIAL_EXP2_LOW; ++op) {
302                 if (op == BI_SPECIAL_EXP2_LOW) {
303                         /* exp2 only supported in fp32 mode */
304                         if (size != 32)
305                                 continue;
306 
307                         /* Give expected input */
308                         exp_input[1] = input[0];
309                         float *ff = (float *) input;
310                         exp_input[0] = (int) (ff[0] * (1 << 24));
311                 }
312 
313                 for (unsigned c = 0; c < ((size == 16) ? 2 : 1); ++c) {
314                         ins.op.special = op;
315                         ins.swizzle[0][0] = c;
316                         bit_test_single(dev, &ins,
317                                                 op == BI_SPECIAL_EXP2_LOW ? exp_input : input,
318                                                 false, debug);
319                 }
320         }
321 }
322 
323 static void
bit_table_helper(struct panfrost_device * dev,uint32_t * input,enum bit_debug debug)324 bit_table_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
325 {
326         bi_instruction ins = bit_ins(BI_TABLE, 1, nir_type_float, 32);
327 
328         for (enum bi_table_op op = 0; op <= BI_TABLE_LOG2_U_OVER_U_1_LOW; ++op) {
329                 ins.op.table = op;
330                 bit_test_single(dev, &ins, input, false, debug);
331         }
332 }
333 
334 static void
bit_frexp_helper(struct panfrost_device * dev,uint32_t * input,enum bit_debug debug)335 bit_frexp_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
336 {
337         bi_instruction ins = bit_ins(BI_FREXP, 1, nir_type_float, 32);
338         ins.dest_type = nir_type_int32;
339 
340         for (enum bi_frexp_op op = 0; op <= BI_FREXPE_LOG; ++op) {
341                 ins.op.frexp = op;
342                 bit_test_single(dev, &ins, input, true, debug);
343         }
344 }
345 
346 static void
bit_round_helper(struct panfrost_device * dev,uint32_t * input,unsigned sz,bool FMA,enum bit_debug debug)347 bit_round_helper(struct panfrost_device *dev, uint32_t *input, unsigned sz, bool FMA, enum bit_debug debug)
348 {
349         bi_instruction ins = bit_ins(BI_ROUND, 1, nir_type_float, sz);
350 
351         for (enum bifrost_roundmode mode = 0; mode <= 3; ++mode) {
352         BIT_FOREACH_SWIZZLE(swz, 1, sz) {
353                 bit_apply_swizzle(&ins, swz, 1, sz);
354                 ins.roundmode = mode;
355                 bit_test_single(dev, &ins, input, FMA, debug);
356         }
357         }
358 }
359 
360 static void
bit_reduce_helper(struct panfrost_device * dev,uint32_t * input,enum bit_debug debug)361 bit_reduce_helper(struct panfrost_device *dev, uint32_t *input, enum bit_debug debug)
362 {
363         bi_instruction ins = bit_ins(BI_REDUCE_FMA, 2, nir_type_float, 32);
364 
365         for (enum bi_reduce_op op = 0; op <= BI_REDUCE_ADD_FREXPM; ++op) {
366                 ins.op.reduce = op;
367                 bit_test_single(dev, &ins, input, true, debug);
368         }
369 }
370 
371 static void
bit_select_helper(struct panfrost_device * dev,uint32_t * input,unsigned size,enum bit_debug debug)372 bit_select_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
373 {
374         unsigned C = 32 / size;
375         bi_instruction ins = bit_ins(BI_SELECT, C, nir_type_uint, 32);
376 
377         for (unsigned c = 0; c < C; ++c)
378                 ins.src_types[c] = nir_type_uint | size;
379 
380         if (size == 8) {
381                 /* SCHEDULER: We can only read 3 registers at once. */
382                 ins.src[2] = ins.src[0];
383         }
384 
385         /* Each argument has swizzle {lo, hi} so 2^C options */
386         unsigned hi = (size == 16) ? 1 : 2;
387 
388         for (unsigned add = 0; add < ((size == 16) ? 2 : 1); ++add) {
389                 for (unsigned swizzle = 0; swizzle < (1 << C); ++swizzle) {
390                         for (unsigned i = 0; i < C; ++i)
391                                 ins.swizzle[i][0] = ((swizzle >> i) & 1) ? hi : 0;
392 
393                         bit_test_single(dev, &ins, input, !add, debug);
394                 }
395         }
396 }
397 
398 static void
bit_fcmp_helper(struct panfrost_device * dev,uint32_t * input,unsigned size,enum bit_debug debug,bool FMA)399 bit_fcmp_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug, bool FMA)
400 {
401         bi_instruction ins = bit_ins(BI_CMP, 2, nir_type_float, size);
402         ins.dest_type = nir_type_uint | size;
403 
404         /* 16-bit has swizzles and abs. 32-bit has abs/neg mods. */
405         unsigned max_mods = (size == 16) ? 64 : (size == 32) ? 16 : 1;
406 
407         for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
408                 for (unsigned mods = 0; mods < max_mods; ++mods) {
409                         ins.cond = cond;
410 
411                         if (size == 16) {
412                                 for (unsigned i = 0; i < 2; ++i) {
413                                         ins.swizzle[i][0] = ((mods >> (i * 2)) & 1) ? 1 : 0;
414                                         ins.swizzle[i][1] = ((mods >> (i * 2)) & 2) ? 1 : 0;
415                                 }
416 
417                                 ins.src_abs[0] = (mods & 16) ? true : false;
418                                 ins.src_abs[1] = (mods & 32) ? true : false;
419                         } else if (size == 8) {
420                                 for (unsigned i = 0; i < 2; ++i) {
421                                         for (unsigned j = 0; j < 4; ++j)
422                                                 ins.swizzle[i][j] = j;
423                                 }
424                         } else if (size == 32) {
425                                 ins.src_abs[0] = (mods & 1) ? true : false;
426                                 ins.src_abs[1] = (mods & 2) ? true : false;
427                                 ins.src_neg[0] = (mods & 4) ? true : false;
428                                 ins.src_neg[1] = (mods & 8) ? true : false;
429                         }
430 
431                         bit_test_single(dev, &ins, input, FMA, debug);
432                 }
433         }
434 }
435 
436 static void
bit_icmp_helper(struct panfrost_device * dev,uint32_t * input,unsigned size,nir_alu_type T,enum bit_debug debug)437 bit_icmp_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, nir_alu_type T, enum bit_debug debug)
438 {
439         bi_instruction ins = bit_ins(BI_CMP, 2, T, size);
440         ins.dest_type = nir_type_uint | size;
441 
442         for (enum bi_cond cond = BI_COND_LT; cond <= BI_COND_NE; ++cond) {
443         BIT_FOREACH_SWIZZLE(swz, 2, size) {
444                 ins.cond = cond;
445                 bit_apply_swizzle(&ins, swz, 2, size);
446                 bit_test_single(dev, &ins, input, false, debug);
447         }
448         }
449 }
450 
451 
452 
453 static void
bit_convert_helper(struct panfrost_device * dev,unsigned from_size,unsigned to_size,unsigned cx,unsigned cy,bool FMA,enum bifrost_roundmode roundmode,uint32_t * input,enum bit_debug debug)454 bit_convert_helper(struct panfrost_device *dev, unsigned from_size,
455                 unsigned to_size, unsigned cx, unsigned cy, bool FMA,
456                 enum bifrost_roundmode roundmode,
457                 uint32_t *input, enum bit_debug debug)
458 {
459         bi_instruction ins = {
460                 .type = BI_CONVERT,
461                 .dest = BIR_INDEX_REGISTER | 0,
462                 .src = { BIR_INDEX_REGISTER | 0 }
463         };
464 
465         nir_alu_type Ts[3] = { nir_type_float, nir_type_uint, nir_type_int };
466 
467         for (unsigned from_base = 0; from_base < 3; ++from_base) {
468                 for (unsigned to_base = 0; to_base < 3; ++to_base) {
469                         /* Discard invalid combinations.. */
470                         if ((from_size == to_size) && (from_base == to_base))
471                                 continue;
472 
473                         /* Can't switch signedness */
474                         if (from_base && to_base)
475                                 continue;
476 
477                         /* No F16_TO_I32, etc */
478                         if (from_size != to_size && from_base == 0 && to_base)
479                                 continue;
480 
481                         if (from_size != to_size && from_base && to_base == 0)
482                                 continue;
483 
484                         /* No need, just ignore the upper half */
485                         if (from_size > to_size && from_base == to_base && from_base)
486                                 continue;
487 
488                         ins.dest_type = Ts[to_base] | to_size;
489                         ins.src_types[0] = Ts[from_base] | from_size;
490                         ins.roundmode = roundmode;
491                         ins.swizzle[0][0] = cx;
492                         ins.swizzle[0][1] = cy;
493 
494                         if (to_size == 16 && from_size == 32) {
495                                 ins.src_types[1] = ins.src_types[0];
496                                 ins.src[1] = ins.src[0];
497                         } else {
498                                 ins.src[1] = ins.src_types[1] = 0;
499                         }
500 
501                         bit_test_single(dev, &ins, input, FMA, debug);
502                 }
503         }
504 }
505 
506 static void
bit_constant_helper(struct panfrost_device * dev,uint32_t * input,enum bit_debug debug)507 bit_constant_helper(struct panfrost_device *dev,
508                 uint32_t *input, enum bit_debug debug)
509 {
510         enum bi_class C[3] = { BI_MOV, BI_ADD, BI_FMA };
511 
512         for (unsigned doubled = 0; doubled < 2; ++doubled) {
513                 for (unsigned count = 1; count <= 3; ++count) {
514                         bi_instruction ins = bit_ins(C[count - 1], count, nir_type_float, 32);
515 
516                         ins.src[0] = BIR_INDEX_CONSTANT | 0;
517                         ins.src[1] = (count >= 2) ? BIR_INDEX_CONSTANT | (doubled ? 32 : 0) : 0;
518                         ins.src[2] = (count >= 3) ? BIR_INDEX_ZERO : 0;
519 
520                         ins.constant.u64 = doubled ?
521                                 0x3f800000ull | (0x3f000000ull << 32ull) :
522                                 0x3f800000ull;
523 
524                         bit_test_single(dev, &ins, input, true, debug);
525                 }
526         }
527 }
528 
529 static void
bit_swizzle_identity(bi_instruction * ins,unsigned args,unsigned size)530 bit_swizzle_identity(bi_instruction *ins, unsigned args, unsigned size)
531 {
532         for (unsigned i = 0; i < 2; ++i) {
533                 for (unsigned j = 0; j < (32 / size); ++j)
534                         ins->swizzle[i][j] = j;
535         }
536 }
537 
538 static void
bit_bitwise_helper(struct panfrost_device * dev,uint32_t * input,unsigned size,enum bit_debug debug)539 bit_bitwise_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug)
540 {
541         bi_instruction ins = bit_ins(BI_BITWISE, 3, nir_type_uint, size);
542         bit_swizzle_identity(&ins, 2, size);
543 
544         /* TODO: shifts */
545         ins.src[2] = BIR_INDEX_ZERO;
546         ins.src_types[2] = nir_type_uint8;
547 
548         for (unsigned op = BI_BITWISE_AND; op <= BI_BITWISE_XOR; ++op) {
549                 ins.op.bitwise = op;
550 
551                 for (unsigned mods = 0; mods < 4; ++mods) {
552                         ins.bitwise.dest_invert = mods & 1;
553                         ins.bitwise.src1_invert = mods & 2;
554 
555                         /* Skip out-of-spec combinations */
556                         if (ins.bitwise.src1_invert && op == BI_BITWISE_XOR)
557                                 continue;
558 
559                         bit_test_single(dev, &ins, input, true, debug);
560                 }
561         }
562 }
563 
564 static void
bit_imath_helper(struct panfrost_device * dev,uint32_t * input,unsigned size,enum bit_debug debug,bool FMA)565 bit_imath_helper(struct panfrost_device *dev, uint32_t *input, unsigned size, enum bit_debug debug, bool FMA)
566 {
567         bi_instruction ins = bit_ins(BI_IMATH, 2, nir_type_uint, size);
568         bit_swizzle_identity(&ins, 2, size);
569         ins.src[2] = BIR_INDEX_ZERO; /* carry/borrow for FMA */
570 
571         for (unsigned op = BI_IMATH_ADD; op <= BI_IMATH_SUB; ++op) {
572                 ins.op.imath = op;
573                 bit_test_single(dev, &ins, input, FMA, debug);
574         }
575 }
576 
577 void
bit_packing(struct panfrost_device * dev,enum bit_debug debug)578 bit_packing(struct panfrost_device *dev, enum bit_debug debug)
579 {
580         float input32[4];
581         uint16_t input16[8];
582 
583         bit_generate_float4(input32);
584         bit_generate_half8(input16);
585 
586         bit_constant_helper(dev, (uint32_t *) input32, debug);
587 
588         for (unsigned sz = 16; sz <= 32; sz *= 2) {
589                 uint32_t *input =
590                         (sz == 16) ? (uint32_t *) input16 :
591                         (uint32_t *) input32;
592 
593                 bit_fmod_helper(dev, BI_ADD, sz, true, input, debug, 0);
594                 bit_fmod_helper(dev, BI_ADD, sz, false, input, debug, 0);
595                 bit_round_helper(dev, (uint32_t *) input32, sz, true, debug);
596 
597                 bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MIN);
598                 bit_fmod_helper(dev, BI_MINMAX, sz, false, input, debug, BI_MINMAX_MAX);
599 
600                 bit_fma_helper(dev, sz, input, debug);
601                 bit_icmp_helper(dev, input, sz, nir_type_uint, debug);
602                 bit_icmp_helper(dev, input, sz, nir_type_int, debug);
603         }
604 
605         for (unsigned sz = 16; sz <= 32; sz *= 2)
606                 bit_csel_helper(dev, sz, (uint32_t *) input32, debug);
607 
608         float special[4] = { 0.9 };
609         uint32_t special16[4] = { _mesa_float_to_half(special[0]) | (_mesa_float_to_half(0.2) << 16) };
610 
611         bit_table_helper(dev, (uint32_t *) special, debug);
612 
613         for (unsigned sz = 16; sz <= 32; sz *= 2) {
614                 uint32_t *input =
615                         (sz == 16) ? special16 :
616                         (uint32_t *) special;
617 
618                 bit_special_helper(dev, sz, input, debug);
619         }
620 
621         for (unsigned rm = 0; rm < 4; ++rm) {
622                 bit_convert_helper(dev, 32, 32, 0, 0, false, rm, (uint32_t *) input32, debug);
623 
624                 for (unsigned c = 0; c < 2; ++c)
625                         bit_convert_helper(dev, 32, 16, c, 0, false, rm, (uint32_t *) input32, debug);
626 
627                 bit_convert_helper(dev, 16, 32, 0, 0, false, rm, (uint32_t *) input16, debug);
628 
629                 for (unsigned c = 0; c < 4; ++c)
630                         bit_convert_helper(dev, 16, 16, c & 1, c >> 1, false, rm, (uint32_t *) input16, debug);
631         }
632 
633         bit_frexp_helper(dev, (uint32_t *) input32, debug);
634         bit_reduce_helper(dev, (uint32_t *) input32, debug);
635 
636         uint32_t mscale_input[4];
637         memcpy(mscale_input, input32, sizeof(input32));
638         mscale_input[3] = 0x7;
639         bit_fma_mscale_helper(dev, mscale_input, debug);
640 
641         for (unsigned sz = 8; sz <= 16; sz *= 2) {
642                 bit_select_helper(dev, (uint32_t *) input32, sz, debug);
643         }
644 
645         bit_fcmp_helper(dev, (uint32_t *) input32, 32, debug, true);
646         bit_fcmp_helper(dev, (uint32_t *) input32, 16, debug, true);
647 
648         for (unsigned sz = 8; sz <= 32; sz *= 2) {
649                 bit_bitwise_helper(dev, (uint32_t *) input32, sz, debug);
650                 bit_imath_helper(dev, (uint32_t *) input32, sz, debug, false);
651         }
652 
653         bit_imath_helper(dev, (uint32_t *) input32, 32, debug, true);
654 }
655