1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include <math.h>
28 #include "bit.h"
29 #include "util/half_float.h"
30 
/* One interpreter value, viewable as any of the scalar or packed layouts the
 * interpreter works with. All members alias the same storage. u64 is kept as
 * the first member so that a `{ 0 }` initialiser clears the widest view. */
typedef union {
        /* 64-bit views */
        uint64_t u64;
        int64_t i64;
        double f64;

        /* 32-bit views */
        uint32_t u32;
        int32_t i32;
        float f32;

        /* Two 16-bit lanes; f16 holds raw half-float bit patterns */
        uint16_t u16[2];
        int16_t i16[2];
        uint16_t f16[2];

        /* Four 8-bit lanes */
        uint8_t u8[4];
        int8_t i8[4];
} bit_t;
44 
45 /* Interprets a subset of Bifrost IR required for automated testing */
46 
47 static uint64_t
bit_read(struct bit_state * s,bi_instruction * ins,unsigned index,nir_alu_type T,bool FMA)48 bit_read(struct bit_state *s, bi_instruction *ins, unsigned index, nir_alu_type T, bool FMA)
49 {
50         if (index & BIR_INDEX_REGISTER) {
51                 uint32_t reg = index & ~BIR_INDEX_REGISTER;
52                 assert(reg < 64);
53                 return s->r[reg];
54         } else if (index & BIR_INDEX_UNIFORM) {
55                 unreachable("Uniform registers to be implemented");
56         } else if (index & BIR_INDEX_CONSTANT) {
57                 return ins->constant.u64 >> (index & ~BIR_INDEX_CONSTANT);
58         } else if (index & BIR_INDEX_ZERO) {
59                 return 0;
60         } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_STAGE)) {
61                 return FMA ? 0 : s->T;
62         } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_FMA)) {
63                 return s->T0;
64         } else if (index & (BIR_INDEX_PASS | BIFROST_SRC_PASS_ADD)) {
65                 return s->T1;
66         } else if (!index) {
67                 /* Placeholder */
68                 return 0;
69         } else {
70                 unreachable("Invalid source");
71         }
72 }
73 
74 static void
bit_write(struct bit_state * s,unsigned index,nir_alu_type T,bit_t value,bool FMA)75 bit_write(struct bit_state *s, unsigned index, nir_alu_type T, bit_t value, bool FMA)
76 {
77         /* Always write stage passthrough */
78         if (FMA)
79                 s->T = value.u32;
80 
81         if (index & BIR_INDEX_REGISTER) {
82                 uint32_t reg = index & ~BIR_INDEX_REGISTER;
83                 assert(reg < 64);
84                 s->r[reg] = value.u32;
85         } else if (!index) {
86                 /* Nothing to do */
87         } else {
88                 unreachable("Invalid destination");
89         }
90 }
91 
/* Shorthands for the half-float conversion helpers (undefined again at the
 * end of this file) */
#define bh _mesa_float_to_half
#define bf _mesa_half_to_float

/* The appliers below expand in a scope that provides the locals `srcs`
 * (source values), `ins` (the instruction) and `dest` (the result). Every
 * applier hands all four source slots to fxn; operations with fewer operands
 * simply ignore the extras. */

/* Two half-float lanes: swizzle each source, widen to float, apply fxn and
 * narrow the result back to half */
#define bv2f16(fxn) \
        for (unsigned lane = 0; lane < 2; ++lane) { \
                float in0 = bf(srcs[0].f16[ins->swizzle[0][lane]]); \
                float in1 = bf(srcs[1].f16[ins->swizzle[1][lane]]); \
                float in2 = bf(srcs[2].f16[ins->swizzle[2][lane]]); \
                float in3 = bf(srcs[3].f16[ins->swizzle[3][lane]]); \
                dest.f16[lane] = bh(fxn(in0, in1, in2, in3)); \
        }

/* Two 16-bit integer lanes. The result goes through the f16 view, which is
 * declared with the same uint16_t element type as u16. */
#define bv2i16(fxn) \
        for (unsigned lane = 0; lane < 2; ++lane) { \
                uint16_t in0 = srcs[0].u16[ins->swizzle[0][lane]]; \
                uint16_t in1 = srcs[1].u16[ins->swizzle[1][lane]]; \
                uint16_t in2 = srcs[2].u16[ins->swizzle[2][lane]]; \
                uint16_t in3 = srcs[3].u16[ins->swizzle[3][lane]]; \
                dest.f16[lane] = fxn(in0, in1, in2, in3); \
        }

/* Four 8-bit integer lanes */
#define bv4i8(fxn) \
        for (unsigned lane = 0; lane < 4; ++lane) { \
                uint8_t in0 = srcs[0].u8[ins->swizzle[0][lane]]; \
                uint8_t in1 = srcs[1].u8[ins->swizzle[1][lane]]; \
                uint8_t in2 = srcs[2].u8[ins->swizzle[2][lane]]; \
                uint8_t in3 = srcs[3].u8[ins->swizzle[3][lane]]; \
                dest.u8[lane] = fxn(in0, in1, in2, in3); \
        }

/* Scalar 32-bit appliers (no swizzle involved) */
#define bf32(fxn) dest.f32 = fxn(srcs[0].f32, srcs[1].f32, srcs[2].f32, srcs[3].f32)
#define bi32(fxn) dest.i32 = fxn(srcs[0].u32, srcs[1].u32, srcs[2].u32, srcs[3].i32)

/* Dispatch on a float destination type. Note the widening: a float32
 * destination is evaluated with the double-precision function and a float16
 * destination with the single-precision one.
 *
 * On a handled type the expansion executes `break`, so this must be used
 * directly inside the switch it is meant to leave; for the same reason it
 * cannot be wrapped in do { } while (0). An unhandled type falls out of the
 * chain so the caller can try another dispatcher. */
#define bfloat(fxn64, fxn32) \
        if (ins->dest_type == nir_type_float64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_float32) { \
                bf32(fxn64); \
                break; \
        } else if (ins->dest_type == nir_type_float16) { \
                bv2f16(fxn32); \
                break; \
        }

/* Dispatch on an integer destination type (signed or unsigned), with the
 * same `break` behaviour as bfloat */
#define bint(fxn64, fxn32, fxn16, fxn8) \
        if (ins->dest_type == nir_type_int64 || ins->dest_type == nir_type_uint64) { \
                unreachable("TODO: 64-bit"); \
        } else if (ins->dest_type == nir_type_int32 || ins->dest_type == nir_type_uint32) { \
                bi32(fxn32); \
                break; \
        } else if (ins->dest_type == nir_type_int16 || ins->dest_type == nir_type_uint16) { \
                bv2i16(fxn16); \
                break; \
        } else if (ins->dest_type == nir_type_int8 || ins->dest_type == nir_type_uint8) { \
                bv4i8(fxn8); \
                break; \
        }

/* Dispatch an operation defined for both floats and integers, using the
 * bit_f64/f32/i64/i32/i16/i8 ## name helper family. Any destination type that
 * neither dispatcher handles is invalid. */
#define bpoly(name) \
        bfloat(bit_f64 ## name, bit_f32 ## name); \
        bint(bit_i64 ## name, bit_i32 ## name, bit_i16 ## name, bit_i8 ## name); \
        unreachable("Invalid type");
151 
/* Generates one helper `prefix ## name` computing `expr` over type T. Every
 * helper takes four operands named a, b, c and d so that the appliers can
 * call any of them uniformly; expr refers to whichever operands it needs. */
#define bit_make_fn(T, prefix, name, expr) \
        static inline T \
        prefix ## name(T a, T b, T c, T d) \
        { \
                return expr; \
        }

/* Float helpers with separate single- and double-precision expressions */
#define bit_make_float_2(name, expr32, expr64) \
        bit_make_fn(double, bit_f64, name, expr64) \
        bit_make_fn(float, bit_f32, name, expr32)

/* Float helpers sharing one expression */
#define bit_make_float(name, expr) \
        bit_make_float_2(name, expr, expr)

/* Integer helpers at 64, 32, 16 and 8 bits */
#define bit_make_int(name, expr) \
        bit_make_fn(int64_t, bit_i64, name, expr) \
        bit_make_fn(int32_t, bit_i32, name, expr) \
        bit_make_fn(int16_t, bit_i16, name, expr) \
        bit_make_fn(int8_t, bit_i8, name, expr)

/* Both the float and the integer helpers */
#define bit_make_poly(name, expr) \
        bit_make_float(name, expr) \
        bit_make_int(name, expr)

/* Each invocation expands to complete function definitions, so no trailing
 * semicolon is needed */
bit_make_poly(add, a + b)
bit_make_int(sub, a - b)
bit_make_float(fma, (a * b) + c)
bit_make_poly(mov, a)
200 bit_make_poly(min, MIN2(a, b));
201 bit_make_poly(max, MAX2(a, b));
202 bit_make_float_2(floor, floorf(a), floor(a));
203 bit_make_float_2(ceil,  ceilf(a), ceil(a));
204 bit_make_float_2(trunc, truncf(a), trunc(a));
205 bit_make_float_2(nearbyint, nearbyintf(a), nearbyint(a));
206 
207 /* Modifiers */
208 
209 static float
bit_outmod(float raw,enum bifrost_outmod mod)210 bit_outmod(float raw, enum bifrost_outmod mod)
211 {
212         switch (mod) {
213         case BIFROST_POS:
214                 return MAX2(raw, 0.0);
215         case BIFROST_SAT_SIGNED:
216                 return CLAMP(raw, -1.0, 1.0);
217         case BIFROST_SAT:
218                 return SATURATE(raw);
219         default:
220                 return raw;
221         }
222 }
223 
/* Applies source modifiers to a float operand: the absolute value is taken
 * first (when abs is set), then the result is negated (when neg is set). */
static float
bit_srcmod(float raw, bool abs, bool neg)
{
        const float magnitude = abs ? fabsf(raw) : raw;

        return neg ? -magnitude : magnitude;
}
235 
/* Returns, from the enclosing function, the outcome of comparing left with
 * right under cond. Any condition other than the six relational ones
 * evaluates to true. Every path returns, so nothing placed after an
 * expansion is reached. */
#define BIT_COND(cond, left, right) \
        switch (cond) { \
        case BI_COND_LT: return (left) < (right); \
        case BI_COND_LE: return (left) <= (right); \
        case BI_COND_GE: return (left) >= (right); \
        case BI_COND_GT: return (left) > (right); \
        case BI_COND_EQ: return (left) == (right); \
        case BI_COND_NE: return (left) != (right); \
        default: return true; \
        }
244 
245 static bool
bit_eval_cond(enum bi_cond cond,bit_t l,bit_t r,nir_alu_type T,unsigned cl,unsigned cr)246 bit_eval_cond(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr)
247 {
248         if (T == nir_type_float32) {
249                 BIT_COND(cond, l.f32, r.f32);
250         } else if (T == nir_type_float16) {
251                 float left = bf(l.f16[cl]);
252                 float right = bf(r.f16[cr]);
253                 BIT_COND(cond, left, right);
254         } else if (T == nir_type_int32) {
255                 int32_t left = l.u32;
256                 int32_t right = r.u32;
257                 BIT_COND(cond, left, right);
258         } else if (T == nir_type_int16) {
259                 int16_t left = l.i16[cl];
260                 int16_t right = r.i16[cr];
261                 BIT_COND(cond, left, right);
262         } else if (T == nir_type_uint32) {
263                 BIT_COND(cond, l.u32, r.u32);
264         } else if (T == nir_type_uint16) {
265                 BIT_COND(cond, l.u16[cl], r.u16[cr]);
266         } else {
267                 unreachable("Unknown type evaluated");
268         }
269 }
270 
271 static unsigned
bit_cmp(enum bi_cond cond,bit_t l,bit_t r,nir_alu_type T,unsigned cl,unsigned cr,bool d3d)272 bit_cmp(enum bi_cond cond, bit_t l, bit_t r, nir_alu_type T, unsigned cl, unsigned cr, bool d3d)
273 {
274         bool v = bit_eval_cond(cond, l, r, T, cl, cr);
275 
276         /* Fill for D3D but only up to 32-bit... 64-bit is only partial
277          * (although we probably need a cleverer representation for 64-bit) */
278 
279         unsigned sz = MIN2(nir_alu_type_get_type_size(T), 32);
280         unsigned max = (sz == 32) ? (~0) : ((1 << sz) - 1);
281 
282         return v ? (d3d ? max : 1) : 0;
283 }
284 
285 static float
biti_special(float Q,enum bi_special_op op)286 biti_special(float Q, enum bi_special_op op)
287 {
288         switch (op) {
289         case BI_SPECIAL_FRCP: return 1.0 / Q;
290         case BI_SPECIAL_FRSQ: {
291               double Qf = 1.0 / sqrt(Q);
292               return Qf;
293         }
294         default: unreachable("Invalid special");
295         }
296 }
297 
/* For BI_CONVERT. */

/* Maps a Bifrost round mode onto the FP_INT_* constant expected by the
 * fromfp family: RTZ -> toward zero, RTE -> to nearest, RTN -> downward and
 * anything else -> upward. The argument is parenthesised so that an
 * expression argument cannot rebind against the comparisons. */
#define _AS_ROUNDMODE(mode) \
        (((mode) == BIFROST_RTZ) ? FP_INT_TOWARDZERO : \
         ((mode) == BIFROST_RTE) ? FP_INT_TONEAREST : \
         ((mode) == BIFROST_RTN) ? FP_INT_DOWNWARD : \
         FP_INT_UPWARD)
305 
306 static float
bit_as_float32(nir_alu_type T,bit_t src,unsigned C)307 bit_as_float32(nir_alu_type T, bit_t src, unsigned C)
308 {
309         switch (T) {
310         case nir_type_int32:   return src.i32;
311         case nir_type_uint32:  return src.u32;
312         case nir_type_float16: return bf(src.u16[C]);
313         default: unreachable("Invalid");
314         }
315 }
316 
317 static uint32_t
bit_as_uint32(nir_alu_type T,bit_t src,unsigned C,enum bifrost_roundmode rm)318 bit_as_uint32(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
319 {
320         switch (T) {
321         case nir_type_float16: return bf(src.u16[C]);
322         case nir_type_float32: return ufromfpf(src.f32, _AS_ROUNDMODE(rm), 32);
323         default: unreachable("Invalid");
324         }
325 }
326 
327 static int32_t
bit_as_int32(nir_alu_type T,bit_t src,unsigned C,enum bifrost_roundmode rm)328 bit_as_int32(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
329 {
330         switch (T) {
331         case nir_type_float16: return bf(src.u16[C]);
332         case nir_type_float32: return fromfpf(src.f32, _AS_ROUNDMODE(rm), 32);
333         default: unreachable("Invalid");
334         }
335 }
336 
337 static uint16_t
bit_as_float16(nir_alu_type T,bit_t src,unsigned C)338 bit_as_float16(nir_alu_type T, bit_t src, unsigned C)
339 {
340         switch (T) {
341         case nir_type_int32:   return bh(src.i32);
342         case nir_type_uint32:  return bh(src.u32);
343         case nir_type_float32: return bh(src.f32);
344         case nir_type_int16:   return bh(src.i16[C]);
345         case nir_type_uint16:  return bh(src.u16[C]);
346         default: unreachable("Invalid");
347         }
348 }
349 
350 static uint16_t
bit_as_uint16(nir_alu_type T,bit_t src,unsigned C,enum bifrost_roundmode rm)351 bit_as_uint16(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
352 {
353         switch (T) {
354         case nir_type_int32:   return src.i32;
355         case nir_type_uint32:  return src.u32;
356         case nir_type_float16: return ufromfpf(bf(src.u16[C]), _AS_ROUNDMODE(rm), 16);
357         case nir_type_float32: return src.f32;
358         default: unreachable("Invalid");
359         }
360 }
361 
362 static int16_t
bit_as_int16(nir_alu_type T,bit_t src,unsigned C,enum bifrost_roundmode rm)363 bit_as_int16(nir_alu_type T, bit_t src, unsigned C, enum bifrost_roundmode rm)
364 {
365         switch (T) {
366         case nir_type_int32:   return src.i32;
367         case nir_type_uint32:  return src.u32;
368         case nir_type_float16: return fromfpf(bf(src.u16[C]), _AS_ROUNDMODE(rm), 16);
369         case nir_type_float32: return src.f32;
370         default: unreachable("Invalid");
371         }
372 }
373 
/* Splits x into a mantissa and a power-of-two exponent, like frexpf, but
 * with the mantissa magnitude shifted into [0.75, 1.5) instead of [0.5, 1).
 *
 * x: value to decompose
 * e: receives the exponent matching the returned mantissa
 *
 * Returns the mantissa, carrying the sign of x. */
static float
frexp_log(float x, int *e)
{
        /* Ignore sign until end */
        float xa = fabsf(x);

        /* frexp reduces to [0.5, 1) */
        float f = frexpf(xa, e);

        /* reduce to [0.75, 1.5) */
        if (f < 0.75) {
                f *= 2.0;
                (*e)--;
        }

        /* Reattach sign. This has to look at the original input: xa is a
         * magnitude and can never compare below zero. */
        if (x < 0.0)
                f = -f;

        return f;
}
395 
396 void
bit_step(struct bit_state * s,bi_instruction * ins,bool FMA)397 bit_step(struct bit_state *s, bi_instruction *ins, bool FMA)
398 {
399         /* First, load sources */
400         bit_t srcs[BIR_SRC_COUNT] = { 0 };
401 
402         bi_foreach_src(ins, src)
403                 srcs[src].u64 = bit_read(s, ins, ins->src[src], ins->src_types[src], FMA);
404 
405         /* Apply source modifiers if we need to */
406         if (bi_has_source_mods(ins)) {
407                 bi_foreach_src(ins, src) {
408                         if (ins->src_types[src] == nir_type_float16) {
409                                 for (unsigned c = 0; c < 2; ++c) {
410                                         srcs[src].f16[c] = bh(bit_srcmod(bf(srcs[src].f16[c]),
411                                                         ins->src_abs[src],
412                                                         ins->src_neg[src]));
413                                 }
414                         } else if (ins->src_types[src] == nir_type_float32) {
415                                 srcs[src].f32 = bit_srcmod(srcs[src].f32,
416                                                         ins->src_abs[src],
417                                                         ins->src_neg[src]);
418                         }
419                 }
420         }
421 
422         /* Next, do the action of the instruction */
423         bit_t dest = { 0 };
424 
425         switch (ins->type) {
426         case BI_ADD:
427                 bpoly(add);
428 
429         case BI_BRANCH:
430                 unreachable("Unsupported op");
431 
432         case BI_CMP: {
433                 nir_alu_type T = ins->src_types[0];
434                 unsigned sz = nir_alu_type_get_type_size(T);
435 
436                 if (sz == 32 || sz == 64) {
437                         dest.u32 = bit_cmp(ins->cond, srcs[0], srcs[1], T, 0, 0, true);
438                 } else if (sz == 16) {
439                         for (unsigned c = 0; c < 2; ++c) {
440                                 dest.u16[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
441                                                 T, ins->swizzle[0][c], ins->swizzle[1][c],
442                                                 true);
443                         }
444                 } else if (sz == 8) {
445                         for (unsigned c = 0; c < 4; ++c) {
446                                 dest.u8[c] = bit_cmp(ins->cond, srcs[0], srcs[1],
447                                                 T, ins->swizzle[0][c], ins->swizzle[1][c],
448                                                 true);
449                         }
450                 } else {
451                         unreachable("Invalid");
452                 }
453 
454                 break;
455         }
456 
457         case BI_BITWISE: {
458                 /* Apply inverts first */
459                 if (ins->bitwise.src1_invert)
460                         srcs[1].u64 = ~srcs[1].u64;
461 
462                 /* TODO: Shifting */
463                 assert(srcs[2].u32 == 0);
464 
465                 if (ins->op.bitwise == BI_BITWISE_AND)
466                         dest.u64 = srcs[0].u64 & srcs[1].u64;
467                 else if (ins->op.bitwise == BI_BITWISE_OR)
468                         dest.u64 = srcs[0].u64 | srcs[1].u64;
469                 else if (ins->op.bitwise == BI_BITWISE_XOR)
470                         dest.u64 = srcs[0].u64 ^ srcs[1].u64;
471                 else
472                         unreachable("Unsupported op");
473 
474                 if (ins->bitwise.dest_invert)
475                         dest.u64 = ~dest.u64;
476 
477                 break;
478          }
479 
480         case BI_CONVERT: {
481                 /* If it exists */
482                 unsigned comp = ins->swizzle[0][1];
483 
484                 if (ins->dest_type == nir_type_float32)
485                         dest.f32 = bit_as_float32(ins->src_types[0], srcs[0], comp);
486                 else if (ins->dest_type == nir_type_uint32)
487                         dest.u32 = bit_as_uint32(ins->src_types[0], srcs[0], comp, ins->roundmode);
488                 else if (ins->dest_type == nir_type_int32)
489                         dest.i32 = bit_as_int32(ins->src_types[0], srcs[0], comp, ins->roundmode);
490                 else if (ins->dest_type == nir_type_float16) {
491                         dest.u16[0] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][0]);
492                         dest.u16[1] = bit_as_float16(ins->src_types[0], srcs[0], ins->swizzle[0][1]);
493                 } else if (ins->dest_type == nir_type_uint16) {
494                         dest.u16[0] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][0], ins->roundmode);
495                         dest.u16[1] = bit_as_uint16(ins->src_types[0], srcs[0], ins->swizzle[0][1], ins->roundmode);
496                 } else if (ins->dest_type == nir_type_int16) {
497                         dest.i16[0] = bit_as_int16(ins->src_types[0], srcs[0], ins->swizzle[0][0], ins->roundmode);
498                         dest.i16[1] = bit_as_int16(ins->src_types[0], srcs[0], ins->swizzle[0][1], ins->roundmode);
499                 } else {
500                         unreachable("Unknown convert type");
501                 }
502 
503                 break;
504         }
505 
506         case BI_CSEL: {
507                 bool direct = ins->cond == BI_COND_ALWAYS;
508                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[0]);
509 
510                 if (sz == 32) {
511                         bool cond = direct ? srcs[0].u32 :
512                                 bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], 0, 0);
513 
514                         dest = cond ? srcs[2] : srcs[3];
515                 } else if (sz == 16) {
516                         for (unsigned c = 0; c < 2; ++c) {
517                                 bool cond = direct ? srcs[0].u16[c] :
518                                         bit_eval_cond(ins->cond, srcs[0], srcs[1], ins->src_types[0], c, c);
519 
520                                 dest.u16[c] = cond ? srcs[2].u16[c] : srcs[3].u16[c];
521                         }
522                 } else {
523                         unreachable("Remaining types todo");
524                 }
525 
526                 break;
527         }
528 
529         case BI_FMA: {
530                 bfloat(bit_f64fma, bit_f32fma);
531                 unreachable("Unknown type");
532         }
533 
534         case BI_FREXP: {
535                 if (ins->src_types[0] != nir_type_float32)
536                         unreachable("Unknown frexp type");
537 
538 
539                 if (ins->op.frexp == BI_FREXPE_LOG)
540                         frexp_log(srcs[0].f32, &dest.i32);
541                 else
542                         unreachable("Unknown frexp");
543 
544                 break;
545         }
546 
547         case BI_IMATH: {
548                 if (ins->op.imath == BI_IMATH_ADD) {
549                         bint(bit_i64add, bit_i32add, bit_i16add, bit_i8add);
550                 } else if (ins->op.imath == BI_IMATH_SUB) {
551                         bint(bit_i64sub, bit_i32sub, bit_i16sub, bit_i8sub);
552                 } else {
553                         unreachable("Unsupported op");
554                 }
555 
556                 break;
557         }
558 
559         case BI_MINMAX: {
560                 if (ins->op.minmax == BI_MINMAX_MIN) {
561                         bpoly(min);
562                 } else {
563                         bpoly(max);
564                 }
565         }
566 
567         case BI_MOV:
568                 bpoly(mov);
569 
570         case BI_REDUCE_FMA: {
571                 if (ins->src_types[0] != nir_type_float32)
572                         unreachable("Unknown reduce type");
573 
574                 if (ins->op.reduce == BI_REDUCE_ADD_FREXPM) {
575                         int _nop = 0;
576                         float f = frexp_log(srcs[1].f32, &_nop);
577                         dest.f32 = srcs[0].f32 + f;
578                 } else {
579                         unreachable("Unknown reduce");
580                 }
581 
582                 break;
583         }
584 
585         case BI_SPECIAL_FMA:
586         case BI_SPECIAL_ADD: {
587                 assert(nir_alu_type_get_base_type(ins->dest_type) == nir_type_float);
588                 assert(ins->dest_type != nir_type_float64);
589 
590                 if (ins->op.special == BI_SPECIAL_EXP2_LOW) {
591                         assert(ins->dest_type == nir_type_float32);
592                         dest.f32 = exp2f(srcs[1].f32);
593                         break;
594                 }
595 
596                 float Q = (ins->dest_type == nir_type_float16) ?
597                         bf(srcs[0].u16[ins->swizzle[0][0]]) :
598                         srcs[0].f32;
599 
600                 float R = biti_special(Q, ins->op.special);
601 
602                 if (ins->dest_type == nir_type_float16) {
603                         dest.f16[0] = bh(R);
604 
605                         if (!ins->swizzle[0][0] && ins->op.special == BI_SPECIAL_FRSQ) {
606                                 /* Sorry. */
607                                 dest.f16[0]++;
608                         }
609                 } else {
610                         dest.f32 = R;
611                 }
612                 break;
613         }
614 
615         case BI_TABLE: {
616                 if (ins->op.table == BI_TABLE_LOG2_U_OVER_U_1_LOW) {
617                         assert(ins->dest_type == nir_type_float32);
618                         int _nop = 0;
619                         float f = frexp_log(srcs[0].f32, &_nop);
620                         dest.f32 = log2f(f) / (f - 1.0);
621                         dest.u32++; /* Sorry. */
622                 } else {
623                         unreachable("Unknown table op");
624                 }
625                 break;
626        }
627 
628         case BI_SELECT: {
629                 if (ins->src_types[0] == nir_type_uint16) {
630                         for (unsigned c = 0; c < 2; ++c)
631                                 dest.u16[c] = srcs[c].u16[ins->swizzle[c][0]];
632                 } else if (ins->src_types[0] == nir_type_uint8) {
633                         for (unsigned c = 0; c < 4; ++c)
634                                 dest.u8[c] = srcs[c].u8[ins->swizzle[c][0]];
635                 } else {
636                         unreachable("Unknown type");
637                 }
638                 break;
639         }
640 
641         case BI_ROUND: {
642                 if (ins->roundmode == BIFROST_RTP) {
643                         bfloat(bit_f64ceil, bit_f32ceil);
644                 } else if (ins->roundmode == BIFROST_RTN) {
645                         bfloat(bit_f64floor, bit_f32floor);
646                 } else if (ins->roundmode == BIFROST_RTE) {
647                         bfloat(bit_f64nearbyint, bit_f32nearbyint);
648                 } else if (ins->roundmode == BIFROST_RTZ) {
649                         bfloat(bit_f64trunc, bit_f32trunc);
650                 } else
651                         unreachable("Invalid");
652 
653                 break;
654         }
655 
656         /* We only interpret vertex shaders */
657         case BI_DISCARD:
658         case BI_LOAD_VAR:
659         case BI_ATEST:
660         case BI_BLEND:
661                 unreachable("Fragment op used in interpreter");
662 
663         /* Modeling main memory is more than I bargained for */
664         case BI_LOAD_UNIFORM:
665         case BI_LOAD_ATTR:
666         case BI_LOAD_VAR_ADDRESS:
667         case BI_LOAD:
668         case BI_STORE:
669         case BI_STORE_VAR:
670         case BI_TEXS:
671         case BI_TEXC:
672         case BI_TEXC_DUAL:
673                 unreachable("Unsupported I/O in interpreter");
674 
675         default:
676                 unreachable("Unsupported op");
677         }
678 
679         /* Apply _MSCALE */
680         if ((ins->type == BI_FMA || ins->type == BI_ADD) && ins->op.mscale) {
681                 unsigned idx = (ins->type == BI_FMA) ? 3 : 2;
682 
683                 assert(ins->src_types[idx] == nir_type_int32);
684                 assert(ins->dest_type == nir_type_float32);
685 
686                 int32_t scale = srcs[idx].i32;
687                 dest.f32 *= exp2f(scale);
688         }
689 
690         /* Apply outmod */
691         if (bi_has_outmod(ins) && ins->outmod != BIFROST_NONE) {
692                 if (ins->dest_type == nir_type_float16) {
693                         for (unsigned c = 0; c < 2; ++c)
694                                 dest.f16[c] = bh(bit_outmod(bf(dest.f16[c]), ins->outmod));
695                 } else {
696                         dest.f32 = bit_outmod(dest.f32, ins->outmod);
697                 }
698         }
699 
700         /* Finally, store the result */
701         bit_write(s, ins->dest, ins->dest_type, dest, FMA);
702 
703         /* For ADD - change out the passthrough */
704         if (!FMA) {
705                 s->T0 = s->T;
706                 s->T1 = dest.u32;
707         }
708 }
709 
/* Retire the half-float conversion shorthands defined for this file */
#undef bf
#undef bh
712