1 /*
2  * Copyright (C) 2018-2019 Alyssa Rosenzweig <alyssa@rosenzweig.io>
3  * Copyright (C) 2019-2020 Collabora, Ltd.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  */
24 
25 #include "compiler.h"
26 #include "midgard_ops.h"
27 #include "midgard_quirks.h"
28 
29 static midgard_int_mod
mir_get_imod(bool shift,nir_alu_type T,bool half,bool scalar)30 mir_get_imod(bool shift, nir_alu_type T, bool half, bool scalar)
31 {
32         if (!half) {
33                 assert(!shift);
34                 /* Sign-extension, really... */
35                 return scalar ? 0 : midgard_int_normal;
36         }
37 
38         if (shift)
39                 return midgard_int_shift;
40 
41         if (nir_alu_type_get_base_type(T) == nir_type_int)
42                 return midgard_int_sign_extend;
43         else
44                 return midgard_int_zero_extend;
45 }
46 
47 unsigned
mir_pack_mod(midgard_instruction * ins,unsigned i,bool scalar)48 mir_pack_mod(midgard_instruction *ins, unsigned i, bool scalar)
49 {
50         bool integer = midgard_is_integer_op(ins->op);
51         unsigned base_size = max_bitsize_for_alu(ins);
52         unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
53         bool half = (sz == (base_size >> 1));
54 
55         return integer ?
56                 mir_get_imod(ins->src_shift[i], ins->src_types[i], half, scalar) :
57                 ((ins->src_abs[i] << 0) |
58                  ((ins->src_neg[i] << 1)));
59 }
60 
61 /* Midgard IR only knows vector ALU types, but we sometimes need to actually
62  * use scalar ALU instructions, for functional or performance reasons. To do
63  * this, we just demote vector ALU payloads to scalar. */
64 
/* Return the index of the lowest set bit among the low 8 bits of `mask`.
 * A mask with none of those bits set trips an assertion (and yields 0 in
 * builds where assertions are compiled out). */
static int
component_from_mask(unsigned mask)
{
        int idx = 0;

        /* Walk upwards until we hit a set bit or run out of components */
        while (idx < 8 && !((mask >> idx) & 1))
                idx++;

        if (idx < 8)
                return idx;

        assert(0);
        return 0;
}
76 
77 static unsigned
mir_pack_scalar_source(unsigned mod,bool is_full,unsigned component)78 mir_pack_scalar_source(unsigned mod, bool is_full, unsigned component)
79 {
80         midgard_scalar_alu_src s = {
81                 .mod = mod,
82                 .full = is_full,
83                 .component = component << (is_full ? 1 : 0)
84         };
85 
86         unsigned o;
87         memcpy(&o, &s, sizeof(s));
88 
89         return o & ((1 << 6) - 1);
90 }
91 
92 static midgard_scalar_alu
vector_to_scalar_alu(midgard_vector_alu v,midgard_instruction * ins)93 vector_to_scalar_alu(midgard_vector_alu v, midgard_instruction *ins)
94 {
95         bool is_full = nir_alu_type_get_type_size(ins->dest_type) == 32;
96 
97         bool half_0 = nir_alu_type_get_type_size(ins->src_types[0]) == 16;
98         bool half_1 = nir_alu_type_get_type_size(ins->src_types[1]) == 16;
99         unsigned comp = component_from_mask(ins->mask);
100 
101         unsigned packed_src[2] = {
102                 mir_pack_scalar_source(mir_pack_mod(ins, 0, true), !half_0, ins->swizzle[0][comp]),
103                 mir_pack_scalar_source(mir_pack_mod(ins, 1, true), !half_1, ins->swizzle[1][comp])
104         };
105 
106         /* The output component is from the mask */
107         midgard_scalar_alu s = {
108                 .op = v.op,
109                 .src1 = packed_src[0],
110                 .src2 = packed_src[1],
111                 .unknown = 0,
112                 .outmod = v.outmod,
113                 .output_full = is_full,
114                 .output_component = comp
115         };
116 
117         /* Full components are physically spaced out */
118         if (is_full) {
119                 assert(s.output_component < 4);
120                 s.output_component <<= 1;
121         }
122 
123         /* Inline constant is passed along rather than trying to extract it
124          * from v */
125 
126         if (ins->has_inline_constant) {
127                 uint16_t imm = 0;
128                 int lower_11 = ins->inline_constant & ((1 << 12) - 1);
129                 imm |= (lower_11 >> 9) & 3;
130                 imm |= (lower_11 >> 6) & 4;
131                 imm |= (lower_11 >> 2) & 0x38;
132                 imm |= (lower_11 & 63) << 6;
133 
134                 s.src2 = imm;
135         }
136 
137         return s;
138 }
139 
140 /* 64-bit swizzles are super easy since there are 2 components of 2 components
141  * in an 8-bit field ... lots of duplication to go around!
142  *
143  * Swizzles of 32-bit vectors accessed from 64-bit instructions are a little
144  * funny -- pack them *as if* they were native 64-bit, using rep_* flags to
145  * flag upper. For instance, xy would become 64-bit XY but that's just xyzw
146  * native. Likewise, zz would become 64-bit XX with rep* so it would be xyxy
147  * with rep. Pretty nifty, huh? */
148 
149 static unsigned
mir_pack_swizzle_64(unsigned * swizzle,unsigned max_component)150 mir_pack_swizzle_64(unsigned *swizzle, unsigned max_component)
151 {
152         unsigned packed = 0;
153 
154         for (unsigned i = 0; i < 2; ++i) {
155                 assert(swizzle[i] <= max_component);
156 
157                 unsigned a = (swizzle[i] & 1) ?
158                         (COMPONENT_W << 2) | COMPONENT_Z :
159                         (COMPONENT_Y << 2) | COMPONENT_X;
160 
161                 packed |= a << (i * 4);
162         }
163 
164         return packed;
165 }
166 
167 static void
mir_pack_mask_alu(midgard_instruction * ins,midgard_vector_alu * alu)168 mir_pack_mask_alu(midgard_instruction *ins, midgard_vector_alu *alu)
169 {
170         unsigned effective = ins->mask;
171 
172         /* If we have a destination override, we need to figure out whether to
173          * override to the lower or upper half, shifting the effective mask in
174          * the latter, so AAAA.... becomes AAAA */
175 
176         unsigned inst_size = max_bitsize_for_alu(ins);
177         signed upper_shift = mir_upper_override(ins, inst_size);
178 
179         if (upper_shift >= 0) {
180                 effective >>= upper_shift;
181                 alu->dest_override = upper_shift ?
182                         midgard_dest_override_upper :
183                         midgard_dest_override_lower;
184         } else {
185                 alu->dest_override = midgard_dest_override_none;
186         }
187 
188         if (inst_size == 32)
189                 alu->mask = expand_writemask(effective, 2);
190         else if (inst_size == 64)
191                 alu->mask = expand_writemask(effective, 1);
192         else
193                 alu->mask = effective;
194 }
195 
196 static unsigned
mir_pack_swizzle(unsigned mask,unsigned * swizzle,nir_alu_type T,midgard_reg_mode reg_mode,bool op_channeled,bool * rep_low,bool * rep_high)197 mir_pack_swizzle(unsigned mask, unsigned *swizzle,
198                 nir_alu_type T, midgard_reg_mode reg_mode,
199                 bool op_channeled, bool *rep_low, bool *rep_high)
200 {
201         unsigned packed = 0;
202         unsigned sz = nir_alu_type_get_type_size(T);
203 
204         if (reg_mode == midgard_reg_mode_64) {
205                 assert(sz == 64 || sz == 32);
206                 unsigned components = (sz == 32) ? 4 : 2;
207 
208                 packed = mir_pack_swizzle_64(swizzle, components);
209 
210                 if (sz == 32) {
211                         bool lo = swizzle[0] >= COMPONENT_Z;
212                         bool hi = swizzle[1] >= COMPONENT_Z;
213 
214                         if (mask & 0x1) {
215                                 /* We can't mix halves... */
216                                 if (mask & 2)
217                                         assert(lo == hi);
218 
219                                 *rep_low = lo;
220                         } else {
221                                 *rep_low = hi;
222                         }
223                 } else if (sz < 32) {
224                         unreachable("Cannot encode 8/16 swizzle in 64-bit");
225                 }
226         } else {
227                 /* For 32-bit, swizzle packing is stupid-simple. For 16-bit,
228                  * the strategy is to check whether the nibble we're on is
229                  * upper or lower. We need all components to be on the same
230                  * "side"; that much is enforced by the ISA and should have
231                  * been lowered. TODO: 8-bit packing. TODO: vec8 */
232 
233                 unsigned first = mask ? ffs(mask) - 1 : 0;
234                 bool upper = swizzle[first] > 3;
235 
236                 if (upper && mask)
237                         assert(sz <= 16);
238 
239                 bool dest_up = !op_channeled && (first >= 4);
240 
241                 for (unsigned c = (dest_up ? 4 : 0); c < (dest_up ? 8 : 4); ++c) {
242                         unsigned v = swizzle[c];
243 
244                         ASSERTED bool t_upper = v > 3;
245 
246                         /* Ensure we're doing something sane */
247 
248                         if (mask & (1 << c)) {
249                                 assert(t_upper == upper);
250                                 assert(v <= 7);
251                         }
252 
253                         /* Use the non upper part */
254                         v &= 0x3;
255 
256                         packed |= v << (2 * (c % 4));
257                 }
258 
259 
260                 /* Replicate for now.. should really pick a side for
261                  * dot products */
262 
263                 if (reg_mode == midgard_reg_mode_16 && sz == 16) {
264                         *rep_low = !upper;
265                         *rep_high = upper;
266                 } else if (reg_mode == midgard_reg_mode_16 && sz == 8) {
267                         *rep_low = upper;
268                         *rep_high = upper;
269                 } else if (reg_mode == midgard_reg_mode_32) {
270                         *rep_low = upper;
271                 } else {
272                         unreachable("Unhandled reg mode");
273                 }
274         }
275 
276         return packed;
277 }
278 
279 static void
mir_pack_vector_srcs(midgard_instruction * ins,midgard_vector_alu * alu)280 mir_pack_vector_srcs(midgard_instruction *ins, midgard_vector_alu *alu)
281 {
282         bool channeled = GET_CHANNEL_COUNT(alu_opcode_props[ins->op].props);
283 
284         unsigned base_size = max_bitsize_for_alu(ins);
285 
286         for (unsigned i = 0; i < 2; ++i) {
287                 if (ins->has_inline_constant && (i == 1))
288                         continue;
289 
290                 if (ins->src[i] == ~0)
291                         continue;
292 
293                 bool rep_lo = false, rep_hi = false;
294                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[i]);
295                 bool half = (sz == (base_size >> 1));
296 
297                 assert((sz == base_size) || half);
298 
299                 unsigned swizzle = mir_pack_swizzle(ins->mask, ins->swizzle[i],
300                                 ins->src_types[i], reg_mode_for_bitsize(base_size),
301                                 channeled, &rep_lo, &rep_hi);
302 
303                 midgard_vector_alu_src pack = {
304                         .mod = mir_pack_mod(ins, i, false),
305                         .rep_low = rep_lo,
306                         .rep_high = rep_hi,
307                         .half = half,
308                         .swizzle = swizzle
309                 };
310 
311                 unsigned p = vector_alu_srco_unsigned(pack);
312 
313                 if (i == 0)
314                         alu->src1 = p;
315                 else
316                         alu->src2 = p;
317         }
318 }
319 
320 static void
mir_pack_swizzle_ldst(midgard_instruction * ins)321 mir_pack_swizzle_ldst(midgard_instruction *ins)
322 {
323         /* TODO: non-32-bit, non-vec4 */
324         for (unsigned c = 0; c < 4; ++c) {
325                 unsigned v = ins->swizzle[0][c];
326 
327                 /* Check vec4 */
328                 assert(v <= 3);
329 
330                 ins->load_store.swizzle |= v << (2 * c);
331         }
332 
333         /* TODO: arg_1/2 */
334 }
335 
336 static void
mir_pack_swizzle_tex(midgard_instruction * ins)337 mir_pack_swizzle_tex(midgard_instruction *ins)
338 {
339         for (unsigned i = 0; i < 2; ++i) {
340                 unsigned packed = 0;
341 
342                 for (unsigned c = 0; c < 4; ++c) {
343                         unsigned v = ins->swizzle[i][c];
344 
345                         /* Check vec4 */
346                         assert(v <= 3);
347 
348                         packed |= v << (2 * c);
349                 }
350 
351                 if (i == 0)
352                         ins->texture.swizzle = packed;
353                 else
354                         ins->texture.in_reg_swizzle = packed;
355         }
356 
357         /* TODO: bias component */
358 }
359 
360 /* Up to 3 { ALU, LDST } bundles can execute in parallel with a texture op.
361  * Given a texture op, lookahead to see how many such bundles we can flag for
362  * OoO execution */
363 
364 static bool
mir_can_run_ooo(midgard_block * block,midgard_bundle * bundle,unsigned dependency)365 mir_can_run_ooo(midgard_block *block, midgard_bundle *bundle,
366                 unsigned dependency)
367 {
368         /* Don't read out of bounds */
369         if (bundle >= (midgard_bundle *) ((char *) block->bundles.data + block->bundles.size))
370                 return false;
371 
372         /* Texture ops can't execute with other texture ops */
373         if (!IS_ALU(bundle->tag) && bundle->tag != TAG_LOAD_STORE_4)
374                 return false;
375 
376         /* Ensure there is no read-after-write dependency */
377 
378         for (unsigned i = 0; i < bundle->instruction_count; ++i) {
379                 midgard_instruction *ins = bundle->instructions[i];
380 
381                 mir_foreach_src(ins, s) {
382                         if (ins->src[s] == dependency)
383                                 return false;
384                 }
385         }
386 
387         /* Otherwise, we're okay */
388         return true;
389 }
390 
391 static void
mir_pack_tex_ooo(midgard_block * block,midgard_bundle * bundle,midgard_instruction * ins)392 mir_pack_tex_ooo(midgard_block *block, midgard_bundle *bundle, midgard_instruction *ins)
393 {
394         unsigned count = 0;
395 
396         for (count = 0; count < 3; ++count) {
397                 if (!mir_can_run_ooo(block, bundle + count + 1, ins->dest))
398                         break;
399         }
400 
401         ins->texture.out_of_order = count;
402 }
403 
404 /* Load store masks are 4-bits. Load/store ops pack for that. vec4 is the
405  * natural mask width; vec8 is constrained to be in pairs, vec2 is duplicated. TODO: 8-bit?
406  */
407 
408 static void
mir_pack_ldst_mask(midgard_instruction * ins)409 mir_pack_ldst_mask(midgard_instruction *ins)
410 {
411         unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
412         unsigned packed = ins->mask;
413 
414         if (sz == 64) {
415                 packed = ((ins->mask & 0x2) ? (0x8 | 0x4) : 0) |
416                          ((ins->mask & 0x1) ? (0x2 | 0x1) : 0);
417         } else if (sz == 16) {
418                 packed = 0;
419 
420                 for (unsigned i = 0; i < 4; ++i) {
421                         /* Make sure we're duplicated */
422                         bool u = (ins->mask & (1 << (2*i + 0))) != 0;
423                         ASSERTED bool v = (ins->mask & (1 << (2*i + 1))) != 0;
424                         assert(u == v);
425 
426                         packed |= (u << i);
427                 }
428         } else {
429                 assert(sz == 32);
430         }
431 
432         ins->load_store.mask = packed;
433 }
434 
435 static void
mir_lower_inverts(midgard_instruction * ins)436 mir_lower_inverts(midgard_instruction *ins)
437 {
438         bool inv[3] = {
439                 ins->src_invert[0],
440                 ins->src_invert[1],
441                 ins->src_invert[2]
442         };
443 
444         switch (ins->op) {
445         case midgard_alu_op_iand:
446                 /* a & ~b = iandnot(a, b) */
447                 /* ~a & ~b = ~(a | b) = inor(a, b) */
448 
449                 if (inv[0] && inv[1])
450                         ins->op = midgard_alu_op_inor;
451                 else if (inv[1])
452                         ins->op = midgard_alu_op_iandnot;
453 
454                 break;
455         case midgard_alu_op_ior:
456                 /*  a | ~b = iornot(a, b) */
457                 /* ~a | ~b = ~(a & b) = inand(a, b) */
458 
459                 if (inv[0] && inv[1])
460                         ins->op = midgard_alu_op_inand;
461                 else if (inv[1])
462                         ins->op = midgard_alu_op_iornot;
463 
464                 break;
465 
466         case midgard_alu_op_ixor:
467                 /* ~a ^ b = a ^ ~b = ~(a ^ b) = inxor(a, b) */
468                 /* ~a ^ ~b = a ^ b */
469 
470                 if (inv[0] ^ inv[1])
471                         ins->op = midgard_alu_op_inxor;
472 
473                 break;
474 
475         default:
476                 break;
477         }
478 }
479 
480 /* Opcodes with ROUNDS are the base (rte/0) type so we can just add */
481 
482 static void
mir_lower_roundmode(midgard_instruction * ins)483 mir_lower_roundmode(midgard_instruction *ins)
484 {
485         if (alu_opcode_props[ins->op].props & MIDGARD_ROUNDS) {
486                 assert(ins->roundmode <= 0x3);
487                 ins->op += ins->roundmode;
488         }
489 }
490 
491 static midgard_load_store_word
load_store_from_instr(midgard_instruction * ins)492 load_store_from_instr(midgard_instruction *ins)
493 {
494         midgard_load_store_word ldst = ins->load_store;
495         ldst.op = ins->op;
496 
497         if (OP_IS_STORE(ldst.op)) {
498                 ldst.reg = SSA_REG_FROM_FIXED(ins->src[0]) & 1;
499         } else {
500                 ldst.reg = SSA_REG_FROM_FIXED(ins->dest);
501         }
502 
503         /* Atomic opcode swizzles have a special meaning:
504          *   - The first two bits say which component of the implicit register should be used
505          *   - The next two bits say if the implicit register is r26 or r27 */
506         if (OP_IS_ATOMIC(ins->op)) {
507                 ldst.swizzle = 0;
508                 ldst.swizzle |= ins->swizzle[3][0] & 3;
509                 ldst.swizzle |= (SSA_REG_FROM_FIXED(ins->src[3]) & 1 ? 1 : 0) << 2;
510         }
511 
512         if (ins->src[1] != ~0) {
513                 unsigned src = SSA_REG_FROM_FIXED(ins->src[1]);
514                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[1]);
515                 ldst.arg_1 |= midgard_ldst_reg(src, ins->swizzle[1][0], sz);
516         }
517 
518         if (ins->src[2] != ~0) {
519                 unsigned src = SSA_REG_FROM_FIXED(ins->src[2]);
520                 unsigned sz = nir_alu_type_get_type_size(ins->src_types[2]);
521                 ldst.arg_2 |= midgard_ldst_reg(src, ins->swizzle[2][0], sz);
522         }
523 
524         return ldst;
525 }
526 
527 static midgard_texture_word
texture_word_from_instr(midgard_instruction * ins)528 texture_word_from_instr(midgard_instruction *ins)
529 {
530         midgard_texture_word tex = ins->texture;
531         tex.op = ins->op;
532 
533         unsigned src1 = ins->src[1] == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->src[1]);
534         tex.in_reg_select = src1 & 1;
535 
536         unsigned dest = ins->dest == ~0 ? REGISTER_UNUSED : SSA_REG_FROM_FIXED(ins->dest);
537         tex.out_reg_select = dest & 1;
538 
539         if (ins->src[2] != ~0) {
540                 midgard_tex_register_select sel = {
541                         .select = SSA_REG_FROM_FIXED(ins->src[2]) & 1,
542                         .full = 1,
543                         .component = ins->swizzle[2][0]
544                 };
545                 uint8_t packed;
546                 memcpy(&packed, &sel, sizeof(packed));
547                 tex.bias = packed;
548         }
549 
550         if (ins->src[3] != ~0) {
551                 unsigned x = ins->swizzle[3][0];
552                 unsigned y = x + 1;
553                 unsigned z = x + 2;
554 
555                 /* Check range, TODO: half-registers */
556                 assert(z < 4);
557 
558                 unsigned offset_reg = SSA_REG_FROM_FIXED(ins->src[3]);
559                 tex.offset =
560                         (1)                   | /* full */
561                         (offset_reg & 1) << 1 | /* select */
562                         (0 << 2)              | /* upper */
563                         (x << 3)              | /* swizzle */
564                         (y << 5)              | /* swizzle */
565                         (z << 7);               /* swizzle */
566         }
567 
568         return tex;
569 }
570 
571 static midgard_vector_alu
vector_alu_from_instr(midgard_instruction * ins)572 vector_alu_from_instr(midgard_instruction *ins)
573 {
574         midgard_vector_alu alu = {
575                 .op = ins->op,
576                 .outmod = ins->outmod,
577                 .reg_mode = reg_mode_for_bitsize(max_bitsize_for_alu(ins))
578         };
579 
580         if (ins->has_inline_constant) {
581                 /* Encode inline 16-bit constant. See disassembler for
582                  * where the algorithm is from */
583 
584                 int lower_11 = ins->inline_constant & ((1 << 12) - 1);
585                 uint16_t imm = ((lower_11 >> 8) & 0x7) |
586                                ((lower_11 & 0xFF) << 3);
587 
588                 alu.src2 = imm << 2;
589         }
590 
591         return alu;
592 }
593 
594 static midgard_branch_extended
midgard_create_branch_extended(midgard_condition cond,midgard_jmp_writeout_op op,unsigned dest_tag,signed quadword_offset)595 midgard_create_branch_extended( midgard_condition cond,
596                                 midgard_jmp_writeout_op op,
597                                 unsigned dest_tag,
598                                 signed quadword_offset)
599 {
600         /* The condition code is actually a LUT describing a function to
601          * combine multiple condition codes. However, we only support a single
602          * condition code at the moment, so we just duplicate over a bunch of
603          * times. */
604 
605         uint16_t duplicated_cond =
606                 (cond << 14) |
607                 (cond << 12) |
608                 (cond << 10) |
609                 (cond << 8) |
610                 (cond << 6) |
611                 (cond << 4) |
612                 (cond << 2) |
613                 (cond << 0);
614 
615         midgard_branch_extended branch = {
616                 .op = op,
617                 .dest_tag = dest_tag,
618                 .offset = quadword_offset,
619                 .cond = duplicated_cond
620         };
621 
622         return branch;
623 }
624 
625 static void
emit_branch(midgard_instruction * ins,compiler_context * ctx,midgard_block * block,midgard_bundle * bundle,struct util_dynarray * emission)626 emit_branch(midgard_instruction *ins,
627             compiler_context *ctx,
628             midgard_block *block,
629             midgard_bundle *bundle,
630             struct util_dynarray *emission)
631 {
632         /* Parse some basic branch info */
633         bool is_compact = ins->unit == ALU_ENAB_BR_COMPACT;
634         bool is_conditional = ins->branch.conditional;
635         bool is_inverted = ins->branch.invert_conditional;
636         bool is_discard = ins->branch.target_type == TARGET_DISCARD;
637         bool is_tilebuf_wait = ins->branch.target_type == TARGET_TILEBUF_WAIT;
638         bool is_special = is_discard || is_tilebuf_wait;
639         bool is_writeout = ins->writeout;
640 
641         /* Determine the block we're jumping to */
642         int target_number = ins->branch.target_block;
643 
644         /* Report the destination tag */
645         int dest_tag = is_discard ? 0 :
646                 is_tilebuf_wait ? bundle->tag :
647                 midgard_get_first_tag_from_block(ctx, target_number);
648 
649         /* Count up the number of quadwords we're
650          * jumping over = number of quadwords until
651          * (br_block_idx, target_number) */
652 
653         int quadword_offset = 0;
654 
655         if (is_discard) {
656                 /* Fixed encoding, not actually an offset */
657                 quadword_offset = 0x2;
658         } else if (is_tilebuf_wait) {
659                 quadword_offset = -1;
660         } else if (target_number > block->base.name) {
661                 /* Jump forward */
662 
663                 for (int idx = block->base.name+1; idx < target_number; ++idx) {
664                         midgard_block *blk = mir_get_block(ctx, idx);
665                         assert(blk);
666 
667                         quadword_offset += blk->quadword_count;
668                 }
669         } else {
670                 /* Jump backwards */
671 
672                 for (int idx = block->base.name; idx >= target_number; --idx) {
673                         midgard_block *blk = mir_get_block(ctx, idx);
674                         assert(blk);
675 
676                         quadword_offset -= blk->quadword_count;
677                 }
678         }
679 
680         /* Unconditional extended branches (far jumps)
681          * have issues, so we always use a conditional
682          * branch, setting the condition to always for
683          * unconditional. For compact unconditional
684          * branches, cond isn't used so it doesn't
685          * matter what we pick. */
686 
687         midgard_condition cond =
688                 !is_conditional ? midgard_condition_always :
689                 is_inverted ? midgard_condition_false :
690                 midgard_condition_true;
691 
692         midgard_jmp_writeout_op op =
693                 is_discard ? midgard_jmp_writeout_op_discard :
694                 is_tilebuf_wait ? midgard_jmp_writeout_op_tilebuffer_pending :
695                 is_writeout ? midgard_jmp_writeout_op_writeout :
696                 (is_compact && !is_conditional) ?
697                 midgard_jmp_writeout_op_branch_uncond :
698                 midgard_jmp_writeout_op_branch_cond;
699 
700         if (is_compact) {
701                 unsigned size = sizeof(midgard_branch_cond);
702 
703                 if (is_conditional || is_special) {
704                         midgard_branch_cond branch = {
705                                 .op = op,
706                                 .dest_tag = dest_tag,
707                                 .offset = quadword_offset,
708                                 .cond = cond
709                         };
710                         memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
711                 } else {
712                         assert(op == midgard_jmp_writeout_op_branch_uncond);
713                         midgard_branch_uncond branch = {
714                                 .op = op,
715                                 .dest_tag = dest_tag,
716                                 .offset = quadword_offset,
717                                 .unknown = 1
718                         };
719                         assert(branch.offset == quadword_offset);
720                         memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
721                 }
722         } else { /* `ins->compact_branch`,  misnomer */
723                 unsigned size = sizeof(midgard_branch_extended);
724 
725                 midgard_branch_extended branch =
726                         midgard_create_branch_extended(
727                                         cond, op,
728                                         dest_tag,
729                                         quadword_offset);
730 
731                 memcpy(util_dynarray_grow_bytes(emission, size, 1), &branch, size);
732         }
733 }
734 
735 static void
emit_alu_bundle(compiler_context * ctx,midgard_block * block,midgard_bundle * bundle,struct util_dynarray * emission,unsigned lookahead)736 emit_alu_bundle(compiler_context *ctx,
737                 midgard_block *block,
738                 midgard_bundle *bundle,
739                 struct util_dynarray *emission,
740                 unsigned lookahead)
741 {
742         /* Emit the control word */
743         util_dynarray_append(emission, uint32_t, bundle->control | lookahead);
744 
745         /* Next up, emit register words */
746         for (unsigned i = 0; i < bundle->instruction_count; ++i) {
747                 midgard_instruction *ins = bundle->instructions[i];
748 
749                 /* Check if this instruction has registers */
750                 if (ins->compact_branch) continue;
751 
752                 unsigned src2_reg = REGISTER_UNUSED;
753                 if (ins->has_inline_constant)
754                         src2_reg = ins->inline_constant >> 11;
755                 else if (ins->src[1] != ~0)
756                         src2_reg = SSA_REG_FROM_FIXED(ins->src[1]);
757 
758                 /* Otherwise, just emit the registers */
759                 uint16_t reg_word = 0;
760                 midgard_reg_info registers = {
761                         .src1_reg = (ins->src[0] == ~0 ?
762                                         REGISTER_UNUSED :
763                                         SSA_REG_FROM_FIXED(ins->src[0])),
764                         .src2_reg = src2_reg,
765                         .src2_imm = ins->has_inline_constant,
766                         .out_reg = (ins->dest == ~0 ?
767                                         REGISTER_UNUSED :
768                                         SSA_REG_FROM_FIXED(ins->dest)),
769                 };
770                 memcpy(&reg_word, &registers, sizeof(uint16_t));
771                 util_dynarray_append(emission, uint16_t, reg_word);
772         }
773 
774         /* Now, we emit the body itself */
775         for (unsigned i = 0; i < bundle->instruction_count; ++i) {
776                 midgard_instruction *ins = bundle->instructions[i];
777 
778                 if (!ins->compact_branch) {
779                         mir_lower_inverts(ins);
780                         mir_lower_roundmode(ins);
781                 }
782 
783                 if (midgard_is_branch_unit(ins->unit)) {
784                         emit_branch(ins, ctx, block, bundle, emission);
785                 } else if (ins->unit & UNITS_ANY_VECTOR) {
786                         midgard_vector_alu source = vector_alu_from_instr(ins);
787                         mir_pack_mask_alu(ins, &source);
788                         mir_pack_vector_srcs(ins, &source);
789                         unsigned size = sizeof(source);
790                         memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size);
791                 } else {
792                         midgard_scalar_alu source = vector_to_scalar_alu(vector_alu_from_instr(ins), ins);
793                         unsigned size = sizeof(source);
794                         memcpy(util_dynarray_grow_bytes(emission, size, 1), &source, size);
795                 }
796         }
797 
798         /* Emit padding (all zero) */
799         memset(util_dynarray_grow_bytes(emission, bundle->padding, 1), 0, bundle->padding);
800 
801         /* Tack on constants */
802 
803         if (bundle->has_embedded_constants)
804                 util_dynarray_append(emission, midgard_constants, bundle->constants);
805 }
806 
807 /* Shift applied to the immediate used as an offset. Probably this is papering
808  * over some other semantic distinction else well, but it unifies things in the
809  * compiler so I don't mind. */
810 
811 static unsigned
mir_ldst_imm_shift(midgard_load_store_op op)812 mir_ldst_imm_shift(midgard_load_store_op op)
813 {
814         if (OP_IS_UBO_READ(op))
815                 return 3;
816         else
817                 return 1;
818 }
819 
820 static enum mali_sampler_type
midgard_sampler_type(nir_alu_type t)821 midgard_sampler_type(nir_alu_type t) {
822         switch (nir_alu_type_get_base_type(t))
823         {
824         case nir_type_float:
825                 return MALI_SAMPLER_FLOAT;
826         case nir_type_int:
827                 return MALI_SAMPLER_SIGNED;
828         case nir_type_uint:
829                 return MALI_SAMPLER_UNSIGNED;
830         default:
831                 unreachable("Unknown sampler type");
832         }
833 }
834 
835 /* After everything is scheduled, emit whole bundles at a time */
836 
837 void
emit_binary_bundle(compiler_context * ctx,midgard_block * block,midgard_bundle * bundle,struct util_dynarray * emission,int next_tag)838 emit_binary_bundle(compiler_context *ctx,
839                    midgard_block *block,
840                    midgard_bundle *bundle,
841                    struct util_dynarray *emission,
842                    int next_tag)
843 {
844         int lookahead = next_tag << 4;
845 
846         switch (bundle->tag) {
847         case TAG_ALU_4:
848         case TAG_ALU_8:
849         case TAG_ALU_12:
850         case TAG_ALU_16:
851         case TAG_ALU_4 + 4:
852         case TAG_ALU_8 + 4:
853         case TAG_ALU_12 + 4:
854         case TAG_ALU_16 + 4:
855                 emit_alu_bundle(ctx, block, bundle, emission, lookahead);
856                 break;
857 
858         case TAG_LOAD_STORE_4: {
859                 /* One or two composing instructions */
860 
861                 uint64_t current64, next64 = LDST_NOP;
862 
863                 /* Copy masks */
864 
865                 for (unsigned i = 0; i < bundle->instruction_count; ++i) {
866                         mir_pack_ldst_mask(bundle->instructions[i]);
867 
868                         /* Atomic ops don't use this swizzle the same way as other ops */
869                         if (!OP_IS_ATOMIC(bundle->instructions[i]->op))
870                                 mir_pack_swizzle_ldst(bundle->instructions[i]);
871 
872                         /* Apply a constant offset */
873                         unsigned offset = bundle->instructions[i]->constants.u32[0];
874 
875                         if (offset) {
876                                 unsigned shift = mir_ldst_imm_shift(bundle->instructions[i]->op);
877                                 unsigned upper_shift = 10 - shift;
878 
879                                 bundle->instructions[i]->load_store.varying_parameters |= (offset & ((1 << upper_shift) - 1)) << shift;
880                                 bundle->instructions[i]->load_store.address |= (offset >> upper_shift);
881                         }
882                 }
883 
884                 midgard_load_store_word ldst0 =
885                         load_store_from_instr(bundle->instructions[0]);
886                 memcpy(&current64, &ldst0, sizeof(current64));
887 
888                 if (bundle->instruction_count == 2) {
889                         midgard_load_store_word ldst1 =
890                                 load_store_from_instr(bundle->instructions[1]);
891                         memcpy(&next64, &ldst1, sizeof(next64));
892                 }
893 
894                 midgard_load_store instruction = {
895                         .type = bundle->tag,
896                         .next_type = next_tag,
897                         .word1 = current64,
898                         .word2 = next64
899                 };
900 
901                 util_dynarray_append(emission, midgard_load_store, instruction);
902 
903                 break;
904         }
905 
906         case TAG_TEXTURE_4:
907         case TAG_TEXTURE_4_VTX:
908         case TAG_TEXTURE_4_BARRIER: {
909                 /* Texture instructions are easy, since there is no pipelining
910                  * nor VLIW to worry about. We may need to set .cont/.last
911                  * flags. */
912 
913                 midgard_instruction *ins = bundle->instructions[0];
914 
915                 ins->texture.type = bundle->tag;
916                 ins->texture.next_type = next_tag;
917 
918                 /* Nothing else to pack for barriers */
919                 if (ins->op == TEXTURE_OP_BARRIER) {
920                         ins->texture.cont = ins->texture.last = 1;
921                         ins->texture.op = ins->op;
922                         util_dynarray_append(emission, midgard_texture_word, ins->texture);
923                         return;
924                 }
925 
926                 signed override = mir_upper_override(ins, 32);
927 
928                 ins->texture.mask = override > 0 ?
929                         ins->mask >> override :
930                         ins->mask;
931 
932                 mir_pack_swizzle_tex(ins);
933 
934                 if (!(ctx->quirks & MIDGARD_NO_OOO))
935                         mir_pack_tex_ooo(block, bundle, ins);
936 
937                 unsigned osz = nir_alu_type_get_type_size(ins->dest_type);
938                 unsigned isz = nir_alu_type_get_type_size(ins->src_types[1]);
939 
940                 assert(osz == 32 || osz == 16);
941                 assert(isz == 32 || isz == 16);
942 
943                 ins->texture.out_full = (osz == 32);
944                 ins->texture.out_upper = override > 0;
945                 ins->texture.in_reg_full = (isz == 32);
946                 ins->texture.sampler_type = midgard_sampler_type(ins->dest_type);
947                 ins->texture.outmod = ins->outmod;
948 
949                 if (mir_op_computes_derivatives(ctx->stage, ins->op)) {
950                         ins->texture.cont = !ins->helper_terminate;
951                         ins->texture.last = ins->helper_terminate || ins->helper_execute;
952                 } else {
953                         ins->texture.cont = ins->texture.last = 1;
954                 }
955 
956                 midgard_texture_word texture = texture_word_from_instr(ins);
957                 util_dynarray_append(emission, midgard_texture_word, texture);
958                 break;
959         }
960 
961         default:
962                 unreachable("Unknown midgard instruction type\n");
963         }
964 }
965