1 /*
2  * Copyright (C) 2020 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 /* Autogenerated file, do not edit */
25 
26 #ifndef _BI_GENERATED_PACK_H
27 #define _BI_GENERATED_PACK_H
28 
29 #include "compiler.h"
30 #include "bi_pack_helpers.h"
31 
32 static inline unsigned
pan_pack_fma_rshift_and_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)33 pan_pack_fma_rshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
34 {
35     unsigned src0 = bi_get_src(ins, regs, 0);
36     assert((1 << src0) & 0xfb);
37     unsigned src1 = bi_get_src(ins, regs, 1);
38     assert((1 << src1) & 0xfb);
39     unsigned src2 = bi_get_src(ins, regs, 2);
40 
41     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
42     unsigned lane2_temp = 0;
43     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
44     else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
45     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
46     else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
47     else unreachable("Could not pattern match widen");
48     unsigned lane2 = lane2_temp;
49     assert(lane2 < 4);
50 
51     unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
52     assert(not1 < 2);
53 
54     unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
55     assert(not_result < 2);
56 
57     return 0x301000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
58 }
59 
60 static inline unsigned
pan_pack_add_iadd_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)61 pan_pack_add_iadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
62 {
63     unsigned src0 = bi_get_src(ins, regs, 0);
64     unsigned src1 = bi_get_src(ins, regs, 1);
65 
66     unsigned saturate = 0;
67 
68     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
69     unsigned lanes1_temp = 0;
70     if (lanes1_sz == 32) lanes1_temp = 0;
71     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
72     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
73     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
74     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
75     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
76     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
77     else unreachable("Could not pattern match widen");
78     unsigned lanes1 = lanes1_temp;
79     assert(lanes1 < 8);
80 
81     if (lanes1 == 0) {
82         unsigned derived_7 = 0;
83         if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
84         else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
85         else unreachable("No pattern match at pos 7");
86 
87         return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
88     } else if ((lanes1 == 1) || (lanes1 == 2)) {
89         unsigned derived_7 = 0;
90         if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
91         else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
92         else unreachable("No pattern match at pos 7");
93 
94         unsigned derived_9 = 0;
95         if (lanes1 == 1) derived_9 = 0;
96         else if (lanes1 == 2) derived_9 = 1;
97         else unreachable("No pattern match at pos 9");
98 
99         return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
100     } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
101         unsigned derived_7 = 0;
102         if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
103         else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
104         else unreachable("No pattern match at pos 7");
105 
106         unsigned derived_9 = 0;
107         if (lanes1 == 3) derived_9 = 0;
108         else if (lanes1 == 4) derived_9 = 1;
109         else if (lanes1 == 5) derived_9 = 2;
110         else if (lanes1 == 6) derived_9 = 3;
111         else unreachable("No pattern match at pos 9");
112 
113         return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
114     } else {
115         unreachable("No matching state found in add_iadd_u32");
116     }
117 }
118 
119 static inline unsigned
pan_pack_add_ld_var_flat(bi_clause * clause,bi_instruction * ins,bi_registers * regs)120 pan_pack_add_ld_var_flat(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
121 {
122     unsigned src0 = bi_get_src(ins, regs, 0);
123 
124     unsigned vecsize = ins->vector_channels - 1;
125     assert(vecsize < 4);
126 
127     unsigned register_format_temp = 0;
128     if (ins->format == nir_type_float32) register_format_temp = 0;
129     else if (ins->format == nir_type_float16) register_format_temp = 1;
130     else if (ins->format == nir_type_uint32) register_format_temp = 2;
131     else if (ins->format == nir_type_int32) register_format_temp = 3;
132     else unreachable("Could not pattern match register format");
133     unsigned register_format = register_format_temp;
134     assert(register_format < 8);
135 
136     unsigned function = 3;
137 
138     bi_write_staging_register(clause, ins);
139     if (register_format != 4) {
140         unsigned derived_10 = 0;
141         if ((register_format == 0) || (register_format == 1)) derived_10 = 0;
142         else if ((register_format == 2) || (register_format == 3)) derived_10 = 1;
143         else unreachable("No pattern match at pos 10");
144 
145         unsigned derived_19 = 0;
146         if ((register_format == 0) || (register_format == 2)) derived_19 = 0;
147         else if ((register_format == 1) || (register_format == 3)) derived_19 = 1;
148         else unreachable("No pattern match at pos 19");
149 
150         return 0x538c0 | (src0 << 3) | (vecsize << 8) | (function << 0) | (derived_10 << 10) | (derived_19 << 19);
151     } else if (register_format == 4) {
152         return 0xcf8c0 | (src0 << 3) | (vecsize << 8) | (function << 0);
153     } else {
154         unreachable("No matching state found in add_ld_var_flat");
155     }
156 }
157 
158 static inline unsigned
pan_pack_add_store_i24(bi_clause * clause,bi_instruction * ins,bi_registers * regs)159 pan_pack_add_store_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
160 {
161     unsigned src0 = bi_get_src(ins, regs, 1);
162     unsigned src1 = bi_get_src(ins, regs, 2);
163 
164     assert(ins->segment);
165     unsigned seg = ins->segment;
166     assert(seg < 8);
167 
168     bi_read_staging_register(clause, ins);
169     return 0x65800 | (src0 << 0) | (src1 << 3) | (seg << 6);
170 }
171 
172 static inline unsigned
pan_pack_fma_clz_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)173 pan_pack_fma_clz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
174 {
175     unsigned src0 = bi_get_src(ins, regs, 0);
176     assert((1 << src0) & 0xfb);
177 
178     unsigned mask = 0;
179 
180     return 0x701fd0 | (src0 << 0) | (mask << 3);
181 }
182 
183 static inline unsigned
pan_pack_fma_clz_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)184 pan_pack_fma_clz_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
185 {
186     unsigned src0 = bi_get_src(ins, regs, 0);
187     assert((1 << src0) & 0xfb);
188 
189     unsigned mask = 0;
190 
191     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
192     unsigned swz0_temp = 0;
193     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
194     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
195     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
196     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
197     else unreachable("Could not pattern match widen");
198     unsigned swz0 = swz0_temp;
199     assert(swz0 < 4);
200 
201     return 0x701ec0 | (src0 << 0) | (mask << 3) | (swz0 << 4);
202 }
203 
204 static inline unsigned
pan_pack_fma_popcount_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)205 pan_pack_fma_popcount_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
206 {
207     unsigned src0 = bi_get_src(ins, regs, 0);
208     assert((1 << src0) & 0xfb);
209 
210     return 0x73c6d8 | (src0 << 0);
211 }
212 
213 static inline unsigned
pan_pack_add_fatan_table_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)214 pan_pack_add_fatan_table_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
215 {
216     unsigned src0 = bi_get_src(ins, regs, 0);
217     assert((1 << src0) & 0xf7);
218     unsigned src1 = bi_get_src(ins, regs, 1);
219     assert((1 << src1) & 0xf7);
220 
221     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
222     unsigned lane1_temp = 0;
223     if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
224     else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
225     else unreachable("Could not pattern match widen");
226     unsigned lane1 = lane1_temp;
227     assert(lane1 < 2);
228 
229     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
230     unsigned lane0_temp = 0;
231     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
232     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
233     else unreachable("Could not pattern match widen");
234     unsigned lane0 = lane0_temp;
235     assert(lane0 < 2);
236 
237     return 0x67900 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7);
238 }
239 
240 static inline unsigned
pan_pack_fma_rrot_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)241 pan_pack_fma_rrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
242 {
243     unsigned src0 = bi_get_src(ins, regs, 0);
244     assert((1 << src0) & 0xfb);
245     unsigned src1 = bi_get_src(ins, regs, 1);
246     assert((1 << src1) & 0xfb);
247     unsigned src2 = bi_get_src(ins, regs, 2);
248 
249     unsigned bytes2 = 0;
250 
251     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
252     unsigned lane2_temp = 0;
253     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
254     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
255     else unreachable("Could not pattern match widen");
256     unsigned lane2 = lane2_temp;
257     assert(lane2 < 2);
258 
259     unsigned result_word = 0;
260 
261     return 0x33a000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
262 }
263 
264 static inline unsigned
pan_pack_fma_isubb_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)265 pan_pack_fma_isubb_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
266 {
267     unsigned src0 = bi_get_src(ins, regs, 0);
268     assert((1 << src0) & 0xfb);
269     unsigned src1 = bi_get_src(ins, regs, 1);
270     assert((1 << src1) & 0xfb);
271     unsigned src2 = bi_get_src(ins, regs, 2);
272 
273     return 0x27fe00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
274 }
275 
276 static inline unsigned
pan_pack_add_frcbrt_approx_b_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)277 pan_pack_add_frcbrt_approx_b_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
278 {
279     unsigned src0 = bi_get_src(ins, regs, 0);
280     assert((1 << src0) & 0xf7);
281 
282     return 0x67ab0 | (src0 << 0);
283 }
284 
285 static inline unsigned
pan_pack_fma_lshift_xor_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)286 pan_pack_fma_lshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
287 {
288     unsigned src0 = bi_get_src(ins, regs, 0);
289     assert((1 << src0) & 0xfb);
290     unsigned src1 = bi_get_src(ins, regs, 1);
291     assert((1 << src1) & 0xfb);
292     unsigned src2 = bi_get_src(ins, regs, 2);
293 
294     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
295     unsigned lanes2_temp = 0;
296     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
297     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
298     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
299     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
300     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
301     else unreachable("Could not pattern match widen");
302     unsigned lanes2 = lanes2_temp;
303     assert(lanes2 < 8);
304 
305     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
306     assert(not_result < 2);
307 
308     if (lanes2 != 0) {
309         unsigned derived_9 = 0;
310         if (lanes2 == 1) derived_9 = 0;
311         else if (lanes2 == 2) derived_9 = 1;
312         else if (lanes2 == 3) derived_9 = 2;
313         else if (lanes2 == 4) derived_9 = 3;
314         else unreachable("No pattern match at pos 9");
315 
316         return 0x324000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
317     } else if (lanes2 == 0) {
318         return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13);
319     } else {
320         unreachable("No matching state found in fma_lshift_xor_v4i8");
321     }
322 }
323 
324 static inline unsigned
pan_pack_add_texs_cube_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)325 pan_pack_add_texs_cube_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
326 {
327     unsigned src0 = bi_get_src(ins, regs, 0);
328     unsigned src1 = bi_get_src(ins, regs, 1);
329     unsigned src2 = bi_get_src(ins, regs, 2);
330 
331     unsigned skip = ins->skip;
332     assert(skip < 2);
333 
334     unsigned sampler_index = ins->texture.sampler_index;
335     unsigned texture_index = ins->texture.texture_index;
336     bi_write_staging_register(clause, ins);
337     return 0x5c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12);
338 }
339 
340 static inline unsigned
pan_pack_add_fround_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)341 pan_pack_add_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
342 {
343     unsigned src0 = bi_get_src(ins, regs, 0);
344 
345     unsigned abs0 = ins->src_abs[0];
346     assert(abs0 < 2);
347 
348     unsigned neg0 = ins->src_neg[0];
349     assert(neg0 < 2);
350 
351     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
352     unsigned widen0_temp = 0;
353     if (widen0_sz == 32) widen0_temp = 1;
354     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
355     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
356     else unreachable("Could not pattern match widen");
357     unsigned widen0 = widen0_temp;
358     assert(widen0 < 4);
359 
360     unsigned round = ins->roundmode;
361     assert(round < 4);
362 
363     return 0x3e820 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (round << 9);
364 }
365 
366 static inline unsigned
pan_pack_add_fexp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)367 pan_pack_add_fexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
368 {
369     unsigned src0 = bi_get_src(ins, regs, 0);
370     assert((1 << src0) & 0xf7);
371     unsigned src1 = bi_get_src(ins, regs, 1);
372     assert((1 << src1) & 0xf7);
373 
374     return 0x66ac0 | (src0 << 0) | (src1 << 3);
375 }
376 
377 static inline unsigned
pan_pack_add_doorbell(bi_clause * clause,bi_instruction * ins,bi_registers * regs)378 pan_pack_add_doorbell(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
379 {
380     unsigned src0 = bi_get_src(ins, regs, 0);
381 
382     return 0xd7860 | (src0 << 0);
383 }
384 
385 static inline unsigned
pan_pack_add_logb_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)386 pan_pack_add_logb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
387 {
388     unsigned src0 = bi_get_src(ins, regs, 0);
389 
390     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
391     unsigned swz0_temp = 0;
392     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
393     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
394     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
395     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
396     else unreachable("Could not pattern match widen");
397     unsigned swz0 = swz0_temp;
398     assert(swz0 < 4);
399 
400     return 0x3d980 | (src0 << 0) | (swz0 << 3);
401 }
402 
403 static inline unsigned
pan_pack_add_store_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)404 pan_pack_add_store_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
405 {
406     unsigned src0 = bi_get_src(ins, regs, 1);
407     unsigned src1 = bi_get_src(ins, regs, 2);
408 
409     assert(ins->segment);
410     unsigned seg = ins->segment;
411     assert(seg < 8);
412 
413     bi_read_staging_register(clause, ins);
414     return 0x62800 | (src0 << 0) | (src1 << 3) | (seg << 6);
415 }
416 
417 static inline unsigned
pan_pack_fma_arshift_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)418 pan_pack_fma_arshift_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
419 {
420     unsigned src0 = bi_get_src(ins, regs, 0);
421     assert((1 << src0) & 0xfb);
422     unsigned src1 = bi_get_src(ins, regs, 1);
423     assert((1 << src1) & 0x8);
424     unsigned src2 = bi_get_src(ins, regs, 2);
425 
426     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
427     unsigned lanes2_temp = 0;
428     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
429     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
430     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
431     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
432     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
433     else unreachable("Could not pattern match widen");
434     unsigned lanes2 = lanes2_temp;
435     assert(lanes2 < 8);
436 
437     if (lanes2 != 0) {
438         unsigned derived_9 = 0;
439         if (lanes2 == 1) derived_9 = 0;
440         else if (lanes2 == 2) derived_9 = 1;
441         else if (lanes2 == 3) derived_9 = 2;
442         else if (lanes2 == 4) derived_9 = 3;
443         else unreachable("No pattern match at pos 9");
444 
445         return 0x334018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
446     } else if (lanes2 == 0) {
447         return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6);
448     } else {
449         unreachable("No matching state found in fma_arshift_v4i8");
450     }
451 }
452 
453 static inline unsigned
pan_pack_fma_vn_asst1_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)454 pan_pack_fma_vn_asst1_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
455 {
456     unsigned src0 = bi_get_src(ins, regs, 0);
457     assert((1 << src0) & 0xfb);
458     unsigned src1 = bi_get_src(ins, regs, 1);
459     assert((1 << src1) & 0xfb);
460     unsigned src2 = bi_get_src(ins, regs, 2);
461     unsigned src3 = bi_get_src(ins, regs, 3);
462 
463     unsigned neg2 = ins->src_neg[2];
464     assert(neg2 < 2);
465 
466     return 0x27c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (neg2 << 12);
467 }
468 
469 static inline unsigned
pan_pack_add_ldexp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)470 pan_pack_add_ldexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
471 {
472     unsigned src0 = bi_get_src(ins, regs, 0);
473     unsigned src1 = bi_get_src(ins, regs, 1);
474 
475     unsigned round = ins->roundmode;
476     assert(round < 8);
477 
478     return 0x74c00 | (src0 << 0) | (src1 << 3) | (round << 6);
479 }
480 
481 static inline unsigned
pan_pack_add_isub_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)482 pan_pack_add_isub_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
483 {
484     unsigned src0 = bi_get_src(ins, regs, 0);
485     unsigned src1 = bi_get_src(ins, regs, 1);
486 
487     unsigned saturate = 0;
488 
489     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
490     unsigned lanes0_temp = 0;
491     if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
492     else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
493     else unreachable("Could not pattern match widen");
494     unsigned lanes0 = lanes0_temp;
495     assert(lanes0 < 2);
496 
497     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
498     unsigned lanes1_temp = 0;
499     if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
500     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
501     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
502     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
503     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
504     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
505     else unreachable("Could not pattern match widen");
506     unsigned lanes1 = lanes1_temp;
507     assert(lanes1 < 8);
508 
509     if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
510         unsigned derived_7 = 0;
511         if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
512         else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
513         else unreachable("No pattern match at pos 7");
514 
515         unsigned derived_9 = 0;
516         if (lanes1 == 0) derived_9 = 0;
517         else if (lanes1 == 1) derived_9 = 1;
518         else unreachable("No pattern match at pos 9");
519 
520         unsigned derived_10 = 0;
521         if (lanes0 == 0) derived_10 = 0;
522         else if (lanes0 == 1) derived_10 = 1;
523         else unreachable("No pattern match at pos 10");
524 
525         return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10);
526     } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
527         unsigned derived_7 = 0;
528         if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
529         else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
530         else unreachable("No pattern match at pos 7");
531 
532         unsigned derived_9 = 0;
533         if (lanes1 == 2) derived_9 = 0;
534         else if (lanes1 == 3) derived_9 = 1;
535         else unreachable("No pattern match at pos 9");
536 
537         return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
538     } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
539         unsigned derived_7 = 0;
540         if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
541         else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
542         else unreachable("No pattern match at pos 7");
543 
544         unsigned derived_9 = 0;
545         if (lanes1 == 4) derived_9 = 0;
546         else if (lanes1 == 5) derived_9 = 1;
547         else unreachable("No pattern match at pos 9");
548 
549         return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
550     } else {
551         unreachable("No matching state found in add_isub_v2u16");
552     }
553 }
554 
555 static inline unsigned
pan_pack_add_branchc_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)556 pan_pack_add_branchc_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
557 {
558     unsigned src0 = bi_get_src(ins, regs, 0);
559     unsigned src1 = bi_get_src(ins, regs, 1);
560     assert((1 << src1) & 0xf7);
561 
562     unsigned combine = 0;
563 
564     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
565     unsigned lane0_temp = 0;
566     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
567     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
568     else unreachable("Could not pattern match widen");
569     unsigned lane0 = lane0_temp;
570     assert(lane0 < 2);
571 
572     unsigned derived_9 = 0;
573     if (lane0 == 0) derived_9 = 0;
574     else if (lane0 == 1) derived_9 = 1;
575     else unreachable("No pattern match at pos 9");
576 
577     unsigned derived_3 = 0;
578     if (lane0 == 1) derived_3 = 0;
579     else if (lane0 == 0) derived_3 = 1;
580     else unreachable("No pattern match at pos 3");
581 
582     return 0x6f030 | (src0 << 0) | (src1 << 6) | (combine << 10) | (derived_9 << 9) | (derived_3 << 3);
583 }
584 
585 static inline unsigned
pan_pack_fma_fround_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)586 pan_pack_fma_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
587 {
588     unsigned src0 = bi_get_src(ins, regs, 0);
589     assert((1 << src0) & 0xfb);
590 
591     unsigned abs0 = ins->src_abs[0];
592     assert(abs0 < 2);
593 
594     unsigned neg0 = ins->src_neg[0];
595     assert(neg0 < 2);
596 
597     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
598     unsigned widen0_temp = 0;
599     if (widen0_sz == 32) widen0_temp = 1;
600     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
601     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
602     else unreachable("Could not pattern match widen");
603     unsigned widen0 = widen0_temp;
604     assert(widen0 < 4);
605 
606     unsigned round = ins->roundmode;
607     assert(round < 8);
608 
609     if (round != 4) {
610         unsigned derived_9 = 0;
611         if (round == 0) derived_9 = 0;
612         else if (round == 1) derived_9 = 1;
613         else if (round == 2) derived_9 = 2;
614         else if (round == 3) derived_9 = 3;
615         else unreachable("No pattern match at pos 9");
616 
617         return 0x70c020 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (derived_9 << 9);
618     } else if (round == 4) {
619         return 0x707620 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3);
620     } else {
621         unreachable("No matching state found in fma_fround_f32");
622     }
623 }
624 
625 static inline unsigned
pan_pack_add_vn_asst2_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)626 pan_pack_add_vn_asst2_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
627 {
628     unsigned src0 = bi_get_src(ins, regs, 0);
629 
630     unsigned neg0 = ins->src_neg[0];
631     assert(neg0 < 2);
632 
633     return 0x3dfa0 | (src0 << 0) | (neg0 << 3);
634 }
635 
636 static inline unsigned
pan_pack_add_fround_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)637 pan_pack_add_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
638 {
639     unsigned src0 = bi_get_src(ins, regs, 0);
640 
641     unsigned abs0 = ins->src_abs[0];
642     assert(abs0 < 2);
643 
644     unsigned neg0 = ins->src_neg[0];
645     assert(neg0 < 2);
646 
647     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
648     unsigned swz0_temp = 0;
649     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
650     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
651     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
652     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
653     else unreachable("Could not pattern match widen");
654     unsigned swz0 = swz0_temp;
655     assert(swz0 < 4);
656 
657     unsigned round = ins->roundmode;
658     assert(round < 4);
659 
660     return 0x3e800 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (round << 9);
661 }
662 
663 static inline unsigned
pan_pack_fma_atom_c_return_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)664 pan_pack_fma_atom_c_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
665 {
666     unsigned src0 = bi_get_src(ins, regs, 0);
667     assert((1 << src0) & 0xf3);
668     unsigned src1 = bi_get_src(ins, regs, 1);
669     assert((1 << src1) & 0xf3);
670     unsigned src2 = bi_get_src(ins, regs, 2);
671     assert((1 << src2) & 0xf7);
672 
673     unsigned atom_opc = 2;
674 
675     return 0x2f2000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
676 }
677 
678 static inline unsigned
pan_pack_add_icmpi_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)679 pan_pack_add_icmpi_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
680 {
681     unsigned src0 = bi_get_src(ins, regs, 0);
682     unsigned src1 = bi_get_src(ins, regs, 1);
683 
684     unsigned result_type = 1;
685 
686     unsigned cmpf_table[] = {
687         ~0, ~0, ~0, 1, 0, ~0, ~0
688     };
689     unsigned cmpf = cmpf_table[ins->cond];
690     assert(cmpf < 2);
691 
692     return 0x7b800 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
693 }
694 
695 static inline unsigned
pan_pack_fma_fma_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)696 pan_pack_fma_fma_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
697 {
698     unsigned src0 = bi_get_src(ins, regs, 0);
699     assert((1 << src0) & 0xfb);
700     unsigned src1 = bi_get_src(ins, regs, 1);
701     assert((1 << src1) & 0xfb);
702     unsigned src2 = bi_get_src(ins, regs, 2);
703 
704     unsigned neg0 = ins->src_neg[0];
705     assert(neg0 < 2);
706 
707     unsigned neg1 = ins->src_neg[1];
708     assert(neg1 < 2);
709 
710     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
711     unsigned swz0_temp = 0;
712     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
713     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
714     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
715     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
716     else unreachable("Could not pattern match widen");
717     unsigned swz0 = swz0_temp;
718     assert(swz0 < 4);
719 
720     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
721     unsigned swz1_temp = 0;
722     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
723     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
724     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
725     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
726     else unreachable("Could not pattern match widen");
727     unsigned swz1 = swz1_temp;
728     assert(swz1 < 4);
729 
730     unsigned round = ins->roundmode;
731     assert(round < 4);
732 
733     unsigned clamp = ins->outmod;
734     assert(clamp < 4);
735 
736     unsigned neg2 = ins->src_neg[2];
737     assert(neg2 < 2);
738 
739     unsigned swz2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
740     unsigned swz2_temp = 0;
741     if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) swz2_temp = 0;
742     else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swz2_temp = 1;
743     else if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swz2_temp = 2;
744     else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) swz2_temp = 3;
745     else unreachable("Could not pattern match widen");
746     unsigned swz2 = swz2_temp;
747     assert(swz2 < 4);
748 
749     unsigned derived_17 = 0;
750     if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0;
751     else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1;
752     else unreachable("No pattern match at pos 17");
753 
754     return 0x400000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (neg2 << 18) | (swz2 << 19) | (derived_17 << 17);
755 }
756 
757 static inline unsigned
pan_pack_fma_arshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)758 pan_pack_fma_arshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
759 {
760     unsigned src0 = bi_get_src(ins, regs, 0);
761     assert((1 << src0) & 0xfb);
762     unsigned src1 = bi_get_src(ins, regs, 1);
763     assert((1 << src1) & 0xfb);
764     unsigned src2 = bi_get_src(ins, regs, 2);
765 
766     unsigned bytes2 = 0;
767 
768     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
769     unsigned lane2_temp = 0;
770     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
771     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
772     else unreachable("Could not pattern match widen");
773     unsigned lane2 = lane2_temp;
774     assert(lane2 < 2);
775 
776     unsigned result_word = 0;
777 
778     return 0x33e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
779 }
780 
781 static inline unsigned
pan_pack_fma_fmul_slice_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)782 pan_pack_fma_fmul_slice_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
783 {
784     unsigned src0 = bi_get_src(ins, regs, 0);
785     assert((1 << src0) & 0xfb);
786     unsigned src1 = bi_get_src(ins, regs, 1);
787     assert((1 << src1) & 0xfb);
788 
789     return 0x70cb40 | (src0 << 0) | (src1 << 3);
790 }
791 
792 static inline unsigned
pan_pack_add_ld_var_flat_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)793 pan_pack_add_ld_var_flat_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
794 {
795 
796     unsigned vecsize = ins->vector_channels - 1;
797     assert(vecsize < 4);
798 
799     unsigned register_format_temp = 0;
800     if (ins->format == nir_type_float32) register_format_temp = 0;
801     else if (ins->format == nir_type_float16) register_format_temp = 1;
802     else if (ins->format == nir_type_uint32) register_format_temp = 2;
803     else if (ins->format == nir_type_int32) register_format_temp = 3;
804     else unreachable("Could not pattern match register format");
805     unsigned register_format = register_format_temp;
806     assert(register_format < 8);
807 
808     unsigned function = 3;
809 
810     unsigned index = bi_get_immediate(ins, 0);
811     bi_write_staging_register(clause, ins);
812     if (register_format != 4) {
813         unsigned derived_10 = 0;
814         if ((register_format == 0) || (register_format == 1)) derived_10 = 0;
815         else if ((register_format == 2) || (register_format == 3)) derived_10 = 1;
816         else unreachable("No pattern match at pos 10");
817 
818         unsigned derived_19 = 0;
819         if ((register_format == 0) || (register_format == 2)) derived_19 = 0;
820         else if ((register_format == 1) || (register_format == 3)) derived_19 = 1;
821         else unreachable("No pattern match at pos 19");
822 
823         return 0x53800 | (vecsize << 8) | (function << 0) | (index << 3) | (derived_10 << 10) | (derived_19 << 19);
824     } else if (register_format == 4) {
825         return 0xcf800 | (vecsize << 8) | (function << 0) | (index << 3);
826     } else {
827         unreachable("No matching state found in add_ld_var_flat_imm");
828     }
829 }
830 
831 static inline unsigned
pan_pack_fma_csel_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)832 pan_pack_fma_csel_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
833 {
834     unsigned src0 = bi_get_src(ins, regs, 0);
835     assert((1 << src0) & 0xfb);
836     unsigned src1 = bi_get_src(ins, regs, 1);
837     assert((1 << src1) & 0xfb);
838     unsigned src2 = bi_get_src(ins, regs, 2);
839     unsigned src3 = bi_get_src(ins, regs, 3);
840 
841     unsigned cmpf_table[] = {
842         ~0, 4, 5, 2, 1, 0, 3
843     };
844     unsigned cmpf = cmpf_table[ins->cond];
845     assert(cmpf < 8);
846 
847     if ((cmpf == 4) || (cmpf == 5)) {
848         { unsigned temp = src0; src0 = src1; src1 = temp; }
849         if (cmpf == 5) cmpf = 2;
850         else if (cmpf == 4) cmpf = 1;
851     }
852 
853     if (cmpf == 3) {
854         { unsigned temp = src2; src2 = src3; src3 = temp; }
855         if (cmpf == 3) cmpf = 0;
856     }
857 
858     unsigned derived_12 = 0;
859     if (cmpf == 0) derived_12 = 0;
860     else if (cmpf == 1) derived_12 = 1;
861     else if (cmpf == 2) derived_12 = 2;
862     else unreachable("No pattern match at pos 12");
863 
864     return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
865 }
866 
867 static inline unsigned
pan_pack_fma_csel_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)868 pan_pack_fma_csel_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
869 {
870     unsigned src0 = bi_get_src(ins, regs, 0);
871     assert((1 << src0) & 0xfb);
872     unsigned src1 = bi_get_src(ins, regs, 1);
873     assert((1 << src1) & 0xfb);
874     unsigned src2 = bi_get_src(ins, regs, 2);
875     unsigned src3 = bi_get_src(ins, regs, 3);
876 
877     unsigned cmpf_table[] = {
878         ~0, ~0, ~0, ~0, ~0, 0, 1
879     };
880     unsigned cmpf = cmpf_table[ins->cond];
881     assert(cmpf < 2);
882 
883     if (cmpf == 1) {
884         { unsigned temp = src2; src2 = src3; src3 = temp; }
885         if (cmpf == 1) cmpf = 0;
886     }
887 
888     unsigned derived_12 = 0;
889     if (cmpf == 0) derived_12 = 3;
890     else unreachable("No pattern match at pos 12");
891 
892     return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
893 }
894 
895 static inline unsigned
pan_pack_add_load_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)896 pan_pack_add_load_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
897 {
898     unsigned src0 = bi_get_src(ins, regs, 0);
899     unsigned src1 = bi_get_src(ins, regs, 1);
900 
901     assert(ins->segment);
902     unsigned seg = ins->segment;
903     assert(seg < 8);
904 
905     unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]);
906     unsigned lane_temp = 0;
907     if (lane_sz == 32) lane_temp = 0;
908     else if (lane_sz == 64) lane_temp = 1;
909     else unreachable("Could not pattern match widen");
910     unsigned lane = lane_temp;
911     assert(lane < 2);
912 
913     ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
914     bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
915     unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0;
916     assert(extend < 4);
917 
918     bi_write_staging_register(clause, ins);
919     if ((extend == 0) && (lane == 0)) {
920         return 0x60c00 | (src0 << 0) | (src1 << 3) | (seg << 6);
921     } else if ((extend != 0) && (lane == 1)) {
922         unsigned derived_9 = 0;
923         if (extend == 1) derived_9 = 0;
924         else if (extend == 2) derived_9 = 1;
925         else unreachable("No pattern match at pos 9");
926 
927         return 0x61c00 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
928     } else {
929         unreachable("No matching state found in add_load_i32");
930     }
931 }
932 
933 static inline unsigned
pan_pack_add_st_tile(bi_clause * clause,bi_instruction * ins,bi_registers * regs)934 pan_pack_add_st_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
935 {
936     unsigned src0 = bi_get_src(ins, regs, 1);
937     unsigned src1 = bi_get_src(ins, regs, 2);
938     unsigned src2 = bi_get_src(ins, regs, 3);
939     assert((1 << src2) & 0xf7);
940 
941     unsigned vecsize = ins->vector_channels - 1;
942     assert(vecsize < 4);
943 
944     bi_read_staging_register(clause, ins);
945     return 0xcb800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
946 }
947 
948 static inline unsigned
pan_pack_fma_rshift_or_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)949 pan_pack_fma_rshift_or_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
950 {
951     unsigned src0 = bi_get_src(ins, regs, 0);
952     assert((1 << src0) & 0xfb);
953     unsigned src1 = bi_get_src(ins, regs, 1);
954     assert((1 << src1) & 0xfb);
955     unsigned src2 = bi_get_src(ins, regs, 2);
956 
957     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
958     unsigned lanes2_temp = 0;
959     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
960     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
961     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
962     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
963     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
964     else unreachable("Could not pattern match widen");
965     unsigned lanes2 = lanes2_temp;
966     assert(lanes2 < 8);
967 
968     unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
969     assert(not1 < 2);
970 
971     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
972     assert(not_result < 2);
973 
974     if (lanes2 != 0) {
975         unsigned derived_9 = 0;
976         if (lanes2 == 1) derived_9 = 0;
977         else if (lanes2 == 2) derived_9 = 1;
978         else if (lanes2 == 3) derived_9 = 2;
979         else if (lanes2 == 4) derived_9 = 3;
980         else unreachable("No pattern match at pos 9");
981 
982         return 0x302000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
983     } else if (lanes2 == 0) {
984         return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
985     } else {
986         unreachable("No matching state found in fma_rshift_or_v4i8");
987     }
988 }
989 
990 static inline unsigned
pan_pack_fma_csel_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)991 pan_pack_fma_csel_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
992 {
993     unsigned src0 = bi_get_src(ins, regs, 0);
994     assert((1 << src0) & 0xfb);
995     unsigned src1 = bi_get_src(ins, regs, 1);
996     assert((1 << src1) & 0xfb);
997     unsigned src2 = bi_get_src(ins, regs, 2);
998     unsigned src3 = bi_get_src(ins, regs, 3);
999 
1000     unsigned cmpf_table[] = {
1001         ~0, 2, 3, 1, 0, ~0, ~0
1002     };
1003     unsigned cmpf = cmpf_table[ins->cond];
1004     assert(cmpf < 4);
1005 
1006     if ((cmpf == 2) || (cmpf == 3)) {
1007         { unsigned temp = src0; src0 = src1; src1 = temp; }
1008         if (cmpf == 3) cmpf = 1;
1009         else if (cmpf == 2) cmpf = 0;
1010     }
1011 
1012     unsigned derived_12 = 0;
1013     if (cmpf == 0) derived_12 = 0;
1014     else if (cmpf == 1) derived_12 = 1;
1015     else unreachable("No pattern match at pos 12");
1016 
1017     return 0x2e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
1018 }
1019 
1020 static inline unsigned
pan_pack_add_mkvec_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1021 pan_pack_add_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1022 {
1023     unsigned src0 = bi_get_src(ins, regs, 0);
1024     unsigned src1 = bi_get_src(ins, regs, 1);
1025 
1026     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1027     unsigned lane0_temp = 0;
1028     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
1029     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
1030     else unreachable("Could not pattern match widen");
1031     unsigned lane0 = lane0_temp;
1032     assert(lane0 < 2);
1033 
1034     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1035     unsigned lane1_temp = 0;
1036     if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
1037     else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
1038     else unreachable("Could not pattern match widen");
1039     unsigned lane1 = lane1_temp;
1040     assert(lane1 < 2);
1041 
1042     return 0x75300 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7);
1043 }
1044 
1045 static inline unsigned
pan_pack_fma_atom_pre_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1046 pan_pack_fma_atom_pre_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1047 {
1048     unsigned src0 = bi_get_src(ins, regs, 0);
1049     assert((1 << src0) & 0xfb);
1050     unsigned src1 = bi_get_src(ins, regs, 1);
1051     assert((1 << src1) & 0xfb);
1052     unsigned src2 = bi_get_src(ins, regs, 2);
1053 
1054     unsigned atom_opc = 2;
1055 
1056     return 0x6ec000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
1057 }
1058 
1059 static inline unsigned
pan_pack_fma_shaddxl_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1060 pan_pack_fma_shaddxl_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1061 {
1062     unsigned src0 = bi_get_src(ins, regs, 0);
1063     assert((1 << src0) & 0xfb);
1064     unsigned src1 = bi_get_src(ins, regs, 1);
1065     assert((1 << src1) & 0xfb);
1066 
1067     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1068     unsigned lane1_temp = 0;
1069     if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
1070     else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
1071     else if (lane1_sz == 32) lane1_temp = 2;
1072     else unreachable("Could not pattern match widen");
1073     unsigned lane1 = lane1_temp;
1074     assert(lane1 < 4);
1075 
1076     unsigned shift = 0;
1077     return 0x70e000 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6);
1078 }
1079 
1080 static inline unsigned
pan_pack_add_branch_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1081 pan_pack_add_branch_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1082 {
1083     unsigned src0 = bi_get_src(ins, regs, 0);
1084     unsigned src1 = bi_get_src(ins, regs, 1);
1085     unsigned src2 = bi_get_src(ins, regs, 2);
1086     assert((1 << src2) & 0xf7);
1087 
1088     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1089     unsigned widen0_temp = 0;
1090     if (widen0_sz == 32) widen0_temp = 0;
1091     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
1092     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
1093     else unreachable("Could not pattern match widen");
1094     unsigned widen0 = widen0_temp;
1095     assert(widen0 < 4);
1096 
1097     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1098     unsigned widen1_temp = 0;
1099     if (widen1_sz == 32) widen1_temp = 0;
1100     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
1101     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
1102     else unreachable("Could not pattern match widen");
1103     unsigned widen1 = widen1_temp;
1104     assert(widen1 < 4);
1105 
1106     unsigned cmpf_table[] = {
1107         ~0, ~0, ~0, ~0, ~0, 0, 1
1108     };
1109     unsigned cmpf = cmpf_table[ins->cond];
1110     assert(cmpf < 2);
1111 
1112     if (((src0 > src1) && (cmpf == 0)) || ((src0 < src1) && (cmpf == 1))) {
1113         { unsigned temp = src0; src0 = src1; src1 = temp; }
1114         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
1115     }
1116 
1117     unsigned derived_12 = 0;
1118     if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
1119     else unreachable("No pattern match at pos 12");
1120 
1121     unsigned derived_9 = 0;
1122     if ((src0 == src1) && (cmpf == 0)) derived_9 = 1;
1123     else if (((src0 < src1) && (cmpf == 0)) || ((src0 >= src1) && (cmpf == 1))) derived_9 = 4;
1124     else unreachable("No pattern match at pos 9");
1125 
1126     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
1127 }
1128 
1129 static inline unsigned
pan_pack_fma_quiet_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1130 pan_pack_fma_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1131 {
1132     unsigned src0 = bi_get_src(ins, regs, 0);
1133     assert((1 << src0) & 0xfb);
1134 
1135     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1136     unsigned swz0_temp = 0;
1137     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1138     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1139     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
1140     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
1141     else unreachable("Could not pattern match widen");
1142     unsigned swz0 = swz0_temp;
1143     assert(swz0 < 4);
1144 
1145     return 0x701900 | (src0 << 0) | (swz0 << 4);
1146 }
1147 
1148 static inline unsigned
pan_pack_fma_lshift_or_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1149 pan_pack_fma_lshift_or_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1150 {
1151     unsigned src0 = bi_get_src(ins, regs, 0);
1152     assert((1 << src0) & 0xfb);
1153     unsigned src1 = bi_get_src(ins, regs, 1);
1154     assert((1 << src1) & 0xfb);
1155     unsigned src2 = bi_get_src(ins, regs, 2);
1156 
1157     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
1158     unsigned lanes2_temp = 0;
1159     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
1160     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
1161     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
1162     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
1163     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
1164     else unreachable("Could not pattern match widen");
1165     unsigned lanes2 = lanes2_temp;
1166     assert(lanes2 < 8);
1167 
1168     unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
1169     assert(not1 < 2);
1170 
1171     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
1172     assert(not_result < 2);
1173 
1174     if (lanes2 != 0) {
1175         unsigned derived_9 = 0;
1176         if (lanes2 == 1) derived_9 = 0;
1177         else if (lanes2 == 2) derived_9 = 1;
1178         else if (lanes2 == 3) derived_9 = 2;
1179         else if (lanes2 == 4) derived_9 = 3;
1180         else unreachable("No pattern match at pos 9");
1181 
1182         return 0x312000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
1183     } else if (lanes2 == 0) {
1184         return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
1185     } else {
1186         unreachable("No matching state found in fma_lshift_or_v4i8");
1187     }
1188 }
1189 
1190 static inline unsigned
pan_pack_add_ld_var_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1191 pan_pack_add_ld_var_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1192 {
1193     unsigned src0 = bi_get_src(ins, regs, 1);
1194 
1195     unsigned vecsize = ins->vector_channels - 1;
1196     assert(vecsize < 4);
1197 
1198     unsigned update = (ins->constant.u64 >= 20) ? 3 : 0;
1199     assert(update < 4);
1200 
1201     unsigned register_format_temp = 0;
1202     if (ins->format == nir_type_float32) register_format_temp = 0;
1203     else if (ins->format == nir_type_float16) register_format_temp = 1;
1204     else unreachable("Could not pattern match register format");
1205     unsigned register_format = register_format_temp;
1206     assert(register_format < 4);
1207 
1208     unsigned sample = ins->load_vary.interp_mode;
1209     assert(sample < 8);
1210 
1211     unsigned index = bi_get_immediate(ins, 0);
1212     bi_write_staging_register(clause, ins);
1213     if (register_format != 2) {
1214         unsigned derived_19 = 0;
1215         if (register_format == 0) derived_19 = 0;
1216         else if (register_format == 1) derived_19 = 1;
1217         else unreachable("No pattern match at pos 19");
1218 
1219         unsigned derived_10 = 0;
1220         if ((sample == 0) && (update == 0)) derived_10 = 0;
1221         else if ((sample == 1) && (update == 0)) derived_10 = 1;
1222         else if ((sample == 2) && (update == 0)) derived_10 = 2;
1223         else if ((sample == 3) && (update == 0)) derived_10 = 3;
1224         else if ((sample == 4) && (update == 1)) derived_10 = 4;
1225         else if ((sample == 0) && (update == 2)) derived_10 = 8;
1226         else if ((sample == 1) && (update == 2)) derived_10 = 9;
1227         else if ((sample == 0) && (update == 3)) derived_10 = 10;
1228         else if ((sample == 1) && (update == 3)) derived_10 = 11;
1229         else if ((sample == 2) && (update == 3)) derived_10 = 12;
1230         else if ((sample == 3) && (update == 3)) derived_10 = 13;
1231         else unreachable("No pattern match at pos 10");
1232 
1233         return 0x50000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_19 << 19) | (derived_10 << 10);
1234     } else if (register_format == 2) {
1235         unsigned derived_10 = 0;
1236         if ((sample == 0) && (update == 0)) derived_10 = 0;
1237         else if ((sample == 1) && (update == 0)) derived_10 = 1;
1238         else if ((sample == 2) && (update == 0)) derived_10 = 2;
1239         else if ((sample == 3) && (update == 0)) derived_10 = 3;
1240         else if ((sample == 4) && (update == 1)) derived_10 = 4;
1241         else if ((sample == 0) && (update == 2)) derived_10 = 8;
1242         else if ((sample == 1) && (update == 2)) derived_10 = 9;
1243         else if ((sample == 0) && (update == 3)) derived_10 = 10;
1244         else if ((sample == 1) && (update == 3)) derived_10 = 11;
1245         else if ((sample == 2) && (update == 3)) derived_10 = 12;
1246         else if ((sample == 3) && (update == 3)) derived_10 = 13;
1247         else unreachable("No pattern match at pos 10");
1248 
1249         return 0xcc000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_10 << 10);
1250     } else {
1251         unreachable("No matching state found in add_ld_var_imm");
1252     }
1253 }
1254 
1255 static inline unsigned
pan_pack_fma_atom_c_return_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1256 pan_pack_fma_atom_c_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1257 {
1258     unsigned src0 = bi_get_src(ins, regs, 0);
1259     assert((1 << src0) & 0xf3);
1260     unsigned src1 = bi_get_src(ins, regs, 1);
1261     assert((1 << src1) & 0xf3);
1262     unsigned src2 = bi_get_src(ins, regs, 2);
1263     assert((1 << src2) & 0xf7);
1264 
1265     unsigned atom_opc = 2;
1266 
1267     return 0x2f6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
1268 }
1269 
1270 static inline unsigned
pan_pack_add_barrier(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1271 pan_pack_add_barrier(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1272 {
1273 
1274     return 0xd7874;
1275 }
1276 
1277 static inline unsigned
pan_pack_add_quiet_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1278 pan_pack_add_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1279 {
1280     unsigned src0 = bi_get_src(ins, regs, 0);
1281 
1282     return 0x3d970 | (src0 << 0);
1283 }
1284 
1285 static inline unsigned
pan_pack_fma_atom_c_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1286 pan_pack_fma_atom_c_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1287 {
1288     unsigned src0 = bi_get_src(ins, regs, 0);
1289     assert((1 << src0) & 0xf3);
1290     unsigned src1 = bi_get_src(ins, regs, 1);
1291     assert((1 << src1) & 0xf3);
1292     unsigned src2 = bi_get_src(ins, regs, 2);
1293     assert((1 << src2) & 0xf7);
1294 
1295     unsigned atom_opc = 2;
1296 
1297     return 0x2f0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
1298 }
1299 
1300 static inline unsigned
pan_pack_add_v2s8_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1301 pan_pack_add_v2s8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1302 {
1303     unsigned src0 = bi_get_src(ins, regs, 0);
1304 
1305     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1306     unsigned swz0_temp = 0;
1307     if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1308     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1309     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
1310     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
1311     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
1312     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
1313     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
1314     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
1315     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
1316     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
1317     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
1318     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
1319     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
1320     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
1321     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
1322     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
1323     else unreachable("Could not pattern match widen");
1324     unsigned swz0 = swz0_temp;
1325     assert(swz0 < 16);
1326 
1327     return 0x3c800 | (src0 << 0) | (swz0 << 4);
1328 }
1329 
1330 static inline unsigned
pan_pack_add_fsincos_offset_u6(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1331 pan_pack_add_fsincos_offset_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1332 {
1333     unsigned src0 = bi_get_src(ins, regs, 0);
1334     assert((1 << src0) & 0xf7);
1335 
1336     unsigned scale = 0;
1337 
1338     return 0x67aa0 | (src0 << 0) | (scale << 3);
1339 }
1340 
1341 static inline unsigned
pan_pack_add_lea_attr(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1342 pan_pack_add_lea_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1343 {
1344     unsigned src0 = bi_get_src(ins, regs, 0);
1345     unsigned src1 = bi_get_src(ins, regs, 1);
1346     unsigned src2 = bi_get_src(ins, regs, 2);
1347 
1348     unsigned register_format_temp = 0;
1349     if (ins->format == nir_type_float16) register_format_temp = 0;
1350     else if (ins->format == nir_type_float32) register_format_temp = 1;
1351     else if (ins->format == nir_type_int32) register_format_temp = 2;
1352     else if (ins->format == nir_type_uint32) register_format_temp = 3;
1353     else if (ins->format == nir_type_int16) register_format_temp = 4;
1354     else if (ins->format == nir_type_uint16) register_format_temp = 5;
1355     else if (ins->format == nir_type_float64) register_format_temp = 6;
1356     else if (ins->format == nir_type_int64) register_format_temp = 7;
1357     else unreachable("Could not pattern match register format");
1358     unsigned register_format = register_format_temp;
1359     assert(register_format < 16);
1360 
1361     bi_write_staging_register(clause, ins);
1362     if (register_format != 8) {
1363         unsigned derived_11 = 0;
1364         if (register_format == 0) derived_11 = 0;
1365         else if (register_format == 1) derived_11 = 1;
1366         else if (register_format == 2) derived_11 = 2;
1367         else if (register_format == 3) derived_11 = 3;
1368         else if (register_format == 4) derived_11 = 4;
1369         else if (register_format == 5) derived_11 = 5;
1370         else if (register_format == 6) derived_11 = 6;
1371         else if (register_format == 7) derived_11 = 7;
1372         else unreachable("No pattern match at pos 11");
1373 
1374         return 0xc0400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11);
1375     } else if (register_format == 8) {
1376         return 0xc8400 | (src0 << 0) | (src1 << 3) | (src2 << 6);
1377     } else {
1378         unreachable("No matching state found in add_lea_attr");
1379     }
1380 }
1381 
1382 static inline unsigned
pan_pack_add_fadd_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1383 pan_pack_add_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1384 {
1385     unsigned src0 = bi_get_src(ins, regs, 0);
1386     unsigned src1 = bi_get_src(ins, regs, 1);
1387 
1388     unsigned round = ins->roundmode;
1389     assert(round < 8);
1390 
1391     unsigned abs1 = ins->src_abs[1];
1392     assert(abs1 < 2);
1393 
1394     unsigned neg0 = ins->src_neg[0];
1395     assert(neg0 < 2);
1396 
1397     unsigned neg1 = ins->src_neg[1];
1398     assert(neg1 < 2);
1399 
1400     unsigned clamp = ins->outmod;
1401     assert(clamp < 4);
1402 
1403     unsigned abs0 = ins->src_abs[0];
1404     assert(abs0 < 2);
1405 
1406     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1407     unsigned widen0_temp = 0;
1408     if (widen0_sz == 32) widen0_temp = 0;
1409     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
1410     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
1411     else unreachable("Could not pattern match widen");
1412     unsigned widen0 = widen0_temp;
1413     assert(widen0 < 4);
1414 
1415     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1416     unsigned widen1_temp = 0;
1417     if (widen1_sz == 32) widen1_temp = 0;
1418     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
1419     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
1420     else unreachable("Could not pattern match widen");
1421     unsigned widen1 = widen1_temp;
1422     assert(widen1 < 4);
1423 
1424     if (((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) {
1425         { unsigned temp = src0; src0 = src1; src1 = temp; }
1426         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
1427         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
1428         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
1429     }
1430 
1431     if (round != 4) {
1432         unsigned derived_13 = 0;
1433         if (round == 0) derived_13 = 0;
1434         else if (round == 1) derived_13 = 1;
1435         else if (round == 2) derived_13 = 2;
1436         else if (round == 3) derived_13 = 3;
1437         else unreachable("No pattern match at pos 13");
1438 
1439         unsigned derived_9 = 0;
1440         if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
1441         else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
1442         else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
1443         else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
1444         else unreachable("No pattern match at pos 9");
1445 
1446         return 0x20000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (abs0 << 15) | (derived_13 << 13) | (derived_9 << 9);
1447     } else if ((round == 4) && (widen0 == 0) && (widen1 == 0) && (abs0 == 0) && (abs1 == 0) && (neg0 == 0) && (neg1 == 0) && (clamp == 0)) {
1448         return 0x75200 | (src0 << 0) | (src1 << 3);
1449     } else {
1450         unreachable("No matching state found in add_fadd_f32");
1451     }
1452 }
1453 
1454 static inline unsigned
pan_pack_fma_atom_post_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1455 pan_pack_fma_atom_post_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1456 {
1457     unsigned src0 = bi_get_src(ins, regs, 0);
1458     assert((1 << src0) & 0xfb);
1459     unsigned src1 = bi_get_src(ins, regs, 1);
1460     assert((1 << src1) & 0xfb);
1461 
1462     unsigned atom_opc = 2;
1463 
1464     return 0x6ee000 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
1465 }
1466 
1467 static inline unsigned
pan_pack_fma_seg_sub(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1468 pan_pack_fma_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1469 {
1470     unsigned src0 = bi_get_src(ins, regs, 0);
1471     assert((1 << src0) & 0xfb);
1472 
1473     assert(ins->segment);
1474     unsigned seg = ins->segment;
1475     assert(seg < 8);
1476 
1477     unsigned preserve_null = 0;
1478 
1479     return 0x701540 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
1480 }
1481 
1482 static inline unsigned
pan_pack_add_seg_sub(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1483 pan_pack_add_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1484 {
1485     unsigned src0 = bi_get_src(ins, regs, 0);
1486 
1487     assert(ins->segment);
1488     unsigned seg = ins->segment;
1489     assert(seg < 8);
1490 
1491     unsigned preserve_null = 0;
1492 
1493     return 0x3d540 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
1494 }
1495 
1496 static inline unsigned
pan_pack_fma_frexpe_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1497 pan_pack_fma_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1498 {
1499     unsigned src0 = bi_get_src(ins, regs, 0);
1500     assert((1 << src0) & 0xfb);
1501 
1502     unsigned neg = ins->src_neg[0];
1503     assert(neg < 2);
1504 
1505     unsigned sqrt = 0;
1506 
1507     unsigned log = 1;
1508 
1509     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1510     unsigned widen0_temp = 0;
1511     if (widen0_sz == 32) widen0_temp = 1;
1512     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
1513     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
1514     else unreachable("Could not pattern match widen");
1515     unsigned widen0 = widen0_temp;
1516     assert(widen0 < 4);
1517 
1518     if (log == 0) {
1519         return 0x701c20 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (widen0 << 3);
1520     } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
1521         return 0x701e20 | (src0 << 0) | (widen0 << 3);
1522     } else {
1523         unreachable("No matching state found in fma_frexpe_f32");
1524     }
1525 }
1526 
1527 static inline unsigned
pan_pack_add_frsq_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1528 pan_pack_add_frsq_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1529 {
1530     unsigned src0 = bi_get_src(ins, regs, 0);
1531     assert((1 << src0) & 0xf7);
1532 
1533     unsigned neg = ins->src_neg[0];
1534     assert(neg < 2);
1535 
1536     unsigned abs0 = ins->src_abs[0];
1537     assert(abs0 < 2);
1538 
1539     unsigned divzero = 0;
1540 
1541     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1542     unsigned lane0_temp = 0;
1543     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
1544     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
1545     else unreachable("Could not pattern match widen");
1546     unsigned lane0 = lane0_temp;
1547     assert(lane0 < 2);
1548 
1549     return 0x67280 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8);
1550 }
1551 
1552 static inline unsigned
pan_pack_fma_lshift_and_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1553 pan_pack_fma_lshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1554 {
1555     unsigned src0 = bi_get_src(ins, regs, 0);
1556     assert((1 << src0) & 0xfb);
1557     unsigned src1 = bi_get_src(ins, regs, 1);
1558     assert((1 << src1) & 0xfb);
1559     unsigned src2 = bi_get_src(ins, regs, 2);
1560 
1561     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
1562     unsigned lanes2_temp = 0;
1563     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
1564     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
1565     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
1566     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
1567     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
1568     else unreachable("Could not pattern match widen");
1569     unsigned lanes2 = lanes2_temp;
1570     assert(lanes2 < 8);
1571 
1572     unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
1573     assert(not1 < 2);
1574 
1575     unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
1576     assert(not_result < 2);
1577 
1578     if (lanes2 != 0) {
1579         unsigned derived_9 = 0;
1580         if (lanes2 == 1) derived_9 = 0;
1581         else if (lanes2 == 2) derived_9 = 1;
1582         else if (lanes2 == 3) derived_9 = 2;
1583         else if (lanes2 == 4) derived_9 = 3;
1584         else unreachable("No pattern match at pos 9");
1585 
1586         return 0x310000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
1587     } else if (lanes2 == 0) {
1588         return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
1589     } else {
1590         unreachable("No matching state found in fma_lshift_and_v4i8");
1591     }
1592 }
1593 
1594 static inline unsigned
pan_pack_add_branch_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1595 pan_pack_add_branch_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1596 {
1597     unsigned src0 = bi_get_src(ins, regs, 0);
1598     unsigned src1 = bi_get_src(ins, regs, 1);
1599     unsigned src2 = bi_get_src(ins, regs, 2);
1600     assert((1 << src2) & 0xf7);
1601 
1602     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1603     unsigned widen0_temp = 0;
1604     if (widen0_sz == 32) widen0_temp = 0;
1605     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
1606     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
1607     else unreachable("Could not pattern match widen");
1608     unsigned widen0 = widen0_temp;
1609     assert(widen0 < 4);
1610 
1611     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1612     unsigned widen1_temp = 0;
1613     if (widen1_sz == 32) widen1_temp = 0;
1614     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
1615     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
1616     else unreachable("Could not pattern match widen");
1617     unsigned widen1 = widen1_temp;
1618     assert(widen1 < 4);
1619 
1620     unsigned cmpf_table[] = {
1621         ~0, 4, 5, 2, 1, 0, 3
1622     };
1623     unsigned cmpf = cmpf_table[ins->cond];
1624     assert(cmpf < 8);
1625 
1626     if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == widen1) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
1627         { unsigned temp = src0; src0 = src1; src1 = temp; }
1628         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
1629         if (cmpf == 1) cmpf = 4;
1630         else if (cmpf == 5) cmpf = 2;
1631         else if (cmpf == 4) cmpf = 1;
1632         else if (cmpf == 2) cmpf = 5;
1633     }
1634 
1635     unsigned derived_12 = 0;
1636     if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
1637     else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
1638     else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5))) derived_12 = 3;
1639     else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) derived_12 = 4;
1640     else unreachable("No pattern match at pos 12");
1641 
1642     unsigned derived_9 = 0;
1643     if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
1644     else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 3))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
1645     else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 1) || (cmpf == 2))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
1646     else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 4) || (cmpf == 5))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 4)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == widen1) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
1647     else unreachable("No pattern match at pos 9");
1648 
1649     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
1650 }
1651 
1652 static inline unsigned
pan_pack_add_clper_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1653 pan_pack_add_clper_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1654 {
1655     unsigned src0 = bi_get_src(ins, regs, 0);
1656     assert((1 << src0) & 0x7);
1657     unsigned src1 = bi_get_src(ins, regs, 1);
1658 
1659     unsigned lane_op = 0;
1660 
1661     unsigned subgroup = 1;
1662 
1663     unsigned inactive_result = 0;
1664 
1665     return 0x7c000 | (src0 << 0) | (src1 << 3) | (lane_op << 6) | (subgroup << 8) | (inactive_result << 10);
1666 }
1667 
1668 static inline unsigned
pan_pack_add_v2s16_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1669 pan_pack_add_v2s16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1670 {
1671     unsigned src0 = bi_get_src(ins, regs, 0);
1672 
1673     unsigned round = ins->roundmode;
1674     assert(round < 8);
1675 
1676     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1677     unsigned swz0_temp = 0;
1678     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1679     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1680     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
1681     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
1682     else unreachable("Could not pattern match widen");
1683     unsigned swz0 = swz0_temp;
1684     assert(swz0 < 4);
1685 
1686     if (round != 4) {
1687         unsigned derived_4 = 0;
1688         if (round == 0) derived_4 = 0;
1689         else if (round == 1) derived_4 = 1;
1690         else if (round == 2) derived_4 = 2;
1691         else if (round == 3) derived_4 = 3;
1692         else unreachable("No pattern match at pos 4");
1693 
1694         return 0x3c600 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
1695     } else if (round == 4) {
1696         return 0x3cb00 | (src0 << 0) | (swz0 << 4);
1697     } else {
1698         unreachable("No matching state found in add_v2s16_to_v2f16");
1699     }
1700 }
1701 
1702 static inline unsigned
pan_pack_fma_atom_c1_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1703 pan_pack_fma_atom_c1_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1704 {
1705     unsigned src0 = bi_get_src(ins, regs, 0);
1706     assert((1 << src0) & 0xf3);
1707     unsigned src1 = bi_get_src(ins, regs, 1);
1708     assert((1 << src1) & 0xf3);
1709 
1710     unsigned atom_opc = 2;
1711 
1712     return 0x2f5e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
1713 }
1714 
1715 static inline unsigned
pan_pack_add_axchg_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1716 pan_pack_add_axchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1717 {
1718     unsigned src0 = bi_get_src(ins, regs, 1);
1719     unsigned src1 = bi_get_src(ins, regs, 2);
1720 
1721     assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
1722     unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
1723     assert(seg < 2);
1724 
1725     bi_read_staging_register(clause, ins);
1726     assert(ins->src[0] == ins->dest);
1727     return 0x640c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
1728 }
1729 
1730 static inline unsigned
pan_pack_fma_vn_asst1_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1731 pan_pack_fma_vn_asst1_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1732 {
1733     unsigned src0 = bi_get_src(ins, regs, 0);
1734     assert((1 << src0) & 0xfb);
1735     unsigned src1 = bi_get_src(ins, regs, 1);
1736     assert((1 << src1) & 0xfb);
1737     unsigned src2 = bi_get_src(ins, regs, 2);
1738 
1739     unsigned h = 0;
1740 
1741     unsigned l = 0;
1742 
1743     unsigned neg2 = ins->src_neg[2];
1744     assert(neg2 < 2);
1745 
1746     return 0x6eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (h << 9) | (l << 10) | (neg2 << 11);
1747 }
1748 
1749 static inline unsigned
pan_pack_fma_fma_rscale_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1750 pan_pack_fma_fma_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1751 {
1752     unsigned src0 = bi_get_src(ins, regs, 0);
1753     assert((1 << src0) & 0xfb);
1754     unsigned src1 = bi_get_src(ins, regs, 1);
1755     assert((1 << src1) & 0xfb);
1756     unsigned src2 = bi_get_src(ins, regs, 2);
1757     unsigned src3 = bi_get_src(ins, regs, 3);
1758 
1759     assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ);
1760     unsigned round = (ins->roundmode == BIFROST_RTZ) ? 1 : 0;
1761     assert(round < 2);
1762 
1763     unsigned clamp = ins->outmod;
1764     assert(clamp < 4);
1765 
1766     unsigned neg0 = ins->src_neg[0];
1767     assert(neg0 < 2);
1768 
1769     unsigned neg1 = ins->src_neg[1];
1770     assert(neg1 < 2);
1771 
1772     unsigned abs0 = ins->src_abs[0];
1773     assert(abs0 < 2);
1774 
1775     unsigned neg2 = ins->src_neg[2];
1776     assert(neg2 < 2);
1777 
1778     unsigned special = 0;
1779 
1780     unsigned derived_16 = 0;
1781     if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0;
1782     else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1;
1783     else unreachable("No pattern match at pos 16");
1784 
1785     unsigned derived_12 = 0;
1786     if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0;
1787     else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1;
1788     else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2;
1789     else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3;
1790     else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4;
1791     else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5;
1792     else if ((clamp == 0) && (special == 3) && (round == 0)) derived_12 = 6;
1793     else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7;
1794     else unreachable("No pattern match at pos 12");
1795 
1796     return 0x280000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12);
1797 }
1798 
1799 static inline unsigned
pan_pack_add_hadd_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1800 pan_pack_add_hadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1801 {
1802     unsigned src0 = bi_get_src(ins, regs, 0);
1803     unsigned src1 = bi_get_src(ins, regs, 1);
1804 
1805     assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
1806     unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
1807     assert(round < 2);
1808 
1809     return 0xbc6c0 | (src0 << 0) | (src1 << 3) | (round << 12);
1810 }
1811 
1812 static inline unsigned
pan_pack_fma_imul_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1813 pan_pack_fma_imul_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1814 {
1815     unsigned src0 = bi_get_src(ins, regs, 0);
1816     assert((1 << src0) & 0xfb);
1817     unsigned src1 = bi_get_src(ins, regs, 1);
1818     assert((1 << src1) & 0xfb);
1819 
1820     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1821     unsigned swz0_temp = 0;
1822     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1823     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1824     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
1825     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
1826     else unreachable("Could not pattern match widen");
1827     unsigned swz0 = swz0_temp;
1828     assert(swz0 < 4);
1829 
1830     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1831     unsigned swz1_temp = 0;
1832     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
1833     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
1834     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
1835     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
1836     else unreachable("Could not pattern match widen");
1837     unsigned swz1 = swz1_temp;
1838     assert(swz1 < 4);
1839 
1840     return 0x7240c0 | (src0 << 0) | (src1 << 3) | (swz0 << 9) | (swz1 << 11);
1841 }
1842 
1843 static inline unsigned
pan_pack_add_load_i48(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1844 pan_pack_add_load_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1845 {
1846     unsigned src0 = bi_get_src(ins, regs, 0);
1847     unsigned src1 = bi_get_src(ins, regs, 1);
1848 
1849     assert(ins->segment);
1850     unsigned seg = ins->segment;
1851     assert(seg < 8);
1852 
1853     bi_write_staging_register(clause, ins);
1854     return 0x65200 | (src0 << 0) | (src1 << 3) | (seg << 6);
1855 }
1856 
1857 static inline unsigned
pan_pack_add_hadd_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1858 pan_pack_add_hadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1859 {
1860     unsigned src0 = bi_get_src(ins, regs, 0);
1861     unsigned src1 = bi_get_src(ins, regs, 1);
1862 
1863     assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
1864     unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
1865     assert(round < 2);
1866 
1867     return 0xbc640 | (src0 << 0) | (src1 << 3) | (round << 12);
1868 }
1869 
1870 static inline unsigned
pan_pack_add_imov_fma(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1871 pan_pack_add_imov_fma(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1872 {
1873 
1874     unsigned threads = 0;
1875 
1876     return 0xd7820 | (threads << 3);
1877 }
1878 
1879 static inline unsigned
pan_pack_add_icmpi_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1880 pan_pack_add_icmpi_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1881 {
1882     unsigned src0 = bi_get_src(ins, regs, 0);
1883     unsigned src1 = bi_get_src(ins, regs, 1);
1884 
1885     unsigned result_type = 1;
1886 
1887     unsigned cmpf_table[] = {
1888         ~0, ~0, ~0, 1, 0, ~0, ~0
1889     };
1890     unsigned cmpf = cmpf_table[ins->cond];
1891     assert(cmpf < 2);
1892 
1893     return 0x7b880 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
1894 }
1895 
1896 static inline unsigned
pan_pack_add_store_i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1897 pan_pack_add_store_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1898 {
1899     unsigned src0 = bi_get_src(ins, regs, 1);
1900     unsigned src1 = bi_get_src(ins, regs, 2);
1901 
1902     assert(ins->segment);
1903     unsigned seg = ins->segment;
1904     assert(seg < 8);
1905 
1906     bi_read_staging_register(clause, ins);
1907     return 0x62000 | (src0 << 0) | (src1 << 3) | (seg << 6);
1908 }
1909 
1910 static inline unsigned
pan_pack_fma_jump_ex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1911 pan_pack_fma_jump_ex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1912 {
1913     unsigned src0 = bi_get_src(ins, regs, 0);
1914     assert((1 << src0) & 0xfb);
1915     unsigned src1 = bi_get_src(ins, regs, 1);
1916     assert((1 << src1) & 0xfb);
1917     unsigned src2 = bi_get_src(ins, regs, 2);
1918 
1919     unsigned test_mode = 0;
1920 
1921     unsigned stack_mode = 2;
1922 
1923     return 0x2eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (test_mode << 9) | (stack_mode << 10);
1924 }
1925 
1926 static inline unsigned
pan_pack_add_iadd_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1927 pan_pack_add_iadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1928 {
1929     unsigned src0 = bi_get_src(ins, regs, 0);
1930     unsigned src1 = bi_get_src(ins, regs, 1);
1931 
1932     unsigned saturate = 0;
1933 
1934     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1935     unsigned lanes1_temp = 0;
1936     if (lanes1_sz == 32) lanes1_temp = 0;
1937     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
1938     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
1939     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
1940     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
1941     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
1942     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
1943     else unreachable("Could not pattern match widen");
1944     unsigned lanes1 = lanes1_temp;
1945     assert(lanes1 < 8);
1946 
1947     if (lanes1 == 0) {
1948         return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8);
1949     } else if ((lanes1 == 1) || (lanes1 == 2)) {
1950         unsigned derived_9 = 0;
1951         if (lanes1 == 1) derived_9 = 0;
1952         else if (lanes1 == 2) derived_9 = 1;
1953         else unreachable("No pattern match at pos 9");
1954 
1955         return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
1956     } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
1957         unsigned derived_9 = 0;
1958         if (lanes1 == 3) derived_9 = 0;
1959         else if (lanes1 == 4) derived_9 = 1;
1960         else if (lanes1 == 5) derived_9 = 2;
1961         else if (lanes1 == 6) derived_9 = 3;
1962         else unreachable("No pattern match at pos 9");
1963 
1964         return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
1965     } else {
1966         unreachable("No matching state found in add_iadd_s32");
1967     }
1968 }
1969 
1970 static inline unsigned
pan_pack_fma_rshift_xor_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1971 pan_pack_fma_rshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1972 {
1973     unsigned src0 = bi_get_src(ins, regs, 0);
1974     assert((1 << src0) & 0xfb);
1975     unsigned src1 = bi_get_src(ins, regs, 1);
1976     assert((1 << src1) & 0xfb);
1977     unsigned src2 = bi_get_src(ins, regs, 2);
1978 
1979     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
1980     unsigned lanes2_temp = 0;
1981     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
1982     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
1983     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
1984     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
1985     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
1986     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
1987     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
1988     else unreachable("Could not pattern match widen");
1989     unsigned lanes2 = lanes2_temp;
1990     assert(lanes2 < 8);
1991 
1992     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
1993     assert(not_result < 2);
1994 
1995     if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
1996         unsigned derived_9 = 0;
1997         if (lanes2 == 0) derived_9 = 0;
1998         else if (lanes2 == 1) derived_9 = 1;
1999         else if (lanes2 == 2) derived_9 = 2;
2000         else if (lanes2 == 3) derived_9 = 3;
2001         else unreachable("No pattern match at pos 9");
2002 
2003         return 0x320800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
2004     } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
2005         unsigned derived_9 = 0;
2006         if (lanes2 == 4) derived_9 = 1;
2007         else if (lanes2 == 5) derived_9 = 2;
2008         else if (lanes2 == 6) derived_9 = 3;
2009         else unreachable("No pattern match at pos 9");
2010 
2011         return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
2012     } else {
2013         unreachable("No matching state found in fma_rshift_xor_v2i16");
2014     }
2015 }
2016 
2017 static inline unsigned
pan_pack_fma_csel_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2018 pan_pack_fma_csel_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2019 {
2020     unsigned src0 = bi_get_src(ins, regs, 0);
2021     assert((1 << src0) & 0xfb);
2022     unsigned src1 = bi_get_src(ins, regs, 1);
2023     assert((1 << src1) & 0xfb);
2024     unsigned src2 = bi_get_src(ins, regs, 2);
2025     unsigned src3 = bi_get_src(ins, regs, 3);
2026 
2027     unsigned cmpf_table[] = {
2028         ~0, 2, 3, 1, 0, ~0, ~0
2029     };
2030     unsigned cmpf = cmpf_table[ins->cond];
2031     assert(cmpf < 4);
2032 
2033     if ((cmpf == 2) || (cmpf == 3)) {
2034         { unsigned temp = src0; src0 = src1; src1 = temp; }
2035         if (cmpf == 3) cmpf = 1;
2036         else if (cmpf == 2) cmpf = 0;
2037     }
2038 
2039     unsigned derived_12 = 0;
2040     if (cmpf == 0) derived_12 = 0;
2041     else if (cmpf == 1) derived_12 = 1;
2042     else unreachable("No pattern match at pos 12");
2043 
2044     return 0x6e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
2045 }
2046 
2047 static inline unsigned
pan_pack_add_shaddxh_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2048 pan_pack_add_shaddxh_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2049 {
2050     unsigned src0 = bi_get_src(ins, regs, 0);
2051     unsigned src1 = bi_get_src(ins, regs, 1);
2052 
2053     return 0x3f8c0 | (src0 << 0) | (src1 << 3);
2054 }
2055 
2056 static inline unsigned
pan_pack_add_isub_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2057 pan_pack_add_isub_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2058 {
2059     unsigned src0 = bi_get_src(ins, regs, 0);
2060     unsigned src1 = bi_get_src(ins, regs, 1);
2061 
2062     unsigned saturate = 0;
2063 
2064     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2065     unsigned lanes0_temp = 0;
2066     if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
2067     else unreachable("Could not pattern match widen");
2068     unsigned lanes0 = lanes0_temp;
2069     assert(lanes0 < 8);
2070 
2071     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2072     unsigned lanes1_temp = 0;
2073     if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
2074     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
2075     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
2076     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
2077     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
2078     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
2079     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
2080     else unreachable("Could not pattern match widen");
2081     unsigned lanes1 = lanes1_temp;
2082     assert(lanes1 < 8);
2083 
2084     if ((lanes0 == 0) && (lanes1 == 0)) {
2085         unsigned derived_7 = 0;
2086         if (saturate == 0) derived_7 = 0;
2087         else if (saturate == 1) derived_7 = 1;
2088         else unreachable("No pattern match at pos 7");
2089 
2090         return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
2091     } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
2092         unsigned derived_7 = 0;
2093         if (saturate == 0) derived_7 = 0;
2094         else if (saturate == 1) derived_7 = 1;
2095         else unreachable("No pattern match at pos 7");
2096 
2097         unsigned derived_9 = 0;
2098         if (lanes1 == 1) derived_9 = 0;
2099         else if (lanes1 == 2) derived_9 = 1;
2100         else if (lanes1 == 3) derived_9 = 2;
2101         else if (lanes1 == 4) derived_9 = 3;
2102         else unreachable("No pattern match at pos 9");
2103 
2104         return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
2105     } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
2106         unsigned derived_7 = 0;
2107         if (saturate == 0) derived_7 = 0;
2108         else if (saturate == 1) derived_7 = 1;
2109         else unreachable("No pattern match at pos 7");
2110 
2111         unsigned derived_9 = 0;
2112         if (lanes1 == 5) derived_9 = 0;
2113         else if (lanes1 == 6) derived_9 = 1;
2114         else unreachable("No pattern match at pos 9");
2115 
2116         return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
2117     } else {
2118         unreachable("No matching state found in add_isub_v4u8");
2119     }
2120 }
2121 
2122 static inline unsigned
pan_pack_add_frexpm_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2123 pan_pack_add_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2124 {
2125     unsigned src0 = bi_get_src(ins, regs, 0);
2126 
2127     unsigned abs0 = ins->src_abs[0];
2128     assert(abs0 < 2);
2129 
2130     unsigned sqrt = 0;
2131 
2132     unsigned log = 1;
2133 
2134     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2135     unsigned widen0_temp = 0;
2136     if (widen0_sz == 32) widen0_temp = 1;
2137     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
2138     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
2139     else unreachable("Could not pattern match widen");
2140     unsigned widen0 = widen0_temp;
2141     assert(widen0 < 4);
2142 
2143     unsigned neg0 = ins->src_neg[0];
2144     assert(neg0 < 2);
2145 
2146     if ((log == 0) && (neg0 == 0)) {
2147         return 0x3db20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3);
2148     } else if ((log == 1) && (sqrt == 0)) {
2149         return 0x3da20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7);
2150     } else {
2151         unreachable("No matching state found in add_frexpm_f32");
2152     }
2153 }
2154 
2155 static inline unsigned
pan_pack_add_frexpe_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2156 pan_pack_add_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2157 {
2158     unsigned src0 = bi_get_src(ins, regs, 0);
2159 
2160     unsigned neg = ins->src_neg[0];
2161     assert(neg < 2);
2162 
2163     unsigned sqrt = 0;
2164 
2165     unsigned log = 1;
2166 
2167     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2168     unsigned swz0_temp = 0;
2169     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2170     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2171     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2172     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2173     else unreachable("Could not pattern match widen");
2174     unsigned swz0 = swz0_temp;
2175     assert(swz0 < 4);
2176 
2177     if (log == 0) {
2178         return 0x3dc00 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (swz0 << 3);
2179     } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
2180         return 0x3de00 | (src0 << 0) | (swz0 << 3);
2181     } else {
2182         unreachable("No matching state found in add_frexpe_v2f16");
2183     }
2184 }
2185 
2186 static inline unsigned
pan_pack_add_logb_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2187 pan_pack_add_logb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2188 {
2189     unsigned src0 = bi_get_src(ins, regs, 0);
2190 
2191     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2192     unsigned widen0_temp = 0;
2193     if (widen0_sz == 32) widen0_temp = 1;
2194     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
2195     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
2196     else unreachable("Could not pattern match widen");
2197     unsigned widen0 = widen0_temp;
2198     assert(widen0 < 4);
2199 
2200     return 0x3d9a0 | (src0 << 0) | (widen0 << 3);
2201 }
2202 
2203 static inline unsigned
pan_pack_fma_rshift_or_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2204 pan_pack_fma_rshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2205 {
2206     unsigned src0 = bi_get_src(ins, regs, 0);
2207     assert((1 << src0) & 0xfb);
2208     unsigned src1 = bi_get_src(ins, regs, 1);
2209     assert((1 << src1) & 0xfb);
2210     unsigned src2 = bi_get_src(ins, regs, 2);
2211 
2212     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
2213     unsigned lanes2_temp = 0;
2214     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
2215     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
2216     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
2217     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
2218     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
2219     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
2220     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
2221     else unreachable("Could not pattern match widen");
2222     unsigned lanes2 = lanes2_temp;
2223     assert(lanes2 < 8);
2224 
2225     unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
2226     assert(not1 < 2);
2227 
2228     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
2229     assert(not_result < 2);
2230 
2231     if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
2232         unsigned derived_9 = 0;
2233         if (lanes2 == 0) derived_9 = 0;
2234         else if (lanes2 == 1) derived_9 = 1;
2235         else if (lanes2 == 2) derived_9 = 2;
2236         else if (lanes2 == 3) derived_9 = 3;
2237         else unreachable("No pattern match at pos 9");
2238 
2239         return 0x302800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
2240     } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
2241         unsigned derived_9 = 0;
2242         if (lanes2 == 4) derived_9 = 1;
2243         else if (lanes2 == 5) derived_9 = 2;
2244         else if (lanes2 == 6) derived_9 = 3;
2245         else unreachable("No pattern match at pos 9");
2246 
2247         return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
2248     } else {
2249         unreachable("No matching state found in fma_rshift_or_v2i16");
2250     }
2251 }
2252 
2253 static inline unsigned
pan_pack_fma_shaddxl_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2254 pan_pack_fma_shaddxl_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2255 {
2256     unsigned src0 = bi_get_src(ins, regs, 0);
2257     assert((1 << src0) & 0xfb);
2258     unsigned src1 = bi_get_src(ins, regs, 1);
2259     assert((1 << src1) & 0xfb);
2260 
2261     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2262     unsigned lane1_temp = 0;
2263     if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
2264     else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
2265     else if (lane1_sz == 32) lane1_temp = 2;
2266     else unreachable("Could not pattern match widen");
2267     unsigned lane1 = lane1_temp;
2268     assert(lane1 < 4);
2269 
2270     unsigned shift = 0;
2271     return 0x70e800 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6);
2272 }
2273 
2274 static inline unsigned
pan_pack_add_fcmp_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2275 pan_pack_add_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2276 {
2277     unsigned src0 = bi_get_src(ins, regs, 0);
2278     unsigned src1 = bi_get_src(ins, regs, 1);
2279 
2280     unsigned neg0 = ins->src_neg[0];
2281     assert(neg0 < 2);
2282 
2283     unsigned neg1 = ins->src_neg[1];
2284     assert(neg1 < 2);
2285 
2286     unsigned cmpf_table[] = {
2287         ~0, 4, 5, 2, 1, 0, 3
2288     };
2289     unsigned cmpf = cmpf_table[ins->cond];
2290     assert(cmpf < 8);
2291 
2292     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2293     unsigned swz0_temp = 0;
2294     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2295     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2296     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2297     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2298     else unreachable("Could not pattern match widen");
2299     unsigned swz0 = swz0_temp;
2300     assert(swz0 < 4);
2301 
2302     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2303     unsigned swz1_temp = 0;
2304     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
2305     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
2306     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
2307     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
2308     else unreachable("Could not pattern match widen");
2309     unsigned swz1 = swz1_temp;
2310     assert(swz1 < 4);
2311 
2312     unsigned result_type = 2;
2313 
2314     if ((neg0 == 0) && (neg1 == 1)) {
2315         { unsigned temp = src0; src0 = src1; src1 = temp; }
2316         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
2317         { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
2318         if (cmpf == 1) cmpf = 4;
2319         else if (cmpf == 5) cmpf = 2;
2320         else if (cmpf == 4) cmpf = 1;
2321         else if (cmpf == 2) cmpf = 5;
2322     }
2323 
2324     unsigned derived_13 = 0;
2325     if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0;
2326     else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1;
2327     else unreachable("No pattern match at pos 13");
2328 
2329     return 0xb0000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (swz0 << 9) | (swz1 << 11) | (result_type << 14) | (derived_13 << 13);
2330 }
2331 
2332 static inline unsigned
pan_pack_fma_clz_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2333 pan_pack_fma_clz_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2334 {
2335     unsigned src0 = bi_get_src(ins, regs, 0);
2336     assert((1 << src0) & 0xfb);
2337 
2338     unsigned mask = 0;
2339 
2340     return 0x701f90 | (src0 << 0) | (mask << 3);
2341 }
2342 
2343 static inline unsigned
pan_pack_add_frexpe_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2344 pan_pack_add_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2345 {
2346     unsigned src0 = bi_get_src(ins, regs, 0);
2347 
2348     unsigned neg = ins->src_neg[0];
2349     assert(neg < 2);
2350 
2351     unsigned sqrt = 0;
2352 
2353     unsigned log = 1;
2354 
2355     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2356     unsigned widen0_temp = 0;
2357     if (widen0_sz == 32) widen0_temp = 1;
2358     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
2359     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
2360     else unreachable("Could not pattern match widen");
2361     unsigned widen0 = widen0_temp;
2362     assert(widen0 < 4);
2363 
2364     if (log == 0) {
2365         return 0x3dc20 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (widen0 << 3);
2366     } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
2367         return 0x3de20 | (src0 << 0) | (widen0 << 3);
2368     } else {
2369         unreachable("No matching state found in add_frexpe_f32");
2370     }
2371 }
2372 
2373 static inline unsigned
pan_pack_fma_quiet_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2374 pan_pack_fma_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2375 {
2376     unsigned src0 = bi_get_src(ins, regs, 0);
2377     assert((1 << src0) & 0xfb);
2378 
2379     return 0x701970 | (src0 << 0);
2380 }
2381 
2382 static inline unsigned
pan_pack_add_fmin_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2383 pan_pack_add_fmin_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2384 {
2385     unsigned src0 = bi_get_src(ins, regs, 0);
2386     unsigned src1 = bi_get_src(ins, regs, 1);
2387 
2388     unsigned abs0 = ins->src_abs[0];
2389     assert(abs0 < 2);
2390 
2391     unsigned abs1 = ins->src_abs[1];
2392     assert(abs1 < 2);
2393 
2394     unsigned neg0 = ins->src_neg[0];
2395     assert(neg0 < 2);
2396 
2397     unsigned neg1 = ins->src_neg[1];
2398     assert(neg1 < 2);
2399 
2400     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2401     unsigned swz0_temp = 0;
2402     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2403     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2404     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2405     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2406     else unreachable("Could not pattern match widen");
2407     unsigned swz0 = swz0_temp;
2408     assert(swz0 < 4);
2409 
2410     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2411     unsigned swz1_temp = 0;
2412     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
2413     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
2414     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
2415     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
2416     else unreachable("Could not pattern match widen");
2417     unsigned swz1 = swz1_temp;
2418     assert(swz1 < 4);
2419 
2420     unsigned sem = 0;
2421 
2422     if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
2423         { unsigned temp = src0; src0 = src1; src1 = temp; }
2424         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
2425         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
2426         { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
2427         if (sem == 3) sem = 2;
2428         else if (sem == 2) sem = 3;
2429     }
2430 
2431     unsigned derived_6 = 0;
2432     if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
2433     else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
2434     else unreachable("No pattern match at pos 6");
2435 
2436     return 0x90000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6);
2437 }
2438 
2439 static inline unsigned
pan_pack_add_var_tex_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2440 pan_pack_add_var_tex_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2441 {
2442 
2443     unsigned update = 0;
2444 
2445     unsigned skip = ins->skip;
2446     assert(skip < 2);
2447 
2448     unsigned lod_mode = 1 - ins->texture.compute_lod;
2449     assert(lod_mode < 2);
2450 
2451     unsigned sample = ins->load_vary.interp_mode;
2452     assert(sample < 2);
2453 
2454     unsigned varying_index = bi_get_immediate(ins, 0);
2455     unsigned texture_index = ins->texture.texture_index;
2456     bi_write_staging_register(clause, ins);
2457     unsigned derived_5 = 0;
2458     if ((sample == 0) && (update == 0)) derived_5 = 0;
2459     else if ((sample == 1) && (update == 1)) derived_5 = 1;
2460     else unreachable("No pattern match at pos 5");
2461 
2462     return 0xca000 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5);
2463 }
2464 
2465 static inline unsigned
pan_pack_add_branch_lowbits_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2466 pan_pack_add_branch_lowbits_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2467 {
2468     unsigned src0 = bi_get_src(ins, regs, 0);
2469     unsigned src1 = bi_get_src(ins, regs, 1);
2470     assert((1 << src1) & 0xf7);
2471 
2472     return 0x6fa38 | (src0 << 0) | (src1 << 6);
2473 }
2474 
2475 static inline unsigned
pan_pack_fma_lshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2476 pan_pack_fma_lshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2477 {
2478     unsigned src0 = bi_get_src(ins, regs, 0);
2479     assert((1 << src0) & 0xfb);
2480     unsigned src1 = bi_get_src(ins, regs, 1);
2481     assert((1 << src1) & 0xfb);
2482     unsigned src2 = bi_get_src(ins, regs, 2);
2483 
2484     unsigned bytes2 = 0;
2485 
2486     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
2487     unsigned lane2_temp = 0;
2488     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
2489     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
2490     else unreachable("Could not pattern match widen");
2491     unsigned lane2 = lane2_temp;
2492     assert(lane2 < 2);
2493 
2494     unsigned result_word = 0;
2495 
2496     return 0x33c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
2497 }
2498 
2499 static inline unsigned
pan_pack_fma_idp_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2500 pan_pack_fma_idp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2501 {
2502     unsigned src0 = bi_get_src(ins, regs, 0);
2503     assert((1 << src0) & 0xfb);
2504     unsigned src1 = bi_get_src(ins, regs, 1);
2505     assert((1 << src1) & 0xfb);
2506 
2507     ASSERTED bool sign0_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
2508     bool sign0_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
2509     assert(sign0_small);
2510     unsigned sign0 = sign0_signed ? 1 : 0;
2511     assert(sign0 < 2);
2512 
2513     ASSERTED bool sign1_small = nir_alu_type_get_type_size(ins->src_types[1]) <= 16;
2514     bool sign1_signed = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int;
2515     assert(sign1_small);
2516     unsigned sign1 = sign1_signed ? 1 : 0;
2517     assert(sign1 < 2);
2518 
2519     return 0x73e8c0 | (src0 << 0) | (src1 << 3) | (sign0 << 9) | (sign1 << 10);
2520 }
2521 
2522 static inline unsigned
pan_pack_add_icmp_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2523 pan_pack_add_icmp_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2524 {
2525     unsigned src0 = bi_get_src(ins, regs, 0);
2526     unsigned src1 = bi_get_src(ins, regs, 1);
2527 
2528     unsigned result_type = 1;
2529 
2530     unsigned cmpf_table[] = {
2531         ~0, 2, 3, 1, 0, ~0, ~0
2532     };
2533     unsigned cmpf = cmpf_table[ins->cond];
2534     assert(cmpf < 4);
2535 
2536     if ((cmpf == 2) || (cmpf == 3)) {
2537         { unsigned temp = src0; src0 = src1; src1 = temp; }
2538         if (cmpf == 3) cmpf = 1;
2539         else if (cmpf == 2) cmpf = 0;
2540     }
2541 
2542     unsigned derived_6 = 0;
2543     if (cmpf == 0) derived_6 = 0;
2544     else if (cmpf == 1) derived_6 = 1;
2545     else unreachable("No pattern match at pos 6");
2546 
2547     return 0x7b080 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
2548 }
2549 
2550 static inline unsigned
pan_pack_add_acmpstore_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2551 pan_pack_add_acmpstore_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2552 {
2553     unsigned src0 = bi_get_src(ins, regs, 1);
2554     unsigned src1 = bi_get_src(ins, regs, 2);
2555 
2556     assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
2557     unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
2558     assert(seg < 2);
2559 
2560     bi_read_staging_register(clause, ins);
2561     return 0x64900 | (src0 << 0) | (src1 << 3) | (seg << 9);
2562 }
2563 
2564 static inline unsigned
pan_pack_add_hadd_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2565 pan_pack_add_hadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2566 {
2567     unsigned src0 = bi_get_src(ins, regs, 0);
2568     unsigned src1 = bi_get_src(ins, regs, 1);
2569 
2570     assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
2571     unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
2572     assert(round < 2);
2573 
2574     return 0xbc4c0 | (src0 << 0) | (src1 << 3) | (round << 12);
2575 }
2576 
2577 static inline unsigned
pan_pack_add_cubeface2(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2578 pan_pack_add_cubeface2(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2579 {
2580     unsigned src0 = bi_get_src(ins, regs, 0);
2581 
2582     return 0x3de58 | (src0 << 0);
2583 }
2584 
2585 static inline unsigned
pan_pack_fma_s16_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2586 pan_pack_fma_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2587 {
2588     unsigned src0 = bi_get_src(ins, regs, 0);
2589     assert((1 << src0) & 0xfb);
2590 
2591     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2592     unsigned lane0_temp = 0;
2593     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2594     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2595     else unreachable("Could not pattern match widen");
2596     unsigned lane0 = lane0_temp;
2597     assert(lane0 < 2);
2598 
2599     return 0x700cc0 | (src0 << 0) | (lane0 << 4);
2600 }
2601 
2602 static inline unsigned
pan_pack_fma_fround_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2603 pan_pack_fma_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2604 {
2605     unsigned src0 = bi_get_src(ins, regs, 0);
2606     assert((1 << src0) & 0xfb);
2607 
2608     unsigned abs0 = ins->src_abs[0];
2609     assert(abs0 < 2);
2610 
2611     unsigned neg0 = ins->src_neg[0];
2612     assert(neg0 < 2);
2613 
2614     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2615     unsigned swz0_temp = 0;
2616     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2617     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2618     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2619     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2620     else unreachable("Could not pattern match widen");
2621     unsigned swz0 = swz0_temp;
2622     assert(swz0 < 4);
2623 
2624     unsigned round = ins->roundmode;
2625     assert(round < 8);
2626 
2627     if (round != 4) {
2628         unsigned derived_9 = 0;
2629         if (round == 0) derived_9 = 0;
2630         else if (round == 1) derived_9 = 1;
2631         else if (round == 2) derived_9 = 2;
2632         else if (round == 3) derived_9 = 3;
2633         else unreachable("No pattern match at pos 9");
2634 
2635         return 0x70c000 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (derived_9 << 9);
2636     } else if (round == 4) {
2637         return 0x707600 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3);
2638     } else {
2639         unreachable("No matching state found in fma_fround_v2f16");
2640     }
2641 }
2642 
2643 static inline unsigned
pan_pack_add_v2u8_to_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2644 pan_pack_add_v2u8_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2645 {
2646     unsigned src0 = bi_get_src(ins, regs, 0);
2647 
2648     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2649     unsigned swz0_temp = 0;
2650     if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2651     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2652     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
2653     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
2654     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
2655     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
2656     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
2657     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
2658     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
2659     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
2660     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
2661     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
2662     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
2663     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
2664     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
2665     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
2666     else unreachable("Could not pattern match widen");
2667     unsigned swz0 = swz0_temp;
2668     assert(swz0 < 16);
2669 
2670     return 0x3c708 | (src0 << 0) | (swz0 << 4);
2671 }
2672 
2673 static inline unsigned
pan_pack_add_fmax_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2674 pan_pack_add_fmax_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2675 {
2676     unsigned src0 = bi_get_src(ins, regs, 0);
2677     unsigned src1 = bi_get_src(ins, regs, 1);
2678 
2679     unsigned abs0 = ins->src_abs[0];
2680     assert(abs0 < 2);
2681 
2682     unsigned abs1 = ins->src_abs[1];
2683     assert(abs1 < 2);
2684 
2685     unsigned neg0 = ins->src_neg[0];
2686     assert(neg0 < 2);
2687 
2688     unsigned neg1 = ins->src_neg[1];
2689     assert(neg1 < 2);
2690 
2691     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2692     unsigned swz0_temp = 0;
2693     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2694     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2695     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2696     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2697     else unreachable("Could not pattern match widen");
2698     unsigned swz0 = swz0_temp;
2699     assert(swz0 < 4);
2700 
2701     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2702     unsigned swz1_temp = 0;
2703     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
2704     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
2705     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
2706     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
2707     else unreachable("Could not pattern match widen");
2708     unsigned swz1 = swz1_temp;
2709     assert(swz1 < 4);
2710 
2711     unsigned sem = 0;
2712 
2713     if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
2714         { unsigned temp = src0; src0 = src1; src1 = temp; }
2715         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
2716         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
2717         { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
2718         if (sem == 3) sem = 2;
2719         else if (sem == 2) sem = 3;
2720     }
2721 
2722     unsigned derived_6 = 0;
2723     if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
2724     else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
2725     else unreachable("No pattern match at pos 6");
2726 
2727     return 0x80000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6);
2728 }
2729 
2730 static inline unsigned
pan_pack_fma_f16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2731 pan_pack_fma_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2732 {
2733     unsigned src0 = bi_get_src(ins, regs, 0);
2734     assert((1 << src0) & 0xfb);
2735 
2736     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2737     unsigned lane0_temp = 0;
2738     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2739     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2740     else unreachable("Could not pattern match widen");
2741     unsigned lane0 = lane0_temp;
2742     assert(lane0 < 2);
2743 
2744     return 0x700d10 | (src0 << 0) | (lane0 << 3);
2745 }
2746 
2747 static inline unsigned
pan_pack_add_branchc_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2748 pan_pack_add_branchc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2749 {
2750     unsigned src0 = bi_get_src(ins, regs, 0);
2751     unsigned src1 = bi_get_src(ins, regs, 1);
2752     assert((1 << src1) & 0xf7);
2753 
2754     unsigned combine = 0;
2755 
2756     return 0x6f238 | (src0 << 0) | (src1 << 6) | (combine << 10);
2757 }
2758 
2759 static inline unsigned
pan_pack_fma_dtsel_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2760 pan_pack_fma_dtsel_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2761 {
2762     unsigned src0 = bi_get_src(ins, regs, 0);
2763     assert((1 << src0) & 0xfb);
2764 
2765     unsigned table = 63;
2766     return 0x70f200 | (src0 << 0) | (table << 3);
2767 }
2768 
2769 static inline unsigned
pan_pack_add_iadd_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2770 pan_pack_add_iadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2771 {
2772     unsigned src0 = bi_get_src(ins, regs, 0);
2773     unsigned src1 = bi_get_src(ins, regs, 1);
2774 
2775     unsigned saturate = 0;
2776 
2777     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2778     unsigned lanes0_temp = 0;
2779     if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
2780     else unreachable("Could not pattern match widen");
2781     unsigned lanes0 = lanes0_temp;
2782     assert(lanes0 < 8);
2783 
2784     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2785     unsigned lanes1_temp = 0;
2786     if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
2787     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
2788     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
2789     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
2790     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
2791     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
2792     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
2793     else unreachable("Could not pattern match widen");
2794     unsigned lanes1 = lanes1_temp;
2795     assert(lanes1 < 8);
2796 
2797     if ((lanes0 == 0) && (lanes1 == 0)) {
2798         return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8);
2799     } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
2800         unsigned derived_9 = 0;
2801         if (lanes1 == 1) derived_9 = 0;
2802         else if (lanes1 == 2) derived_9 = 1;
2803         else if (lanes1 == 3) derived_9 = 2;
2804         else if (lanes1 == 4) derived_9 = 3;
2805         else unreachable("No pattern match at pos 9");
2806 
2807         return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
2808     } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
2809         unsigned derived_9 = 0;
2810         if (lanes1 == 5) derived_9 = 0;
2811         else if (lanes1 == 6) derived_9 = 1;
2812         else unreachable("No pattern match at pos 9");
2813 
2814         return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
2815     } else {
2816         unreachable("No matching state found in add_iadd_v4s8");
2817     }
2818 }
2819 
2820 static inline unsigned
pan_pack_add_texs_2d_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2821 pan_pack_add_texs_2d_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2822 {
2823     unsigned src0 = bi_get_src(ins, regs, 0);
2824     unsigned src1 = bi_get_src(ins, regs, 1);
2825 
2826     unsigned skip = ins->skip;
2827     assert(skip < 2);
2828 
2829     unsigned lod_mode = 1 - ins->texture.compute_lod;
2830     assert(lod_mode < 2);
2831 
2832     unsigned texture_index = ins->texture.texture_index;
2833     unsigned sampler_index = ins->texture.sampler_index;
2834     bi_write_staging_register(clause, ins);
2835     return 0x58000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10);
2836 }
2837 
2838 static inline unsigned
pan_pack_add_vn_asst2_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2839 pan_pack_add_vn_asst2_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2840 {
2841     unsigned src0 = bi_get_src(ins, regs, 0);
2842 
2843     unsigned scale = 0;
2844 
2845     unsigned neg0 = ins->src_neg[0];
2846     assert(neg0 < 2);
2847 
2848     if (scale == 0) {
2849         return 0x3df80 | (src0 << 0) | (neg0 << 3);
2850     } else if (scale == 1) {
2851         return 0x3de80 | (src0 << 0) | (neg0 << 4);
2852     } else {
2853         unreachable("No matching state found in add_vn_asst2_f32");
2854     }
2855 }
2856 
2857 static inline unsigned
pan_pack_add_texc(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2858 pan_pack_add_texc(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2859 {
2860     unsigned src0 = bi_get_src(ins, regs, 1);
2861     unsigned src1 = bi_get_src(ins, regs, 2);
2862     unsigned src2 = bi_get_src(ins, regs, 3);
2863     assert((1 << src2) & 0xf7);
2864 
2865     unsigned skip = ins->skip;
2866     assert(skip < 2);
2867 
2868     bi_read_staging_register(clause, ins);
2869     assert(ins->src[0] == ins->dest);
2870     return 0xd7000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9);
2871 }
2872 
2873 static inline unsigned
pan_pack_add_lea_attr_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2874 pan_pack_add_lea_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2875 {
2876     unsigned src0 = bi_get_src(ins, regs, 1);
2877     unsigned src1 = bi_get_src(ins, regs, 2);
2878 
2879     unsigned register_format_temp = 0;
2880     if (ins->format == nir_type_float16) register_format_temp = 0;
2881     else if (ins->format == nir_type_float32) register_format_temp = 1;
2882     else if (ins->format == nir_type_int32) register_format_temp = 2;
2883     else if (ins->format == nir_type_uint32) register_format_temp = 3;
2884     else if (ins->format == nir_type_int16) register_format_temp = 4;
2885     else if (ins->format == nir_type_uint16) register_format_temp = 5;
2886     else if (ins->format == nir_type_float64) register_format_temp = 6;
2887     else if (ins->format == nir_type_int64) register_format_temp = 7;
2888     else unreachable("Could not pattern match register format");
2889     unsigned register_format = register_format_temp;
2890     assert(register_format < 16);
2891 
2892     unsigned attribute_index = bi_get_immediate(ins, 0);
2893     bi_write_staging_register(clause, ins);
2894     if (register_format != 8) {
2895         unsigned derived_11 = 0;
2896         if (register_format == 0) derived_11 = 0;
2897         else if (register_format == 1) derived_11 = 1;
2898         else if (register_format == 2) derived_11 = 2;
2899         else if (register_format == 3) derived_11 = 3;
2900         else if (register_format == 4) derived_11 = 4;
2901         else if (register_format == 5) derived_11 = 5;
2902         else if (register_format == 6) derived_11 = 6;
2903         else if (register_format == 7) derived_11 = 7;
2904         else unreachable("No pattern match at pos 11");
2905 
2906         return 0xc0000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6) | (derived_11 << 11);
2907     } else if (register_format == 8) {
2908         return 0xc8000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6);
2909     } else {
2910         unreachable("No matching state found in add_lea_attr_imm");
2911     }
2912 }
2913 
2914 static inline unsigned
pan_pack_add_f16_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2915 pan_pack_add_f16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2916 {
2917     unsigned src0 = bi_get_src(ins, regs, 0);
2918 
2919     unsigned round = ins->roundmode;
2920     assert(round < 8);
2921 
2922     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2923     unsigned lane0_temp = 0;
2924     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2925     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2926     else unreachable("Could not pattern match widen");
2927     unsigned lane0 = lane0_temp;
2928     assert(lane0 < 2);
2929 
2930     if (round != 4) {
2931         unsigned derived_4 = 0;
2932         if (round == 0) derived_4 = 0;
2933         else if (round == 1) derived_4 = 1;
2934         else if (round == 2) derived_4 = 2;
2935         else if (round == 3) derived_4 = 3;
2936         else unreachable("No pattern match at pos 4");
2937 
2938         return 0x3c500 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4);
2939     } else if (round == 4) {
2940         return 0x3cc40 | (src0 << 0) | (lane0 << 5);
2941     } else {
2942         unreachable("No matching state found in add_f16_to_s32");
2943     }
2944 }
2945 
2946 static inline unsigned
pan_pack_add_st_cvt(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2947 pan_pack_add_st_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2948 {
2949     unsigned src0 = bi_get_src(ins, regs, 1);
2950     unsigned src1 = bi_get_src(ins, regs, 2);
2951     unsigned src2 = bi_get_src(ins, regs, 3);
2952     assert((1 << src2) & 0xf7);
2953 
2954     unsigned vecsize = ins->vector_channels - 1;
2955     assert(vecsize < 4);
2956 
2957     bi_read_staging_register(clause, ins);
2958     return 0xc9800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
2959 }
2960 
2961 static inline unsigned
pan_pack_add_load_i24(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2962 pan_pack_add_load_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2963 {
2964     unsigned src0 = bi_get_src(ins, regs, 0);
2965     unsigned src1 = bi_get_src(ins, regs, 1);
2966 
2967     assert(ins->segment);
2968     unsigned seg = ins->segment;
2969     assert(seg < 8);
2970 
2971     bi_write_staging_register(clause, ins);
2972     return 0x65000 | (src0 << 0) | (src1 << 3) | (seg << 6);
2973 }
2974 
2975 static inline unsigned
pan_pack_fma_s8_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2976 pan_pack_fma_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2977 {
2978     unsigned src0 = bi_get_src(ins, regs, 0);
2979     assert((1 << src0) & 0xfb);
2980 
2981     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2982     unsigned lane0_temp = 0;
2983     if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2984     else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2985     else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
2986     else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
2987     else unreachable("Could not pattern match widen");
2988     unsigned lane0 = lane0_temp;
2989     assert(lane0 < 4);
2990 
2991     return 0x700b40 | (src0 << 0) | (lane0 << 4);
2992 }
2993 
2994 static inline unsigned
pan_pack_fma_cubeface1(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2995 pan_pack_fma_cubeface1(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2996 {
2997     unsigned src0 = bi_get_src(ins, regs, 0);
2998     assert((1 << src0) & 0xfb);
2999     unsigned src1 = bi_get_src(ins, regs, 1);
3000     assert((1 << src1) & 0xfb);
3001     unsigned src2 = bi_get_src(ins, regs, 2);
3002 
3003     unsigned neg0 = ins->src_neg[0];
3004     assert(neg0 < 2);
3005 
3006     unsigned neg1 = ins->src_neg[1];
3007     assert(neg1 < 2);
3008 
3009     unsigned neg2 = ins->src_neg[2];
3010     assert(neg2 < 2);
3011 
3012     unsigned derived_9 = 0;
3013     if ((neg0 == 0) && (neg1 == 0) && (neg2 == 0)) derived_9 = 0;
3014     else if ((neg0 == 1) && (neg1 == 1) && (neg2 == 1)) derived_9 = 1;
3015     else unreachable("No pattern match at pos 9");
3016 
3017     return 0x706800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3018 }
3019 
3020 static inline unsigned
pan_pack_add_branch_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3021 pan_pack_add_branch_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3022 {
3023     unsigned src0 = bi_get_src(ins, regs, 0);
3024     unsigned src1 = bi_get_src(ins, regs, 1);
3025     unsigned src2 = bi_get_src(ins, regs, 2);
3026     assert((1 << src2) & 0xf7);
3027 
3028     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3029     unsigned widen0_temp = 0;
3030     if (widen0_sz == 32) widen0_temp = 0;
3031     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
3032     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
3033     else unreachable("Could not pattern match widen");
3034     unsigned widen0 = widen0_temp;
3035     assert(widen0 < 4);
3036 
3037     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3038     unsigned widen1_temp = 0;
3039     if (widen1_sz == 32) widen1_temp = 0;
3040     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
3041     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
3042     else unreachable("Could not pattern match widen");
3043     unsigned widen1 = widen1_temp;
3044     assert(widen1 < 4);
3045 
3046     unsigned cmpf_table[] = {
3047         ~0, 2, 3, 1, 0, ~0, ~0
3048     };
3049     unsigned cmpf = cmpf_table[ins->cond];
3050     assert(cmpf < 4);
3051 
3052     if (src0 > src1) {
3053         { unsigned temp = src0; src0 = src1; src1 = temp; }
3054         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
3055         if (cmpf == 0) cmpf = 2;
3056         else if (cmpf == 3) cmpf = 1;
3057         else if (cmpf == 2) cmpf = 0;
3058         else if (cmpf == 1) cmpf = 3;
3059     }
3060 
3061     unsigned derived_12 = 0;
3062     if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
3063     else unreachable("No pattern match at pos 12");
3064 
3065     unsigned derived_9 = 0;
3066     if ((src0 < src1) && (cmpf == 2)) derived_9 = 0;
3067     else if (((src0 < src1) && (cmpf == 3)) || ((src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
3068     else if ((src0 < src1) && (cmpf == 1)) derived_9 = 2;
3069     else if ((src0 < src1) && (cmpf == 0)) derived_9 = 3;
3070     else if ((src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
3071     else unreachable("No pattern match at pos 9");
3072 
3073     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
3074 }
3075 
3076 static inline unsigned
pan_pack_add_iabs_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3077 pan_pack_add_iabs_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3078 {
3079     unsigned src0 = bi_get_src(ins, regs, 0);
3080 
3081     return 0x3dea0 | (src0 << 0);
3082 }
3083 
3084 static inline unsigned
pan_pack_add_iadd_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3085 pan_pack_add_iadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3086 {
3087     unsigned src0 = bi_get_src(ins, regs, 0);
3088     unsigned src1 = bi_get_src(ins, regs, 1);
3089 
3090     unsigned saturate = 0;
3091 
3092     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3093     unsigned lanes0_temp = 0;
3094     if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
3095     else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
3096     else unreachable("Could not pattern match widen");
3097     unsigned lanes0 = lanes0_temp;
3098     assert(lanes0 < 2);
3099 
3100     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3101     unsigned lanes1_temp = 0;
3102     if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
3103     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
3104     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
3105     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
3106     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
3107     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
3108     else unreachable("Could not pattern match widen");
3109     unsigned lanes1 = lanes1_temp;
3110     assert(lanes1 < 8);
3111 
3112     if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
3113         unsigned derived_7 = 0;
3114         if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
3115         else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
3116         else unreachable("No pattern match at pos 7");
3117 
3118         unsigned derived_9 = 0;
3119         if (lanes1 == 0) derived_9 = 0;
3120         else if (lanes1 == 1) derived_9 = 1;
3121         else unreachable("No pattern match at pos 9");
3122 
3123         unsigned derived_10 = 0;
3124         if (lanes0 == 0) derived_10 = 0;
3125         else if (lanes0 == 1) derived_10 = 1;
3126         else unreachable("No pattern match at pos 10");
3127 
3128         return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10);
3129     } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
3130         unsigned derived_7 = 0;
3131         if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
3132         else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
3133         else unreachable("No pattern match at pos 7");
3134 
3135         unsigned derived_9 = 0;
3136         if (lanes1 == 2) derived_9 = 0;
3137         else if (lanes1 == 3) derived_9 = 1;
3138         else unreachable("No pattern match at pos 9");
3139 
3140         return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
3141     } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
3142         unsigned derived_7 = 0;
3143         if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
3144         else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
3145         else unreachable("No pattern match at pos 7");
3146 
3147         unsigned derived_9 = 0;
3148         if (lanes1 == 4) derived_9 = 0;
3149         else if (lanes1 == 5) derived_9 = 1;
3150         else unreachable("No pattern match at pos 9");
3151 
3152         return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
3153     } else {
3154         unreachable("No matching state found in add_iadd_v2u16");
3155     }
3156 }
3157 
3158 static inline unsigned
pan_pack_add_icmp_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3159 pan_pack_add_icmp_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3160 {
3161     unsigned src0 = bi_get_src(ins, regs, 0);
3162     unsigned src1 = bi_get_src(ins, regs, 1);
3163 
3164     unsigned result_type = 1;
3165 
3166     unsigned cmpf_table[] = {
3167         ~0, 2, 3, 1, 0, ~0, ~0
3168     };
3169     unsigned cmpf = cmpf_table[ins->cond];
3170     assert(cmpf < 4);
3171 
3172     if ((cmpf == 2) || (cmpf == 3)) {
3173         { unsigned temp = src0; src0 = src1; src1 = temp; }
3174         if (cmpf == 3) cmpf = 1;
3175         else if (cmpf == 2) cmpf = 0;
3176     }
3177 
3178     unsigned derived_6 = 0;
3179     if (cmpf == 0) derived_6 = 0;
3180     else if (cmpf == 1) derived_6 = 1;
3181     else unreachable("No pattern match at pos 6");
3182 
3183     return 0x7b200 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
3184 }
3185 
3186 static inline unsigned
pan_pack_add_fsin_table_u6(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3187 pan_pack_add_fsin_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3188 {
3189     unsigned src0 = bi_get_src(ins, regs, 0);
3190     assert((1 << src0) & 0xf7);
3191 
3192     unsigned offset = 0;
3193 
3194     return 0x67a80 | (src0 << 0) | (offset << 4);
3195 }
3196 
3197 static inline unsigned
pan_pack_add_cube_ssel(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3198 pan_pack_add_cube_ssel(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3199 {
3200     unsigned src0 = bi_get_src(ins, regs, 0);
3201     unsigned src1 = bi_get_src(ins, regs, 1);
3202     unsigned src2 = bi_get_src(ins, regs, 2);
3203 
3204     unsigned neg0 = ins->src_neg[0];
3205     assert(neg0 < 2);
3206 
3207     unsigned neg1 = ins->src_neg[1];
3208     assert(neg1 < 2);
3209 
3210     unsigned derived_9 = 0;
3211     if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0;
3212     else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1;
3213     else unreachable("No pattern match at pos 9");
3214 
3215     return 0x3e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3216 }
3217 
3218 static inline unsigned
pan_pack_add_fatan_table_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3219 pan_pack_add_fatan_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3220 {
3221     unsigned src0 = bi_get_src(ins, regs, 0);
3222     assert((1 << src0) & 0xf7);
3223     unsigned src1 = bi_get_src(ins, regs, 1);
3224     assert((1 << src1) & 0xf7);
3225 
3226     return 0x67a40 | (src0 << 0) | (src1 << 3);
3227 }
3228 
3229 static inline unsigned
pan_pack_add_isub_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3230 pan_pack_add_isub_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3231 {
3232     unsigned src0 = bi_get_src(ins, regs, 0);
3233     unsigned src1 = bi_get_src(ins, regs, 1);
3234 
3235     unsigned saturate = 0;
3236 
3237     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3238     unsigned lanes0_temp = 0;
3239     if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
3240     else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
3241     else unreachable("Could not pattern match widen");
3242     unsigned lanes0 = lanes0_temp;
3243     assert(lanes0 < 2);
3244 
3245     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3246     unsigned lanes1_temp = 0;
3247     if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
3248     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
3249     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
3250     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
3251     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
3252     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
3253     else unreachable("Could not pattern match widen");
3254     unsigned lanes1 = lanes1_temp;
3255     assert(lanes1 < 8);
3256 
3257     if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
3258         unsigned derived_9 = 0;
3259         if (lanes1 == 0) derived_9 = 0;
3260         else if (lanes1 == 1) derived_9 = 1;
3261         else unreachable("No pattern match at pos 9");
3262 
3263         unsigned derived_10 = 0;
3264         if (lanes0 == 0) derived_10 = 0;
3265         else if (lanes0 == 1) derived_10 = 1;
3266         else unreachable("No pattern match at pos 10");
3267 
3268         return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10);
3269     } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
3270         unsigned derived_9 = 0;
3271         if (lanes1 == 2) derived_9 = 0;
3272         else if (lanes1 == 3) derived_9 = 1;
3273         else unreachable("No pattern match at pos 9");
3274 
3275         return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3276     } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
3277         unsigned derived_9 = 0;
3278         if (lanes1 == 4) derived_9 = 0;
3279         else if (lanes1 == 5) derived_9 = 1;
3280         else unreachable("No pattern match at pos 9");
3281 
3282         return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3283     } else {
3284         unreachable("No matching state found in add_isub_v2s16");
3285     }
3286 }
3287 
3288 static inline unsigned
pan_pack_fma_atom_c1_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3289 pan_pack_fma_atom_c1_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3290 {
3291     unsigned src0 = bi_get_src(ins, regs, 0);
3292     assert((1 << src0) & 0xf3);
3293     unsigned src1 = bi_get_src(ins, regs, 1);
3294     assert((1 << src1) & 0xf3);
3295 
3296     unsigned atom_opc = 2;
3297 
3298     return 0x2f1e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
3299 }
3300 
3301 static inline unsigned
pan_pack_add_isub_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3302 pan_pack_add_isub_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3303 {
3304     unsigned src0 = bi_get_src(ins, regs, 0);
3305     unsigned src1 = bi_get_src(ins, regs, 1);
3306 
3307     unsigned saturate = 0;
3308 
3309     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3310     unsigned lanes1_temp = 0;
3311     if (lanes1_sz == 32) lanes1_temp = 0;
3312     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
3313     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
3314     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
3315     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
3316     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
3317     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
3318     else unreachable("Could not pattern match widen");
3319     unsigned lanes1 = lanes1_temp;
3320     assert(lanes1 < 8);
3321 
3322     if (lanes1 == 0) {
3323         return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8);
3324     } else if ((lanes1 == 1) || (lanes1 == 2)) {
3325         unsigned derived_9 = 0;
3326         if (lanes1 == 1) derived_9 = 0;
3327         else if (lanes1 == 2) derived_9 = 1;
3328         else unreachable("No pattern match at pos 9");
3329 
3330         return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3331     } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
3332         unsigned derived_9 = 0;
3333         if (lanes1 == 3) derived_9 = 0;
3334         else if (lanes1 == 4) derived_9 = 1;
3335         else if (lanes1 == 5) derived_9 = 2;
3336         else if (lanes1 == 6) derived_9 = 3;
3337         else unreachable("No pattern match at pos 9");
3338 
3339         return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3340     } else {
3341         unreachable("No matching state found in add_isub_s32");
3342     }
3343 }
3344 
3345 static inline unsigned
pan_pack_add_ld_attr_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3346 pan_pack_add_ld_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3347 {
3348     unsigned src0 = bi_get_src(ins, regs, 1);
3349     unsigned src1 = bi_get_src(ins, regs, 2);
3350 
3351     unsigned register_format_temp = 0;
3352     if (ins->format == nir_type_float16) register_format_temp = 0;
3353     else if (ins->format == nir_type_float32) register_format_temp = 1;
3354     else if (ins->format == nir_type_int32) register_format_temp = 2;
3355     else if (ins->format == nir_type_uint32) register_format_temp = 3;
3356     else if (ins->format == nir_type_int16) register_format_temp = 4;
3357     else if (ins->format == nir_type_uint16) register_format_temp = 5;
3358     else if (ins->format == nir_type_float64) register_format_temp = 6;
3359     else if (ins->format == nir_type_int64) register_format_temp = 7;
3360     else unreachable("Could not pattern match register format");
3361     unsigned register_format = register_format_temp;
3362     assert(register_format < 16);
3363 
3364     unsigned vecsize = ins->vector_channels - 1;
3365     assert(vecsize < 4);
3366 
3367     unsigned attribute_index = bi_get_immediate(ins, 0);
3368     bi_write_staging_register(clause, ins);
3369     if (register_format != 8) {
3370         unsigned derived_13 = 0;
3371         if (register_format == 0) derived_13 = 0;
3372         else if (register_format == 1) derived_13 = 1;
3373         else if (register_format == 2) derived_13 = 2;
3374         else if (register_format == 3) derived_13 = 3;
3375         else if (register_format == 4) derived_13 = 4;
3376         else if (register_format == 5) derived_13 = 5;
3377         else if (register_format == 6) derived_13 = 6;
3378         else if (register_format == 7) derived_13 = 7;
3379         else unreachable("No pattern match at pos 13");
3380 
3381         return 0x40000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6) | (derived_13 << 13);
3382     } else if (register_format == 8) {
3383         return 0xc4000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6);
3384     } else {
3385         unreachable("No matching state found in add_ld_attr_imm");
3386     }
3387 }
3388 
3389 static inline unsigned
pan_pack_fma_rshift_xor_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3390 pan_pack_fma_rshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3391 {
3392     unsigned src0 = bi_get_src(ins, regs, 0);
3393     assert((1 << src0) & 0xfb);
3394     unsigned src1 = bi_get_src(ins, regs, 1);
3395     assert((1 << src1) & 0xfb);
3396     unsigned src2 = bi_get_src(ins, regs, 2);
3397 
3398     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
3399     unsigned lanes2_temp = 0;
3400     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
3401     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
3402     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
3403     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
3404     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
3405     else unreachable("Could not pattern match widen");
3406     unsigned lanes2 = lanes2_temp;
3407     assert(lanes2 < 8);
3408 
3409     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
3410     assert(not_result < 2);
3411 
3412     if (lanes2 != 0) {
3413         unsigned derived_9 = 0;
3414         if (lanes2 == 1) derived_9 = 0;
3415         else if (lanes2 == 2) derived_9 = 1;
3416         else if (lanes2 == 3) derived_9 = 2;
3417         else if (lanes2 == 4) derived_9 = 3;
3418         else unreachable("No pattern match at pos 9");
3419 
3420         return 0x320000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
3421     } else if (lanes2 == 0) {
3422         return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13);
3423     } else {
3424         unreachable("No matching state found in fma_rshift_xor_v4i8");
3425     }
3426 }
3427 
3428 static inline unsigned
pan_pack_add_icmpm_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3429 pan_pack_add_icmpm_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3430 {
3431     unsigned src0 = bi_get_src(ins, regs, 0);
3432     unsigned src1 = bi_get_src(ins, regs, 1);
3433     unsigned src2 = bi_get_src(ins, regs, 2);
3434 
3435     return 0x7ba00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
3436 }
3437 
3438 static inline unsigned
pan_pack_add_icmp_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3439 pan_pack_add_icmp_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3440 {
3441     unsigned src0 = bi_get_src(ins, regs, 0);
3442     unsigned src1 = bi_get_src(ins, regs, 1);
3443 
3444     unsigned result_type = 1;
3445 
3446     unsigned cmpf_table[] = {
3447         ~0, ~0, ~0, ~0, ~0, 0, 1
3448     };
3449     unsigned cmpf = cmpf_table[ins->cond];
3450     assert(cmpf < 2);
3451 
3452     return 0x7b300 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
3453 }
3454 
3455 static inline unsigned
pan_pack_add_branch_s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3456 pan_pack_add_branch_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3457 {
3458     unsigned src0 = bi_get_src(ins, regs, 0);
3459     unsigned src1 = bi_get_src(ins, regs, 1);
3460     unsigned src2 = bi_get_src(ins, regs, 2);
3461     assert((1 << src2) & 0xf7);
3462 
3463     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3464     unsigned widen0_temp = 0;
3465     if (widen0_sz == 32) widen0_temp = 0;
3466     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
3467     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
3468     else unreachable("Could not pattern match widen");
3469     unsigned widen0 = widen0_temp;
3470     assert(widen0 < 4);
3471 
3472     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3473     unsigned widen1_temp = 0;
3474     if (widen1_sz == 32) widen1_temp = 0;
3475     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
3476     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
3477     else unreachable("Could not pattern match widen");
3478     unsigned widen1 = widen1_temp;
3479     assert(widen1 < 4);
3480 
3481     unsigned cmpf_table[] = {
3482         ~0, 2, 3, 1, 0, ~0, ~0
3483     };
3484     unsigned cmpf = cmpf_table[ins->cond];
3485     assert(cmpf < 4);
3486 
3487     if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1))) {
3488         { unsigned temp = src0; src0 = src1; src1 = temp; }
3489         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
3490         if (cmpf == 0) cmpf = 2;
3491         else if (cmpf == 3) cmpf = 1;
3492         else if (cmpf == 2) cmpf = 0;
3493         else if (cmpf == 1) cmpf = 3;
3494     }
3495 
3496     unsigned derived_12 = 0;
3497     if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
3498     else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
3499     else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 4;
3500     else unreachable("No pattern match at pos 12");
3501 
3502     unsigned derived_9 = 0;
3503     if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 2))) derived_9 = 0;
3504     else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
3505     else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) derived_9 = 2;
3506     else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0))) derived_9 = 3;
3507     else if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
3508     else unreachable("No pattern match at pos 9");
3509 
3510     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
3511 }
3512 
3513 static inline unsigned
pan_pack_add_load_i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3514 pan_pack_add_load_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3515 {
3516     unsigned src0 = bi_get_src(ins, regs, 0);
3517     unsigned src1 = bi_get_src(ins, regs, 1);
3518 
3519     assert(ins->segment);
3520     unsigned seg = ins->segment;
3521     assert(seg < 8);
3522 
3523     unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3524     unsigned lane_temp = 0;
3525     if (lane_sz == 8 && ins->swizzle[0][0] == 0) lane_temp = 0;
3526     else if (lane_sz == 8 && ins->swizzle[0][0] == 1) lane_temp = 1;
3527     else if (lane_sz == 8 && ins->swizzle[0][0] == 2) lane_temp = 2;
3528     else if (lane_sz == 8 && ins->swizzle[0][0] == 3) lane_temp = 3;
3529     else if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 4;
3530     else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 5;
3531     else if (lane_sz == 32) lane_temp = 6;
3532     else if (lane_sz == 64) lane_temp = 7;
3533     else unreachable("Could not pattern match widen");
3534     unsigned lane = lane_temp;
3535     assert(lane < 8);
3536 
3537     ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
3538     bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
3539     unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0;
3540     assert(extend < 4);
3541 
3542     bi_write_staging_register(clause, ins);
3543     if ((extend == 0) && ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))) {
3544         unsigned derived_9 = 0;
3545         if (lane == 0) derived_9 = 0;
3546         else if (lane == 1) derived_9 = 1;
3547         else if (lane == 2) derived_9 = 2;
3548         else if (lane == 3) derived_9 = 3;
3549         else unreachable("No pattern match at pos 9");
3550 
3551         return 0x60000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
3552     } else if ((extend != 0) && ((lane == 4) || (lane == 5))) {
3553         unsigned derived_9 = 0;
3554         if (extend == 1) derived_9 = 0;
3555         else if (extend == 2) derived_9 = 1;
3556         else unreachable("No pattern match at pos 9");
3557 
3558         unsigned derived_10 = 0;
3559         if (lane == 4) derived_10 = 0;
3560         else if (lane == 5) derived_10 = 1;
3561         else unreachable("No pattern match at pos 10");
3562 
3563         return 0x63800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9) | (derived_10 << 10);
3564     } else if ((extend != 0) && (lane == 6)) {
3565         unsigned derived_9 = 0;
3566         if (extend == 1) derived_9 = 0;
3567         else if (extend == 2) derived_9 = 1;
3568         else unreachable("No pattern match at pos 9");
3569 
3570         return 0x63400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
3571     } else if ((extend != 0) && (lane == 7)) {
3572         unsigned derived_9 = 0;
3573         if (extend == 1) derived_9 = 0;
3574         else if (extend == 2) derived_9 = 1;
3575         else unreachable("No pattern match at pos 9");
3576 
3577         return 0x61400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
3578     } else {
3579         unreachable("No matching state found in add_load_i8");
3580     }
3581 }
3582 
3583 static inline unsigned
pan_pack_fma_csel_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3584 pan_pack_fma_csel_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3585 {
3586     unsigned src0 = bi_get_src(ins, regs, 0);
3587     assert((1 << src0) & 0xfb);
3588     unsigned src1 = bi_get_src(ins, regs, 1);
3589     assert((1 << src1) & 0xfb);
3590     unsigned src2 = bi_get_src(ins, regs, 2);
3591     unsigned src3 = bi_get_src(ins, regs, 3);
3592 
3593     unsigned cmpf_table[] = {
3594         ~0, 4, 5, 2, 1, 0, 3
3595     };
3596     unsigned cmpf = cmpf_table[ins->cond];
3597     assert(cmpf < 8);
3598 
3599     if ((cmpf == 4) || (cmpf == 5)) {
3600         { unsigned temp = src0; src0 = src1; src1 = temp; }
3601         if (cmpf == 5) cmpf = 2;
3602         else if (cmpf == 4) cmpf = 1;
3603     }
3604 
3605     if (cmpf == 3) {
3606         { unsigned temp = src2; src2 = src3; src3 = temp; }
3607         if (cmpf == 3) cmpf = 0;
3608     }
3609 
3610     unsigned derived_12 = 0;
3611     if (cmpf == 0) derived_12 = 0;
3612     else if (cmpf == 1) derived_12 = 1;
3613     else if (cmpf == 2) derived_12 = 2;
3614     else unreachable("No pattern match at pos 12");
3615 
3616     return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
3617 }
3618 
3619 static inline unsigned
pan_pack_add_frsq_approx_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3620 pan_pack_add_frsq_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3621 {
3622     unsigned src0 = bi_get_src(ins, regs, 0);
3623     assert((1 << src0) & 0xf7);
3624 
3625     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3626     unsigned widen0_temp = 0;
3627     if (widen0_sz == 32) widen0_temp = 0;
3628     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
3629     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
3630     else unreachable("Could not pattern match widen");
3631     unsigned widen0 = widen0_temp;
3632     assert(widen0 < 4);
3633 
3634     unsigned neg = ins->src_neg[0];
3635     assert(neg < 2);
3636 
3637     unsigned abs0 = ins->src_abs[0];
3638     assert(abs0 < 2);
3639 
3640     unsigned divzero = 0;
3641 
3642     if (widen0 == 0) {
3643         return 0x67100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
3644     } else if (widen0 != 0) {
3645         unsigned derived_7 = 0;
3646         if (widen0 == 1) derived_7 = 0;
3647         else if (widen0 == 2) derived_7 = 1;
3648         else unreachable("No pattern match at pos 7");
3649 
3650         return 0x67140 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
3651     } else {
3652         unreachable("No matching state found in add_frsq_approx_f32");
3653     }
3654 }
3655 
3656 static inline unsigned
pan_pack_add_iabs_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3657 pan_pack_add_iabs_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3658 {
3659     unsigned src0 = bi_get_src(ins, regs, 0);
3660 
3661     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3662     unsigned swz0_temp = 0;
3663     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
3664     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
3665     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
3666     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
3667     else unreachable("Could not pattern match widen");
3668     unsigned swz0 = swz0_temp;
3669     assert(swz0 < 4);
3670 
3671     return 0x3de88 | (src0 << 0) | (swz0 << 4);
3672 }
3673 
3674 static inline unsigned
pan_pack_add_ld_attr_tex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3675 pan_pack_add_ld_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3676 {
3677     unsigned src0 = bi_get_src(ins, regs, 0);
3678     unsigned src1 = bi_get_src(ins, regs, 1);
3679     unsigned src2 = bi_get_src(ins, regs, 2);
3680 
3681     unsigned register_format_temp = 0;
3682     if (ins->format == nir_type_float16) register_format_temp = 0;
3683     else if (ins->format == nir_type_float32) register_format_temp = 1;
3684     else if (ins->format == nir_type_int32) register_format_temp = 2;
3685     else if (ins->format == nir_type_uint32) register_format_temp = 3;
3686     else if (ins->format == nir_type_int16) register_format_temp = 4;
3687     else if (ins->format == nir_type_uint16) register_format_temp = 5;
3688     else if (ins->format == nir_type_float64) register_format_temp = 6;
3689     else if (ins->format == nir_type_int64) register_format_temp = 7;
3690     else unreachable("Could not pattern match register format");
3691     unsigned register_format = register_format_temp;
3692     assert(register_format < 16);
3693 
3694     unsigned vecsize = ins->vector_channels - 1;
3695     assert(vecsize < 4);
3696 
3697     bi_write_staging_register(clause, ins);
3698     if (register_format != 8) {
3699         unsigned derived_13 = 0;
3700         if (register_format == 0) derived_13 = 0;
3701         else if (register_format == 1) derived_13 = 1;
3702         else if (register_format == 2) derived_13 = 2;
3703         else if (register_format == 3) derived_13 = 3;
3704         else if (register_format == 4) derived_13 = 4;
3705         else if (register_format == 5) derived_13 = 5;
3706         else if (register_format == 6) derived_13 = 6;
3707         else if (register_format == 7) derived_13 = 7;
3708         else unreachable("No pattern match at pos 13");
3709 
3710         return 0x40600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13);
3711     } else if (register_format == 8) {
3712         return 0xc4600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11);
3713     } else {
3714         unreachable("No matching state found in add_ld_attr_tex");
3715     }
3716 }
3717 
3718 static inline unsigned
pan_pack_fma_imuld(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3719 pan_pack_fma_imuld(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3720 {
3721     unsigned src0 = bi_get_src(ins, regs, 0);
3722     assert((1 << src0) & 0x33);
3723     unsigned src1 = bi_get_src(ins, regs, 1);
3724     assert((1 << src1) & 0x33);
3725 
3726     unsigned threads = 0;
3727 
3728     return 0x70f100 | (src0 << 0) | (src1 << 3) | (threads << 6);
3729 }
3730 
3731 static inline unsigned
pan_pack_add_ld_var_special(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3732 pan_pack_add_ld_var_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3733 {
3734     unsigned src0 = bi_get_src(ins, regs, 1);
3735 
3736     unsigned varying_name = ins->constant.u64 & 0x3;
3737     assert(varying_name < 32);
3738 
3739     unsigned vecsize = ins->vector_channels - 1;
3740     assert(vecsize < 4);
3741 
3742     unsigned update = (ins->constant.u64 >= 20) ? 3 : 0;
3743     assert(update < 4);
3744 
3745     unsigned register_format_temp = 0;
3746     if (ins->format == nir_type_float32) register_format_temp = 0;
3747     else if (ins->format == nir_type_float16) register_format_temp = 1;
3748     else unreachable("Could not pattern match register format");
3749     unsigned register_format = register_format_temp;
3750     assert(register_format < 4);
3751 
3752     unsigned sample = ins->load_vary.interp_mode;
3753     assert(sample < 8);
3754 
3755     bi_write_staging_register(clause, ins);
3756     if (register_format != 2) {
3757         unsigned derived_3 = 0;
3758         if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0;
3759         else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2;
3760         else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3;
3761         else unreachable("No pattern match at pos 3");
3762 
3763         unsigned derived_19 = 0;
3764         if (register_format == 0) derived_19 = 0;
3765         else if (register_format == 1) derived_19 = 1;
3766         else unreachable("No pattern match at pos 19");
3767 
3768         unsigned derived_10 = 0;
3769         if ((sample == 0) && (update == 0)) derived_10 = 0;
3770         else if ((sample == 1) && (update == 0)) derived_10 = 1;
3771         else if ((sample == 2) && (update == 0)) derived_10 = 2;
3772         else if ((sample == 3) && (update == 0)) derived_10 = 3;
3773         else if ((sample == 4) && (update == 1)) derived_10 = 4;
3774         else if ((sample == 0) && (update == 2)) derived_10 = 8;
3775         else if ((sample == 1) && (update == 2)) derived_10 = 9;
3776         else if ((sample == 0) && (update == 3)) derived_10 = 10;
3777         else if ((sample == 1) && (update == 3)) derived_10 = 11;
3778         else if ((sample == 2) && (update == 3)) derived_10 = 12;
3779         else if ((sample == 3) && (update == 3)) derived_10 = 13;
3780         else unreachable("No pattern match at pos 10");
3781 
3782         return 0x500a0 | (src0 << 0) | (derived_3 << 3) | (derived_19 << 19) | (derived_10 << 10);
3783     } else if (register_format == 2) {
3784         unsigned derived_3 = 0;
3785         if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0;
3786         else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2;
3787         else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3;
3788         else unreachable("No pattern match at pos 3");
3789 
3790         unsigned derived_10 = 0;
3791         if ((sample == 0) && (update == 0)) derived_10 = 0;
3792         else if ((sample == 1) && (update == 0)) derived_10 = 1;
3793         else if ((sample == 2) && (update == 0)) derived_10 = 2;
3794         else if ((sample == 3) && (update == 0)) derived_10 = 3;
3795         else if ((sample == 4) && (update == 1)) derived_10 = 4;
3796         else if ((sample == 0) && (update == 2)) derived_10 = 8;
3797         else if ((sample == 1) && (update == 2)) derived_10 = 9;
3798         else if ((sample == 0) && (update == 3)) derived_10 = 10;
3799         else if ((sample == 1) && (update == 3)) derived_10 = 11;
3800         else if ((sample == 2) && (update == 3)) derived_10 = 12;
3801         else if ((sample == 3) && (update == 3)) derived_10 = 13;
3802         else unreachable("No pattern match at pos 10");
3803 
3804         return 0xcc0a0 | (src0 << 0) | (derived_3 << 3) | (derived_10 << 10);
3805     } else {
3806         unreachable("No matching state found in add_ld_var_special");
3807     }
3808 }
3809 
3810 static inline unsigned
pan_pack_add_fcos_table_u6(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3811 pan_pack_add_fcos_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3812 {
3813     unsigned src0 = bi_get_src(ins, regs, 0);
3814     assert((1 << src0) & 0xf7);
3815 
3816     unsigned offset = 0;
3817 
3818     return 0x67a88 | (src0 << 0) | (offset << 4);
3819 }
3820 
3821 static inline unsigned
pan_pack_add_ld_cvt(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3822 pan_pack_add_ld_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3823 {
3824     unsigned src0 = bi_get_src(ins, regs, 0);
3825     unsigned src1 = bi_get_src(ins, regs, 1);
3826     unsigned src2 = bi_get_src(ins, regs, 2);
3827     assert((1 << src2) & 0xf7);
3828 
3829     unsigned vecsize = ins->vector_channels - 1;
3830     assert(vecsize < 4);
3831 
3832     bi_write_staging_register(clause, ins);
3833     return 0xc9000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
3834 }
3835 
3836 static inline unsigned
pan_pack_fma_arshift_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3837 pan_pack_fma_arshift_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3838 {
3839     unsigned src0 = bi_get_src(ins, regs, 0);
3840     assert((1 << src0) & 0xfb);
3841     unsigned src1 = bi_get_src(ins, regs, 1);
3842     assert((1 << src1) & 0x8);
3843     unsigned src2 = bi_get_src(ins, regs, 2);
3844 
3845     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
3846     unsigned lanes2_temp = 0;
3847     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
3848     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
3849     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
3850     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
3851     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
3852     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
3853     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
3854     else unreachable("Could not pattern match widen");
3855     unsigned lanes2 = lanes2_temp;
3856     assert(lanes2 < 8);
3857 
3858     if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
3859         unsigned derived_9 = 0;
3860         if (lanes2 == 0) derived_9 = 0;
3861         else if (lanes2 == 1) derived_9 = 1;
3862         else if (lanes2 == 2) derived_9 = 2;
3863         else if (lanes2 == 3) derived_9 = 3;
3864         else unreachable("No pattern match at pos 9");
3865 
3866         return 0x334818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3867     } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
3868         unsigned derived_9 = 0;
3869         if (lanes2 == 4) derived_9 = 1;
3870         else if (lanes2 == 5) derived_9 = 2;
3871         else if (lanes2 == 6) derived_9 = 3;
3872         else unreachable("No pattern match at pos 9");
3873 
3874         return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3875     } else {
3876         unreachable("No matching state found in fma_arshift_v2i16");
3877     }
3878 }
3879 
3880 static inline unsigned
pan_pack_fma_csel_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3881 pan_pack_fma_csel_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3882 {
3883     unsigned src0 = bi_get_src(ins, regs, 0);
3884     assert((1 << src0) & 0xfb);
3885     unsigned src1 = bi_get_src(ins, regs, 1);
3886     assert((1 << src1) & 0xfb);
3887     unsigned src2 = bi_get_src(ins, regs, 2);
3888     unsigned src3 = bi_get_src(ins, regs, 3);
3889 
3890     unsigned cmpf_table[] = {
3891         ~0, ~0, ~0, ~0, ~0, 0, 1
3892     };
3893     unsigned cmpf = cmpf_table[ins->cond];
3894     assert(cmpf < 2);
3895 
3896     if (cmpf == 1) {
3897         { unsigned temp = src2; src2 = src3; src3 = temp; }
3898         if (cmpf == 1) cmpf = 0;
3899     }
3900 
3901     unsigned derived_12 = 0;
3902     if (cmpf == 0) derived_12 = 3;
3903     else unreachable("No pattern match at pos 12");
3904 
3905     return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
3906 }
3907 
3908 static inline unsigned
pan_pack_add_ld_tile(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3909 pan_pack_add_ld_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3910 {
3911     unsigned src0 = bi_get_src(ins, regs, 0);
3912     unsigned src1 = bi_get_src(ins, regs, 1);
3913     unsigned src2 = bi_get_src(ins, regs, 2);
3914     assert((1 << src2) & 0xf7);
3915 
3916     unsigned vecsize = ins->vector_channels - 1;
3917     assert(vecsize < 4);
3918 
3919     bi_write_staging_register(clause, ins);
3920     return 0xcb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
3921 }
3922 
3923 static inline unsigned
pan_pack_add_icmp_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3924 pan_pack_add_icmp_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3925 {
3926     unsigned src0 = bi_get_src(ins, regs, 0);
3927     unsigned src1 = bi_get_src(ins, regs, 1);
3928 
3929     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3930     unsigned swz0_temp = 0;
3931     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
3932     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
3933     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
3934     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
3935     else unreachable("Could not pattern match widen");
3936     unsigned swz0 = swz0_temp;
3937     assert(swz0 < 4);
3938 
3939     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3940     unsigned swz1_temp = 0;
3941     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
3942     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
3943     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
3944     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
3945     else unreachable("Could not pattern match widen");
3946     unsigned swz1 = swz1_temp;
3947     assert(swz1 < 4);
3948 
3949     unsigned result_type = 1;
3950 
3951     unsigned cmpf_table[] = {
3952         ~0, 2, 3, 1, 0, ~0, ~0
3953     };
3954     unsigned cmpf = cmpf_table[ins->cond];
3955     assert(cmpf < 4);
3956 
3957     if ((cmpf == 2) || (cmpf == 3)) {
3958         { unsigned temp = src0; src0 = src1; src1 = temp; }
3959         { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
3960         if (cmpf == 3) cmpf = 1;
3961         else if (cmpf == 2) cmpf = 0;
3962     }
3963 
3964     unsigned derived_12 = 0;
3965     if (cmpf == 0) derived_12 = 0;
3966     else if (cmpf == 1) derived_12 = 1;
3967     else unreachable("No pattern match at pos 12");
3968 
3969     return 0x78000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12);
3970 }
3971 
3972 static inline unsigned
pan_pack_add_load_i128(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3973 pan_pack_add_load_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3974 {
3975     unsigned src0 = bi_get_src(ins, regs, 0);
3976     unsigned src1 = bi_get_src(ins, regs, 1);
3977 
3978     assert(ins->segment);
3979     unsigned seg = ins->segment;
3980     assert(seg < 8);
3981 
3982     bi_write_staging_register(clause, ins);
3983     return 0x61000 | (src0 << 0) | (src1 << 3) | (seg << 6);
3984 }
3985 
3986 static inline unsigned
pan_pack_add_ilogb_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3987 pan_pack_add_ilogb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3988 {
3989     unsigned src0 = bi_get_src(ins, regs, 0);
3990 
3991     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3992     unsigned widen0_temp = 0;
3993     if (widen0_sz == 32) widen0_temp = 1;
3994     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
3995     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
3996     else unreachable("Could not pattern match widen");
3997     unsigned widen0 = widen0_temp;
3998     assert(widen0 < 4);
3999 
4000     return 0x3d9e0 | (src0 << 0) | (widen0 << 3);
4001 }
4002 
4003 static inline unsigned
pan_pack_add_frcp_approx_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4004 pan_pack_add_frcp_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4005 {
4006     unsigned src0 = bi_get_src(ins, regs, 0);
4007     assert((1 << src0) & 0xf7);
4008 
4009     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4010     unsigned widen0_temp = 0;
4011     if (widen0_sz == 32) widen0_temp = 0;
4012     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4013     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4014     else unreachable("Could not pattern match widen");
4015     unsigned widen0 = widen0_temp;
4016     assert(widen0 < 4);
4017 
4018     unsigned neg = ins->src_neg[0];
4019     assert(neg < 2);
4020 
4021     unsigned abs0 = ins->src_abs[0];
4022     assert(abs0 < 2);
4023 
4024     unsigned divzero = 0;
4025 
4026     if (widen0 == 0) {
4027         return 0x67000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
4028     } else if (widen0 != 0) {
4029         unsigned derived_7 = 0;
4030         if (widen0 == 1) derived_7 = 0;
4031         else if (widen0 == 2) derived_7 = 1;
4032         else unreachable("No pattern match at pos 7");
4033 
4034         return 0x67040 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
4035     } else {
4036         unreachable("No matching state found in add_frcp_approx_f32");
4037     }
4038 }
4039 
4040 static inline unsigned
pan_pack_add_frcp_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4041 pan_pack_add_frcp_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4042 {
4043     unsigned src0 = bi_get_src(ins, regs, 0);
4044     assert((1 << src0) & 0xf7);
4045 
4046     unsigned neg = ins->src_neg[0];
4047     assert(neg < 2);
4048 
4049     unsigned abs0 = ins->src_abs[0];
4050     assert(abs0 < 2);
4051 
4052     unsigned divzero = 0;
4053 
4054     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4055     unsigned lane0_temp = 0;
4056     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
4057     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
4058     else unreachable("Could not pattern match widen");
4059     unsigned lane0 = lane0_temp;
4060     assert(lane0 < 2);
4061 
4062     return 0x67080 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8);
4063 }
4064 
4065 static inline unsigned
pan_pack_add_discard_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4066 pan_pack_add_discard_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4067 {
4068     unsigned src0 = bi_get_src(ins, regs, 0);
4069     unsigned src1 = bi_get_src(ins, regs, 1);
4070 
4071     unsigned cmpf_table[] = {
4072         ~0, 4, 5, 2, 1, 0, 3
4073     };
4074     unsigned cmpf = cmpf_table[ins->cond];
4075     assert(cmpf < 8);
4076 
4077     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4078     unsigned widen0_temp = 0;
4079     if (widen0_sz == 32) widen0_temp = 0;
4080     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4081     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4082     else unreachable("Could not pattern match widen");
4083     unsigned widen0 = widen0_temp;
4084     assert(widen0 < 4);
4085 
4086     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4087     unsigned widen1_temp = 0;
4088     if (widen1_sz == 32) widen1_temp = 0;
4089     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4090     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4091     else unreachable("Could not pattern match widen");
4092     unsigned widen1 = widen1_temp;
4093     assert(widen1 < 4);
4094 
4095     if ((cmpf == 1) || (cmpf == 2)) {
4096         { unsigned temp = src0; src0 = src1; src1 = temp; }
4097         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
4098         if (cmpf == 1) cmpf = 4;
4099         else if (cmpf == 2) cmpf = 5;
4100     }
4101 
4102     unsigned derived_6 = 0;
4103     if (cmpf == 0) derived_6 = 0;
4104     else if (cmpf == 3) derived_6 = 1;
4105     else if (cmpf == 4) derived_6 = 2;
4106     else if (cmpf == 5) derived_6 = 3;
4107     else unreachable("No pattern match at pos 6");
4108 
4109     unsigned derived_8 = 0;
4110     if ((widen0 == 1) && (widen1 == 1)) derived_8 = 0;
4111     else if ((widen0 == 2) && (widen1 == 1)) derived_8 = 1;
4112     else if ((widen0 == 1) && (widen1 == 2)) derived_8 = 2;
4113     else if ((widen0 == 2) && (widen1 == 2)) derived_8 = 3;
4114     else if ((widen0 == 0) && (widen1 == 0)) derived_8 = 4;
4115     else unreachable("No pattern match at pos 8");
4116 
4117     return 0xc8800 | (src0 << 0) | (src1 << 3) | (derived_6 << 6) | (derived_8 << 8);
4118 }
4119 
4120 static inline unsigned
pan_pack_fma_iaddc_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4121 pan_pack_fma_iaddc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4122 {
4123     unsigned src0 = bi_get_src(ins, regs, 0);
4124     assert((1 << src0) & 0xfb);
4125     unsigned src1 = bi_get_src(ins, regs, 1);
4126     assert((1 << src1) & 0xfb);
4127     unsigned src2 = bi_get_src(ins, regs, 2);
4128 
4129     return 0x27fc00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
4130 }
4131 
4132 static inline unsigned
pan_pack_add_f16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4133 pan_pack_add_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4134 {
4135     unsigned src0 = bi_get_src(ins, regs, 0);
4136 
4137     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4138     unsigned lane0_temp = 0;
4139     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
4140     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
4141     else unreachable("Could not pattern match widen");
4142     unsigned lane0 = lane0_temp;
4143     assert(lane0 < 2);
4144 
4145     return 0x3cd10 | (src0 << 0) | (lane0 << 3);
4146 }
4147 
4148 static inline unsigned
pan_pack_add_fexp_table_u4(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4149 pan_pack_add_fexp_table_u4(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4150 {
4151     unsigned src0 = bi_get_src(ins, regs, 0);
4152     assert((1 << src0) & 0xf7);
4153 
4154     unsigned adj = 0;
4155 
4156     return 0x67ac0 | (src0 << 0) | (adj << 3);
4157 }
4158 
4159 static inline unsigned
pan_pack_add_branch_no_diverg(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4160 pan_pack_add_branch_no_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4161 {
4162     unsigned src0 = bi_get_src(ins, regs, 0);
4163     assert((1 << src0) & 0xf7);
4164 
4165     return 0x6fa34 | (src0 << 6);
4166 }
4167 
4168 static inline unsigned
pan_pack_add_acmpxchg_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4169 pan_pack_add_acmpxchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4170 {
4171     unsigned src0 = bi_get_src(ins, regs, 1);
4172     unsigned src1 = bi_get_src(ins, regs, 2);
4173 
4174     assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
4175     unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
4176     assert(seg < 2);
4177 
4178     bi_read_staging_register(clause, ins);
4179     assert(ins->src[0] == ins->dest);
4180     return 0x644c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
4181 }
4182 
4183 static inline unsigned
pan_pack_add_icmp_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4184 pan_pack_add_icmp_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4185 {
4186     unsigned src0 = bi_get_src(ins, regs, 0);
4187     unsigned src1 = bi_get_src(ins, regs, 1);
4188 
4189     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4190     unsigned swz0_temp = 0;
4191     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4192     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4193     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
4194     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4195     else unreachable("Could not pattern match widen");
4196     unsigned swz0 = swz0_temp;
4197     assert(swz0 < 4);
4198 
4199     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4200     unsigned swz1_temp = 0;
4201     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
4202     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
4203     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
4204     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
4205     else unreachable("Could not pattern match widen");
4206     unsigned swz1 = swz1_temp;
4207     assert(swz1 < 4);
4208 
4209     unsigned result_type = 1;
4210 
4211     unsigned cmpf_table[] = {
4212         ~0, ~0, ~0, ~0, ~0, 0, 1
4213     };
4214     unsigned cmpf = cmpf_table[ins->cond];
4215     assert(cmpf < 2);
4216 
4217     return 0x7a000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (cmpf << 11);
4218 }
4219 
4220 static inline unsigned
pan_pack_fma_fadd_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4221 pan_pack_fma_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4222 {
4223     unsigned src0 = bi_get_src(ins, regs, 0);
4224     assert((1 << src0) & 0xfb);
4225     unsigned src1 = bi_get_src(ins, regs, 1);
4226     assert((1 << src1) & 0xfb);
4227 
4228     unsigned abs1 = ins->src_abs[1];
4229     assert(abs1 < 2);
4230 
4231     unsigned neg0 = ins->src_neg[0];
4232     assert(neg0 < 2);
4233 
4234     unsigned neg1 = ins->src_neg[1];
4235     assert(neg1 < 2);
4236 
4237     unsigned abs0 = ins->src_abs[0];
4238     assert(abs0 < 2);
4239 
4240     unsigned round = ins->roundmode;
4241     assert(round < 4);
4242 
4243     unsigned clamp = ins->outmod;
4244     assert(clamp < 4);
4245 
4246     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4247     unsigned widen0_temp = 0;
4248     if (widen0_sz == 32) widen0_temp = 0;
4249     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4250     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4251     else unreachable("Could not pattern match widen");
4252     unsigned widen0 = widen0_temp;
4253     assert(widen0 < 4);
4254 
4255     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4256     unsigned widen1_temp = 0;
4257     if (widen1_sz == 32) widen1_temp = 0;
4258     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4259     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4260     else unreachable("Could not pattern match widen");
4261     unsigned widen1 = widen1_temp;
4262     assert(widen1 < 4);
4263 
4264     if ((widen0 == 2) && (widen1 == 1)) {
4265         { unsigned temp = src0; src0 = src1; src1 = temp; }
4266         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
4267         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
4268         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
4269     }
4270 
4271     unsigned derived_9 = 0;
4272     if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
4273     else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
4274     else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
4275     else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
4276     else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4;
4277     else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5;
4278     else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6;
4279     else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7;
4280     else unreachable("No pattern match at pos 9");
4281 
4282     return 0x2c0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (round << 13) | (clamp << 15) | (derived_9 << 9);
4283 }
4284 
4285 static inline unsigned
pan_pack_add_icmp_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4286 pan_pack_add_icmp_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4287 {
4288     unsigned src0 = bi_get_src(ins, regs, 0);
4289     unsigned src1 = bi_get_src(ins, regs, 1);
4290 
4291     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4292     unsigned swz0_temp = 0;
4293     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4294     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4295     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
4296     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4297     else unreachable("Could not pattern match widen");
4298     unsigned swz0 = swz0_temp;
4299     assert(swz0 < 4);
4300 
4301     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4302     unsigned swz1_temp = 0;
4303     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
4304     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
4305     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
4306     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
4307     else unreachable("Could not pattern match widen");
4308     unsigned swz1 = swz1_temp;
4309     assert(swz1 < 4);
4310 
4311     unsigned result_type = 1;
4312 
4313     unsigned cmpf_table[] = {
4314         ~0, 2, 3, 1, 0, ~0, ~0
4315     };
4316     unsigned cmpf = cmpf_table[ins->cond];
4317     assert(cmpf < 4);
4318 
4319     if ((cmpf == 2) || (cmpf == 3)) {
4320         { unsigned temp = src0; src0 = src1; src1 = temp; }
4321         { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
4322         if (cmpf == 3) cmpf = 1;
4323         else if (cmpf == 2) cmpf = 0;
4324     }
4325 
4326     unsigned derived_12 = 0;
4327     if (cmpf == 0) derived_12 = 0;
4328     else if (cmpf == 1) derived_12 = 1;
4329     else unreachable("No pattern match at pos 12");
4330 
4331     return 0x78800 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12);
4332 }
4333 
4334 static inline unsigned
pan_pack_fma_csel_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4335 pan_pack_fma_csel_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4336 {
4337     unsigned src0 = bi_get_src(ins, regs, 0);
4338     assert((1 << src0) & 0xfb);
4339     unsigned src1 = bi_get_src(ins, regs, 1);
4340     assert((1 << src1) & 0xfb);
4341     unsigned src2 = bi_get_src(ins, regs, 2);
4342     unsigned src3 = bi_get_src(ins, regs, 3);
4343 
4344     unsigned cmpf_table[] = {
4345         ~0, 2, 3, 1, 0, ~0, ~0
4346     };
4347     unsigned cmpf = cmpf_table[ins->cond];
4348     assert(cmpf < 4);
4349 
4350     if ((cmpf == 2) || (cmpf == 3)) {
4351         { unsigned temp = src0; src0 = src1; src1 = temp; }
4352         if (cmpf == 3) cmpf = 1;
4353         else if (cmpf == 2) cmpf = 0;
4354     }
4355 
4356     unsigned derived_12 = 0;
4357     if (cmpf == 0) derived_12 = 0;
4358     else if (cmpf == 1) derived_12 = 1;
4359     else unreachable("No pattern match at pos 12");
4360 
4361     return 0x6e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
4362 }
4363 
4364 static inline unsigned
pan_pack_add_branch_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4365 pan_pack_add_branch_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4366 {
4367     unsigned src0 = bi_get_src(ins, regs, 0);
4368     unsigned src1 = bi_get_src(ins, regs, 1);
4369     unsigned src2 = bi_get_src(ins, regs, 2);
4370     assert((1 << src2) & 0xf7);
4371 
4372     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4373     unsigned widen0_temp = 0;
4374     if (widen0_sz == 32) widen0_temp = 0;
4375     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4376     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4377     else unreachable("Could not pattern match widen");
4378     unsigned widen0 = widen0_temp;
4379     assert(widen0 < 4);
4380 
4381     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4382     unsigned widen1_temp = 0;
4383     if (widen1_sz == 32) widen1_temp = 0;
4384     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4385     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4386     else unreachable("Could not pattern match widen");
4387     unsigned widen1 = widen1_temp;
4388     assert(widen1 < 4);
4389 
4390     unsigned cmpf_table[] = {
4391         ~0, 4, 5, 2, 1, 0, 3
4392     };
4393     unsigned cmpf = cmpf_table[ins->cond];
4394     assert(cmpf < 8);
4395 
4396     if (((widen0 != 0) && (widen1 == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
4397         { unsigned temp = src0; src0 = src1; src1 = temp; }
4398         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
4399         if (cmpf == 1) cmpf = 4;
4400         else if (cmpf == 5) cmpf = 2;
4401         else if (cmpf == 4) cmpf = 1;
4402         else if (cmpf == 2) cmpf = 5;
4403     }
4404 
4405     unsigned derived_12 = 0;
4406     if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
4407     else if ((widen0 == 0) && (widen1 == 1)) derived_12 = 5;
4408     else if ((widen0 == 0) && (widen1 == 2)) derived_12 = 6;
4409     else unreachable("No pattern match at pos 12");
4410 
4411     unsigned derived_9 = 0;
4412     if ((widen0 == 0) && (widen1 != 0) && (cmpf == 3)) derived_9 = 1;
4413     else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 2)) derived_9 = 2;
4414     else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 5)) derived_9 = 3;
4415     else if ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
4416     else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
4417     else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
4418     else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
4419     else unreachable("No pattern match at pos 9");
4420 
4421     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
4422 }
4423 
4424 static inline unsigned
pan_pack_add_icmp_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4425 pan_pack_add_icmp_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4426 {
4427     unsigned src0 = bi_get_src(ins, regs, 0);
4428     unsigned src1 = bi_get_src(ins, regs, 1);
4429 
4430     unsigned result_type = 1;
4431 
4432     unsigned cmpf_table[] = {
4433         ~0, 2, 3, 1, 0, ~0, ~0
4434     };
4435     unsigned cmpf = cmpf_table[ins->cond];
4436     assert(cmpf < 4);
4437 
4438     if ((cmpf == 2) || (cmpf == 3)) {
4439         { unsigned temp = src0; src0 = src1; src1 = temp; }
4440         if (cmpf == 3) cmpf = 1;
4441         else if (cmpf == 2) cmpf = 0;
4442     }
4443 
4444     unsigned derived_6 = 0;
4445     if (cmpf == 0) derived_6 = 0;
4446     else if (cmpf == 1) derived_6 = 1;
4447     else unreachable("No pattern match at pos 6");
4448 
4449     return 0x7b280 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
4450 }
4451 
4452 static inline unsigned
pan_pack_add_texs_cube_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4453 pan_pack_add_texs_cube_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4454 {
4455     unsigned src0 = bi_get_src(ins, regs, 0);
4456     unsigned src1 = bi_get_src(ins, regs, 1);
4457     unsigned src2 = bi_get_src(ins, regs, 2);
4458 
4459     unsigned skip = ins->skip;
4460     assert(skip < 2);
4461 
4462     unsigned sampler_index = ins->texture.sampler_index;
4463     unsigned texture_index = ins->texture.texture_index;
4464     bi_write_staging_register(clause, ins);
4465     return 0xdc000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12);
4466 }
4467 
4468 static inline unsigned
pan_pack_fma_rshift_or_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4469 pan_pack_fma_rshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4470 {
4471     unsigned src0 = bi_get_src(ins, regs, 0);
4472     assert((1 << src0) & 0xfb);
4473     unsigned src1 = bi_get_src(ins, regs, 1);
4474     assert((1 << src1) & 0xfb);
4475     unsigned src2 = bi_get_src(ins, regs, 2);
4476 
4477     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
4478     unsigned lane2_temp = 0;
4479     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
4480     else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
4481     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
4482     else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
4483     else unreachable("Could not pattern match widen");
4484     unsigned lane2 = lane2_temp;
4485     assert(lane2 < 4);
4486 
4487     unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
4488     assert(not1 < 2);
4489 
4490     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
4491     assert(not_result < 2);
4492 
4493     return 0x303000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
4494 }
4495 
4496 static inline unsigned
pan_pack_add_ld_var(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4497 pan_pack_add_ld_var(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4498 {
4499     unsigned src0 = bi_get_src(ins, regs, 0);
4500     unsigned src1 = bi_get_src(ins, regs, 1);
4501 
4502     unsigned vecsize = ins->vector_channels - 1;
4503     assert(vecsize < 4);
4504 
4505     unsigned update = (ins->constant.u64 >= 20) ? 3 : 0;
4506     assert(update < 4);
4507 
4508     unsigned register_format_temp = 0;
4509     if (ins->format == nir_type_float32) register_format_temp = 0;
4510     else if (ins->format == nir_type_float16) register_format_temp = 1;
4511     else unreachable("Could not pattern match register format");
4512     unsigned register_format = register_format_temp;
4513     assert(register_format < 4);
4514 
4515     unsigned sample = ins->load_vary.interp_mode;
4516     assert(sample < 8);
4517 
4518     bi_write_staging_register(clause, ins);
4519     if (register_format != 2) {
4520         unsigned derived_19 = 0;
4521         if (register_format == 0) derived_19 = 0;
4522         else if (register_format == 1) derived_19 = 1;
4523         else unreachable("No pattern match at pos 19");
4524 
4525         unsigned derived_10 = 0;
4526         if ((sample == 0) && (update == 0)) derived_10 = 0;
4527         else if ((sample == 1) && (update == 0)) derived_10 = 1;
4528         else if ((sample == 2) && (update == 0)) derived_10 = 2;
4529         else if ((sample == 3) && (update == 0)) derived_10 = 3;
4530         else if ((sample == 4) && (update == 1)) derived_10 = 4;
4531         else if ((sample == 0) && (update == 2)) derived_10 = 8;
4532         else if ((sample == 1) && (update == 2)) derived_10 = 9;
4533         else if ((sample == 0) && (update == 3)) derived_10 = 10;
4534         else if ((sample == 1) && (update == 3)) derived_10 = 11;
4535         else if ((sample == 2) && (update == 3)) derived_10 = 12;
4536         else if ((sample == 3) && (update == 3)) derived_10 = 13;
4537         else unreachable("No pattern match at pos 10");
4538 
4539         return 0x500c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_19 << 19) | (derived_10 << 10);
4540     } else if (register_format == 2) {
4541         unsigned derived_10 = 0;
4542         if ((sample == 0) && (update == 0)) derived_10 = 0;
4543         else if ((sample == 1) && (update == 0)) derived_10 = 1;
4544         else if ((sample == 2) && (update == 0)) derived_10 = 2;
4545         else if ((sample == 3) && (update == 0)) derived_10 = 3;
4546         else if ((sample == 4) && (update == 1)) derived_10 = 4;
4547         else if ((sample == 0) && (update == 2)) derived_10 = 8;
4548         else if ((sample == 1) && (update == 2)) derived_10 = 9;
4549         else if ((sample == 0) && (update == 3)) derived_10 = 10;
4550         else if ((sample == 1) && (update == 3)) derived_10 = 11;
4551         else if ((sample == 2) && (update == 3)) derived_10 = 12;
4552         else if ((sample == 3) && (update == 3)) derived_10 = 13;
4553         else unreachable("No pattern match at pos 10");
4554 
4555         return 0xcc0c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_10 << 10);
4556     } else {
4557         unreachable("No matching state found in add_ld_var");
4558     }
4559 }
4560 
4561 static inline unsigned
pan_pack_add_hadd_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4562 pan_pack_add_hadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4563 {
4564     unsigned src0 = bi_get_src(ins, regs, 0);
4565     unsigned src1 = bi_get_src(ins, regs, 1);
4566 
4567     assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
4568     unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
4569     assert(round < 2);
4570 
4571     unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4572     unsigned swap1_temp = 0;
4573     if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0;
4574     else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1;
4575     else unreachable("Could not pattern match widen");
4576     unsigned swap1 = swap1_temp;
4577     assert(swap1 < 2);
4578 
4579     unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4580     unsigned swap0_temp = 0;
4581     if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0;
4582     else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1;
4583     else unreachable("Could not pattern match widen");
4584     unsigned swap0 = swap0_temp;
4585     assert(swap0 < 2);
4586 
4587     return 0xbc840 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10);
4588 }
4589 
4590 static inline unsigned
pan_pack_add_swz_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4591 pan_pack_add_swz_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4592 {
4593     unsigned src0 = bi_get_src(ins, regs, 0);
4594 
4595     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4596     unsigned swz0_temp = 0;
4597     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4598     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4599     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4600     else unreachable("Could not pattern match widen");
4601     unsigned swz0 = swz0_temp;
4602     assert(swz0 < 4);
4603 
4604     return 0x3d948 | (src0 << 0) | (swz0 << 4);
4605 }
4606 
4607 static inline unsigned
pan_pack_add_atest(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4608 pan_pack_add_atest(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4609 {
4610     unsigned src0 = bi_get_src(ins, regs, 0);
4611     assert((1 << src0) & 0xf7);
4612     unsigned src1 = bi_get_src(ins, regs, 1);
4613     assert((1 << src1) & 0xf7);
4614 
4615     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4616     unsigned widen1_temp = 0;
4617     if (widen1_sz == 32) widen1_temp = 1;
4618     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 2;
4619     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 3;
4620     else unreachable("Could not pattern match widen");
4621     unsigned widen1 = widen1_temp;
4622     assert(widen1 < 4);
4623 
4624     bi_write_staging_register(clause, ins);
4625     return 0xc8f00 | (src0 << 0) | (src1 << 3) | (widen1 << 6);
4626 }
4627 
4628 static inline unsigned
pan_pack_add_ldexp_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4629 pan_pack_add_ldexp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4630 {
4631     unsigned src0 = bi_get_src(ins, regs, 0);
4632     unsigned src1 = bi_get_src(ins, regs, 1);
4633 
4634     unsigned round = ins->roundmode;
4635     assert(round < 8);
4636 
4637     return 0x74e00 | (src0 << 0) | (src1 << 3) | (round << 6);
4638 }
4639 
4640 static inline unsigned
pan_pack_fma_bitrev_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4641 pan_pack_fma_bitrev_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4642 {
4643     unsigned src0 = bi_get_src(ins, regs, 0);
4644     assert((1 << src0) & 0xfb);
4645 
4646     return 0x701fc0 | (src0 << 0);
4647 }
4648 
4649 static inline unsigned
pan_pack_add_icmpi_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4650 pan_pack_add_icmpi_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4651 {
4652     unsigned src0 = bi_get_src(ins, regs, 0);
4653     unsigned src1 = bi_get_src(ins, regs, 1);
4654 
4655     unsigned result_type = 1;
4656 
4657     unsigned cmpf_table[] = {
4658         ~0, ~0, ~0, ~0, ~0, 0, 1
4659     };
4660     unsigned cmpf = cmpf_table[ins->cond];
4661     assert(cmpf < 2);
4662 
4663     return 0x7b900 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
4664 }
4665 
4666 static inline unsigned
pan_pack_add_mov_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4667 pan_pack_add_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4668 {
4669     unsigned src0 = bi_get_src(ins, regs, 0);
4670 
4671     return 0x3d968 | (src0 << 0);
4672 }
4673 
4674 static inline unsigned
pan_pack_fma_frexpm_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4675 pan_pack_fma_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4676 {
4677     unsigned src0 = bi_get_src(ins, regs, 0);
4678     assert((1 << src0) & 0xfb);
4679 
4680     unsigned abs0 = ins->src_abs[0];
4681     assert(abs0 < 2);
4682 
4683     unsigned sqrt = 0;
4684 
4685     unsigned log = 1;
4686 
4687     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4688     unsigned widen0_temp = 0;
4689     if (widen0_sz == 32) widen0_temp = 1;
4690     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
4691     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
4692     else unreachable("Could not pattern match widen");
4693     unsigned widen0 = widen0_temp;
4694     assert(widen0 < 4);
4695 
4696     unsigned neg0 = ins->src_neg[0];
4697     assert(neg0 < 2);
4698 
4699     if ((log == 0) && (neg0 == 0)) {
4700         return 0x701b20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3);
4701     } else if ((log == 1) && (sqrt == 0)) {
4702         return 0x701a20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7);
4703     } else {
4704         unreachable("No matching state found in fma_frexpm_f32");
4705     }
4706 }
4707 
4708 static inline unsigned
pan_pack_add_atom_cx(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4709 pan_pack_add_atom_cx(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4710 {
4711     unsigned src0 = bi_get_src(ins, regs, 1);
4712     unsigned src1 = bi_get_src(ins, regs, 2);
4713     unsigned src2 = bi_get_src(ins, regs, 3);
4714 
4715     bi_read_staging_register(clause, ins);
4716     assert(ins->src[0] == ins->dest);
4717     return 0xd7400 | (src0 << 0) | (src1 << 3) | (src2 << 6);
4718 }
4719 
4720 static inline unsigned
pan_pack_add_fadd_rscale_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4721 pan_pack_add_fadd_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4722 {
4723     unsigned src0 = bi_get_src(ins, regs, 0);
4724     unsigned src1 = bi_get_src(ins, regs, 1);
4725     unsigned src2 = bi_get_src(ins, regs, 2);
4726 
4727     assert(ins->outmod == BIFROST_NONE || ins->outmod == BIFROST_SAT);
4728     unsigned clamp = (ins->outmod == BIFROST_SAT) ? 1 : 0;
4729     assert(clamp < 2);
4730 
4731     unsigned special = 0;
4732 
4733     unsigned round = ins->roundmode;
4734     assert(round < 8);
4735 
4736     unsigned abs1 = ins->src_abs[1];
4737     assert(abs1 < 2);
4738 
4739     unsigned neg0 = ins->src_neg[0];
4740     assert(neg0 < 2);
4741 
4742     unsigned neg1 = ins->src_neg[1];
4743     assert(neg1 < 2);
4744 
4745     unsigned abs0 = ins->src_abs[0];
4746     assert(abs0 < 2);
4747 
4748     unsigned derived_9 = 0;
4749     if ((clamp == 0) && (special == 0) && (round == 0)) derived_9 = 0;
4750     else if ((clamp == 1) && (special == 0) && (round == 0)) derived_9 = 2;
4751     else if ((clamp == 0) && (special == 1) && (round == 4)) derived_9 = 3;
4752     else if ((clamp == 0) && (special == 1) && (round == 0)) derived_9 = 4;
4753     else if ((clamp == 0) && (special == 1) && (round == 1)) derived_9 = 5;
4754     else if ((clamp == 0) && (special == 1) && (round == 2)) derived_9 = 6;
4755     else if ((clamp == 0) && (special == 1) && (round == 3)) derived_9 = 7;
4756     else unreachable("No pattern match at pos 9");
4757 
4758     return 0x88000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs1 << 12) | (neg0 << 13) | (neg1 << 14) | (abs0 << 16) | (derived_9 << 9);
4759 }
4760 
4761 static inline unsigned
pan_pack_fma_atom_post_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4762 pan_pack_fma_atom_post_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4763 {
4764     unsigned src0 = bi_get_src(ins, regs, 0);
4765     assert((1 << src0) & 0xfb);
4766     unsigned src1 = bi_get_src(ins, regs, 1);
4767     assert((1 << src1) & 0xfb);
4768 
4769     unsigned atom_opc = 2;
4770 
4771     return 0x6ee400 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
4772 }
4773 
4774 static inline unsigned
pan_pack_fma_imul_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4775 pan_pack_fma_imul_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4776 {
4777     unsigned src0 = bi_get_src(ins, regs, 0);
4778     assert((1 << src0) & 0xfb);
4779     unsigned src1 = bi_get_src(ins, regs, 1);
4780     assert((1 << src1) & 0xfb);
4781 
4782     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4783     unsigned widen1_temp = 0;
4784     if (widen1_sz == 32) widen1_temp = 0;
4785     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4786     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4787     else if (widen1_sz == 8 && ins->swizzle[1][0] == 0) widen1_temp = 3;
4788     else if (widen1_sz == 8 && ins->swizzle[1][0] == 1) widen1_temp = 4;
4789     else if (widen1_sz == 8 && ins->swizzle[1][0] == 2) widen1_temp = 5;
4790     else if (widen1_sz == 8 && ins->swizzle[1][0] == 3) widen1_temp = 6;
4791     else unreachable("Could not pattern match widen");
4792     unsigned widen1 = widen1_temp;
4793     assert(widen1 < 8);
4794 
4795     ASSERTED bool extend1_small = nir_alu_type_get_type_size(ins->src_types[1]) <= 16;
4796     bool extend1_signed = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int;
4797     unsigned extend1 = extend1_small ? (extend1_signed ? 1 : 2) : 0;
4798     assert(extend1 < 4);
4799 
4800     if ((extend1 == 0) && (widen1 == 0)) {
4801         return 0x73c0c0 | (src0 << 0) | (src1 << 3);
4802     } else if ((extend1 != 0) && ((widen1 == 1) || (widen1 == 2))) {
4803         unsigned derived_9 = 0;
4804         if (widen1 == 1) derived_9 = 0;
4805         else if (widen1 == 2) derived_9 = 1;
4806         else unreachable("No pattern match at pos 9");
4807 
4808         unsigned derived_10 = 0;
4809         if (extend1 == 2) derived_10 = 0;
4810         else if (extend1 == 1) derived_10 = 1;
4811         else unreachable("No pattern match at pos 10");
4812 
4813         return 0x73c8c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_10 << 10);
4814     } else if ((extend1 != 0) && ((widen1 == 3) || (widen1 == 4) || (widen1 == 5) || (widen1 == 6))) {
4815         unsigned derived_9 = 0;
4816         if (widen1 == 3) derived_9 = 0;
4817         else if (widen1 == 4) derived_9 = 1;
4818         else if (widen1 == 5) derived_9 = 2;
4819         else if (widen1 == 6) derived_9 = 3;
4820         else unreachable("No pattern match at pos 9");
4821 
4822         unsigned derived_11 = 0;
4823         if (extend1 == 2) derived_11 = 0;
4824         else if (extend1 == 1) derived_11 = 1;
4825         else unreachable("No pattern match at pos 11");
4826 
4827         return 0x73b0c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_11 << 11);
4828     } else {
4829         unreachable("No matching state found in fma_imul_i32");
4830     }
4831 }
4832 
4833 static inline unsigned
pan_pack_add_flogd_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4834 pan_pack_add_flogd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4835 {
4836     unsigned src0 = bi_get_src(ins, regs, 0);
4837     assert((1 << src0) & 0xf7);
4838 
4839     return 0x66340 | (src0 << 0);
4840 }
4841 
4842 static inline unsigned
pan_pack_fma_frexpm_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4843 pan_pack_fma_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4844 {
4845     unsigned src0 = bi_get_src(ins, regs, 0);
4846     assert((1 << src0) & 0xfb);
4847 
4848     unsigned abs0 = ins->src_abs[0];
4849     assert(abs0 < 2);
4850 
4851     unsigned sqrt = 0;
4852 
4853     unsigned log = 1;
4854 
4855     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4856     unsigned swz0_temp = 0;
4857     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4858     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4859     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
4860     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4861     else unreachable("Could not pattern match widen");
4862     unsigned swz0 = swz0_temp;
4863     assert(swz0 < 4);
4864 
4865     unsigned neg0 = ins->src_neg[0];
4866     assert(neg0 < 2);
4867 
4868     if ((log == 0) && (neg0 == 0)) {
4869         return 0x701b00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3);
4870     } else if ((log == 1) && (sqrt == 0)) {
4871         return 0x701a00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7);
4872     } else {
4873         unreachable("No matching state found in fma_frexpm_v2f16");
4874     }
4875 }
4876 
4877 static inline unsigned
pan_pack_add_s8_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4878 pan_pack_add_s8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4879 {
4880     unsigned src0 = bi_get_src(ins, regs, 0);
4881 
4882     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4883     unsigned lane0_temp = 0;
4884     if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
4885     else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
4886     else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
4887     else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
4888     else unreachable("Could not pattern match widen");
4889     unsigned lane0 = lane0_temp;
4890     assert(lane0 < 4);
4891 
4892     return 0x3cb80 | (src0 << 0) | (lane0 << 4);
4893 }
4894 
4895 static inline unsigned
pan_pack_add_zs_emit(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4896 pan_pack_add_zs_emit(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4897 {
4898     unsigned src0 = bi_get_src(ins, regs, 0);
4899     unsigned src1 = bi_get_src(ins, regs, 1);
4900     unsigned src2 = bi_get_src(ins, regs, 2);
4901 
4902     unsigned stencil = (ins->src[1] != 0);
4903     assert(stencil < 2);
4904 
4905     unsigned z = (ins->src[0] != 0);
4906     assert(z < 2);
4907 
4908     bi_write_staging_register(clause, ins);
4909     unsigned derived_9 = 0;
4910     if ((stencil == 1) && (z == 0)) derived_9 = 1;
4911     else if ((stencil == 0) && (z == 1)) derived_9 = 2;
4912     else if ((stencil == 1) && (z == 1)) derived_9 = 3;
4913     else unreachable("No pattern match at pos 9");
4914 
4915     return 0xd7800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
4916 }
4917 
4918 static inline unsigned
pan_pack_add_load_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4919 pan_pack_add_load_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4920 {
4921     unsigned src0 = bi_get_src(ins, regs, 0);
4922     unsigned src1 = bi_get_src(ins, regs, 1);
4923 
4924     assert(ins->segment);
4925     unsigned seg = ins->segment;
4926     assert(seg < 8);
4927 
4928     bi_write_staging_register(clause, ins);
4929     return 0x60e00 | (src0 << 0) | (src1 << 3) | (seg << 6);
4930 }
4931 
4932 static inline unsigned
pan_pack_add_branchz_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4933 pan_pack_add_branchz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4934 {
4935     unsigned src0 = bi_get_src(ins, regs, 0);
4936     unsigned src1 = bi_get_src(ins, regs, 1);
4937     assert((1 << src1) & 0xf7);
4938 
4939     unsigned cmpf_table[] = {
4940         ~0, 2, 3, 1, 0, ~0, ~0
4941     };
4942     unsigned cmpf = cmpf_table[ins->cond];
4943     assert(cmpf < 4);
4944 
4945     unsigned derived_9 = 0;
4946     if (cmpf == 2) derived_9 = 0;
4947     else if (cmpf == 3) derived_9 = 1;
4948     else if (cmpf == 1) derived_9 = 2;
4949     else if (cmpf == 0) derived_9 = 3;
4950     else unreachable("No pattern match at pos 9");
4951 
4952     return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
4953 }
4954 
4955 static inline unsigned
pan_pack_add_fcmp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4956 pan_pack_add_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4957 {
4958     unsigned src0 = bi_get_src(ins, regs, 0);
4959     unsigned src1 = bi_get_src(ins, regs, 1);
4960 
4961     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4962     unsigned widen0_temp = 0;
4963     if (widen0_sz == 32) widen0_temp = 0;
4964     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4965     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4966     else unreachable("Could not pattern match widen");
4967     unsigned widen0 = widen0_temp;
4968     assert(widen0 < 4);
4969 
4970     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4971     unsigned widen1_temp = 0;
4972     if (widen1_sz == 32) widen1_temp = 0;
4973     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4974     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4975     else unreachable("Could not pattern match widen");
4976     unsigned widen1 = widen1_temp;
4977     assert(widen1 < 4);
4978 
4979     unsigned neg0 = ins->src_neg[0];
4980     assert(neg0 < 2);
4981 
4982     unsigned neg1 = ins->src_neg[1];
4983     assert(neg1 < 2);
4984 
4985     unsigned cmpf_table[] = {
4986         ~0, 4, 5, 2, 1, 0, 3
4987     };
4988     unsigned cmpf = cmpf_table[ins->cond];
4989     assert(cmpf < 8);
4990 
4991     unsigned abs0 = ins->src_abs[0];
4992     assert(abs0 < 2);
4993 
4994     unsigned abs1 = ins->src_abs[1];
4995     assert(abs1 < 2);
4996 
4997     unsigned result_type = 2;
4998 
4999     if (((neg0 == 0) && (neg1 == 1)) || ((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) {
5000         { unsigned temp = src0; src0 = src1; src1 = temp; }
5001         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
5002         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
5003         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
5004         if (cmpf == 1) cmpf = 4;
5005         else if (cmpf == 5) cmpf = 2;
5006         else if (cmpf == 4) cmpf = 1;
5007         else if (cmpf == 2) cmpf = 5;
5008     }
5009 
5010     unsigned derived_9 = 0;
5011     if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
5012     else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
5013     else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
5014     else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
5015     else unreachable("No pattern match at pos 9");
5016 
5017     unsigned derived_13 = 0;
5018     if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0;
5019     else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1;
5020     else unreachable("No pattern match at pos 13");
5021 
5022     return 0x30000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (abs0 << 11) | (abs1 << 12) | (result_type << 14) | (derived_9 << 9) | (derived_13 << 13);
5023 }
5024 
5025 static inline unsigned
pan_pack_fma_atom_c1_return_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5026 pan_pack_fma_atom_c1_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5027 {
5028     unsigned src0 = bi_get_src(ins, regs, 0);
5029     assert((1 << src0) & 0xf3);
5030     unsigned src1 = bi_get_src(ins, regs, 1);
5031     assert((1 << src1) & 0xf3);
5032 
5033     unsigned atom_opc = 2;
5034 
5035     return 0x2f3e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
5036 }
5037 
5038 static inline unsigned
pan_pack_add_hadd_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5039 pan_pack_add_hadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5040 {
5041     unsigned src0 = bi_get_src(ins, regs, 0);
5042     unsigned src1 = bi_get_src(ins, regs, 1);
5043 
5044     assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
5045     unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
5046     assert(round < 2);
5047 
5048     unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5049     unsigned swap1_temp = 0;
5050     if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0;
5051     else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1;
5052     else unreachable("Could not pattern match widen");
5053     unsigned swap1 = swap1_temp;
5054     assert(swap1 < 2);
5055 
5056     unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5057     unsigned swap0_temp = 0;
5058     if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0;
5059     else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1;
5060     else unreachable("Could not pattern match widen");
5061     unsigned swap0 = swap0_temp;
5062     assert(swap0 < 2);
5063 
5064     return 0xbc8c0 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10);
5065 }
5066 
5067 static inline unsigned
pan_pack_add_acmpstore_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5068 pan_pack_add_acmpstore_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5069 {
5070     unsigned src0 = bi_get_src(ins, regs, 1);
5071     unsigned src1 = bi_get_src(ins, regs, 2);
5072 
5073     assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
5074     unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
5075     assert(seg < 2);
5076 
5077     bi_read_staging_register(clause, ins);
5078     return 0x648c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
5079 }
5080 
5081 static inline unsigned
pan_pack_add_frcp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5082 pan_pack_add_frcp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5083 {
5084     unsigned src0 = bi_get_src(ins, regs, 0);
5085     assert((1 << src0) & 0xf7);
5086 
5087     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5088     unsigned widen0_temp = 0;
5089     if (widen0_sz == 32) widen0_temp = 0;
5090     else unreachable("Could not pattern match widen");
5091     unsigned widen0 = widen0_temp;
5092     assert(widen0 < 4);
5093 
5094     unsigned neg = ins->src_neg[0];
5095     assert(neg < 2);
5096 
5097     unsigned abs0 = ins->src_abs[0];
5098     assert(abs0 < 2);
5099 
5100     unsigned derived_6 = 0;
5101     if (widen0 == 0) derived_6 = 0;
5102     else unreachable("No pattern match at pos 6");
5103 
5104     return 0x66000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6);
5105 }
5106 
5107 static inline unsigned
pan_pack_fma_fadd_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5108 pan_pack_fma_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5109 {
5110     unsigned src0 = bi_get_src(ins, regs, 0);
5111     assert((1 << src0) & 0xfb);
5112     unsigned src1 = bi_get_src(ins, regs, 1);
5113     assert((1 << src1) & 0xfb);
5114 
5115     unsigned abs0 = ins->src_abs[0];
5116     assert(abs0 < 2);
5117 
5118     unsigned abs1 = ins->src_abs[1];
5119     assert(abs1 < 2);
5120 
5121     unsigned neg0 = ins->src_neg[0];
5122     assert(neg0 < 2);
5123 
5124     unsigned neg1 = ins->src_neg[1];
5125     assert(neg1 < 2);
5126 
5127     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5128     unsigned swz0_temp = 0;
5129     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
5130     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
5131     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
5132     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
5133     else unreachable("Could not pattern match widen");
5134     unsigned swz0 = swz0_temp;
5135     assert(swz0 < 4);
5136 
5137     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5138     unsigned swz1_temp = 0;
5139     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
5140     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
5141     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
5142     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
5143     else unreachable("Could not pattern match widen");
5144     unsigned swz1 = swz1_temp;
5145     assert(swz1 < 4);
5146 
5147     unsigned round = ins->roundmode;
5148     assert(round < 4);
5149 
5150     unsigned clamp = ins->outmod;
5151     assert(clamp < 4);
5152 
5153     if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
5154         { unsigned temp = src0; src0 = src1; src1 = temp; }
5155         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
5156         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
5157         { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
5158     }
5159 
5160     unsigned derived_6 = 0;
5161     if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
5162     else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
5163     else unreachable("No pattern match at pos 6");
5164 
5165     return 0x6c0000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (derived_6 << 6);
5166 }
5167 
5168 static inline unsigned
pan_pack_add_var_tex_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5169 pan_pack_add_var_tex_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5170 {
5171 
5172     unsigned update = 0;
5173 
5174     unsigned skip = ins->skip;
5175     assert(skip < 2);
5176 
5177     unsigned lod_mode = 1 - ins->texture.compute_lod;
5178     assert(lod_mode < 2);
5179 
5180     unsigned sample = ins->load_vary.interp_mode;
5181     assert(sample < 2);
5182 
5183     unsigned varying_index = bi_get_immediate(ins, 0);
5184     unsigned texture_index = ins->texture.texture_index;
5185     bi_write_staging_register(clause, ins);
5186     unsigned derived_5 = 0;
5187     if ((sample == 0) && (update == 0)) derived_5 = 0;
5188     else if ((sample == 1) && (update == 1)) derived_5 = 1;
5189     else unreachable("No pattern match at pos 5");
5190 
5191     return 0xca100 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5);
5192 }
5193 
5194 static inline unsigned
pan_pack_fma_lshift_and_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5195 pan_pack_fma_lshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5196 {
5197     unsigned src0 = bi_get_src(ins, regs, 0);
5198     assert((1 << src0) & 0xfb);
5199     unsigned src1 = bi_get_src(ins, regs, 1);
5200     assert((1 << src1) & 0xfb);
5201     unsigned src2 = bi_get_src(ins, regs, 2);
5202 
5203     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5204     unsigned lanes2_temp = 0;
5205     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
5206     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
5207     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
5208     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
5209     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
5210     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
5211     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
5212     else unreachable("Could not pattern match widen");
5213     unsigned lanes2 = lanes2_temp;
5214     assert(lanes2 < 8);
5215 
5216     unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
5217     assert(not1 < 2);
5218 
5219     unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
5220     assert(not_result < 2);
5221 
5222     if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
5223         unsigned derived_9 = 0;
5224         if (lanes2 == 0) derived_9 = 0;
5225         else if (lanes2 == 1) derived_9 = 1;
5226         else if (lanes2 == 2) derived_9 = 2;
5227         else if (lanes2 == 3) derived_9 = 3;
5228         else unreachable("No pattern match at pos 9");
5229 
5230         return 0x310800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5231     } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
5232         unsigned derived_9 = 0;
5233         if (lanes2 == 4) derived_9 = 1;
5234         else if (lanes2 == 5) derived_9 = 2;
5235         else if (lanes2 == 6) derived_9 = 3;
5236         else unreachable("No pattern match at pos 9");
5237 
5238         return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5239     } else {
5240         unreachable("No matching state found in fma_lshift_and_v2i16");
5241     }
5242 }
5243 
5244 static inline unsigned
pan_pack_add_quiet_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5245 pan_pack_add_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5246 {
5247     unsigned src0 = bi_get_src(ins, regs, 0);
5248 
5249     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5250     unsigned swz0_temp = 0;
5251     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
5252     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
5253     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
5254     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
5255     else unreachable("Could not pattern match widen");
5256     unsigned swz0 = swz0_temp;
5257     assert(swz0 < 4);
5258 
5259     return 0x3d900 | (src0 << 0) | (swz0 << 4);
5260 }
5261 
5262 static inline unsigned
pan_pack_add_iabs_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5263 pan_pack_add_iabs_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5264 {
5265     unsigned src0 = bi_get_src(ins, regs, 0);
5266 
5267     return 0x3deb0 | (src0 << 0);
5268 }
5269 
5270 static inline unsigned
pan_pack_add_u16_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5271 pan_pack_add_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5272 {
5273     unsigned src0 = bi_get_src(ins, regs, 0);
5274 
5275     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5276     unsigned lane0_temp = 0;
5277     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5278     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5279     else unreachable("Could not pattern match widen");
5280     unsigned lane0 = lane0_temp;
5281     assert(lane0 < 2);
5282 
5283     return 0x3ccc8 | (src0 << 0) | (lane0 << 4);
5284 }
5285 
5286 static inline unsigned
pan_pack_fma_csel_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5287 pan_pack_fma_csel_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5288 {
5289     unsigned src0 = bi_get_src(ins, regs, 0);
5290     assert((1 << src0) & 0xfb);
5291     unsigned src1 = bi_get_src(ins, regs, 1);
5292     assert((1 << src1) & 0xfb);
5293     unsigned src2 = bi_get_src(ins, regs, 2);
5294     unsigned src3 = bi_get_src(ins, regs, 3);
5295 
5296     unsigned cmpf_table[] = {
5297         ~0, 2, 3, 1, 0, ~0, ~0
5298     };
5299     unsigned cmpf = cmpf_table[ins->cond];
5300     assert(cmpf < 4);
5301 
5302     if ((cmpf == 2) || (cmpf == 3)) {
5303         { unsigned temp = src0; src0 = src1; src1 = temp; }
5304         if (cmpf == 3) cmpf = 1;
5305         else if (cmpf == 2) cmpf = 0;
5306     }
5307 
5308     unsigned derived_12 = 0;
5309     if (cmpf == 0) derived_12 = 0;
5310     else if (cmpf == 1) derived_12 = 1;
5311     else unreachable("No pattern match at pos 12");
5312 
5313     return 0x2e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
5314 }
5315 
5316 static inline unsigned
pan_pack_fma_shaddxl_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5317 pan_pack_fma_shaddxl_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5318 {
5319     unsigned src0 = bi_get_src(ins, regs, 0);
5320     assert((1 << src0) & 0xfb);
5321     unsigned src1 = bi_get_src(ins, regs, 1);
5322     assert((1 << src1) & 0xfb);
5323 
5324     unsigned shift = 0;
5325     return 0x70e600 | (src0 << 0) | (src1 << 3) | (shift << 6);
5326 }
5327 
5328 static inline unsigned
pan_pack_add_s32_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5329 pan_pack_add_s32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5330 {
5331     unsigned src0 = bi_get_src(ins, regs, 0);
5332 
5333     unsigned round = ins->roundmode;
5334     assert(round < 8);
5335 
5336     if (round != 4) {
5337         unsigned derived_4 = 0;
5338         if (round == 0) derived_4 = 0;
5339         else if (round == 1) derived_4 = 1;
5340         else if (round == 2) derived_4 = 2;
5341         else if (round == 3) derived_4 = 3;
5342         else unreachable("No pattern match at pos 4");
5343 
5344         return 0x3cbc0 | (src0 << 0) | (derived_4 << 4);
5345     } else if (round == 4) {
5346         return 0x3cd00 | (src0 << 0);
5347     } else {
5348         unreachable("No matching state found in add_s32_to_f32");
5349     }
5350 }
5351 
5352 static inline unsigned
pan_pack_add_fmax_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5353 pan_pack_add_fmax_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5354 {
5355     unsigned src0 = bi_get_src(ins, regs, 0);
5356     unsigned src1 = bi_get_src(ins, regs, 1);
5357 
5358     unsigned abs1 = ins->src_abs[1];
5359     assert(abs1 < 2);
5360 
5361     unsigned neg0 = ins->src_neg[0];
5362     assert(neg0 < 2);
5363 
5364     unsigned neg1 = ins->src_neg[1];
5365     assert(neg1 < 2);
5366 
5367     unsigned clamp = ins->outmod;
5368     assert(clamp < 4);
5369 
5370     unsigned sem = 0;
5371 
5372     unsigned abs0 = ins->src_abs[0];
5373     assert(abs0 < 2);
5374 
5375     return 0x0 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15);
5376 }
5377 
5378 static inline unsigned
pan_pack_fma_lshift_xor_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5379 pan_pack_fma_lshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5380 {
5381     unsigned src0 = bi_get_src(ins, regs, 0);
5382     assert((1 << src0) & 0xfb);
5383     unsigned src1 = bi_get_src(ins, regs, 1);
5384     assert((1 << src1) & 0xfb);
5385     unsigned src2 = bi_get_src(ins, regs, 2);
5386 
5387     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5388     unsigned lane2_temp = 0;
5389     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
5390     else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
5391     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
5392     else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
5393     else unreachable("Could not pattern match widen");
5394     unsigned lane2 = lane2_temp;
5395     assert(lane2 < 4);
5396 
5397     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
5398     assert(not_result < 2);
5399 
5400     return 0x325000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13);
5401 }
5402 
5403 static inline unsigned
pan_pack_add_shift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5404 pan_pack_add_shift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5405 {
5406     unsigned src0 = bi_get_src(ins, regs, 0);
5407     unsigned src1 = bi_get_src(ins, regs, 1);
5408     unsigned src2 = bi_get_src(ins, regs, 2);
5409 
5410     return 0xefe00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
5411 }
5412 
5413 static inline unsigned
pan_pack_add_jump(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5414 pan_pack_add_jump(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5415 {
5416     unsigned src0 = bi_get_src(ins, regs, 0);
5417     assert((1 << src0) & 0xf7);
5418 
5419     return 0x6fe34 | (src0 << 6);
5420 }
5421 
5422 static inline unsigned
pan_pack_add_branchz_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5423 pan_pack_add_branchz_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5424 {
5425     unsigned src0 = bi_get_src(ins, regs, 0);
5426     unsigned src1 = bi_get_src(ins, regs, 1);
5427     assert((1 << src1) & 0xf7);
5428 
5429     unsigned cmpf_table[] = {
5430         ~0, 2, 3, 1, 0, ~0, ~0
5431     };
5432     unsigned cmpf = cmpf_table[ins->cond];
5433     assert(cmpf < 4);
5434 
5435     unsigned derived_9 = 0;
5436     if (cmpf == 2) derived_9 = 0;
5437     else if (cmpf == 3) derived_9 = 1;
5438     else if (cmpf == 1) derived_9 = 2;
5439     else if (cmpf == 0) derived_9 = 3;
5440     else unreachable("No pattern match at pos 9");
5441 
5442     return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
5443 }
5444 
5445 static inline unsigned
pan_pack_add_branch_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5446 pan_pack_add_branch_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5447 {
5448     unsigned src0 = bi_get_src(ins, regs, 0);
5449     unsigned src1 = bi_get_src(ins, regs, 1);
5450     unsigned src2 = bi_get_src(ins, regs, 2);
5451     assert((1 << src2) & 0xf7);
5452 
5453     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5454     unsigned widen0_temp = 0;
5455     if (widen0_sz == 32) widen0_temp = 0;
5456     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
5457     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
5458     else unreachable("Could not pattern match widen");
5459     unsigned widen0 = widen0_temp;
5460     assert(widen0 < 4);
5461 
5462     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5463     unsigned widen1_temp = 0;
5464     if (widen1_sz == 32) widen1_temp = 0;
5465     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
5466     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
5467     else unreachable("Could not pattern match widen");
5468     unsigned widen1 = widen1_temp;
5469     assert(widen1 < 4);
5470 
5471     unsigned cmpf_table[] = {
5472         ~0, 2, 3, 1, 0, ~0, ~0
5473     };
5474     unsigned cmpf = cmpf_table[ins->cond];
5475     assert(cmpf < 4);
5476 
5477     if (src0 < src1) {
5478         { unsigned temp = src0; src0 = src1; src1 = temp; }
5479         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
5480         if (cmpf == 0) cmpf = 2;
5481         else if (cmpf == 3) cmpf = 1;
5482         else if (cmpf == 2) cmpf = 0;
5483         else if (cmpf == 1) cmpf = 3;
5484     }
5485 
5486     unsigned derived_12 = 0;
5487     if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
5488     else unreachable("No pattern match at pos 12");
5489 
5490     unsigned derived_9 = 0;
5491     if ((src0 >= src1) && (cmpf == 2)) derived_9 = 0;
5492     else if ((src0 >= src1) && (cmpf == 3)) derived_9 = 1;
5493     else if ((src0 >= src1) && (cmpf == 1)) derived_9 = 2;
5494     else if ((src0 >= src1) && (cmpf == 0)) derived_9 = 3;
5495     else unreachable("No pattern match at pos 9");
5496 
5497     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
5498 }
5499 
5500 static inline unsigned
pan_pack_add_mux_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5501 pan_pack_add_mux_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5502 {
5503     unsigned src0 = bi_get_src(ins, regs, 0);
5504     unsigned src1 = bi_get_src(ins, regs, 1);
5505     unsigned src2 = bi_get_src(ins, regs, 2);
5506 
5507     unsigned mux = 1;
5508 
5509     return 0x74000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9);
5510 }
5511 
5512 static inline unsigned
pan_pack_add_texs_2d_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5513 pan_pack_add_texs_2d_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5514 {
5515     unsigned src0 = bi_get_src(ins, regs, 0);
5516     unsigned src1 = bi_get_src(ins, regs, 1);
5517 
5518     unsigned skip = ins->skip;
5519     assert(skip < 2);
5520 
5521     unsigned lod_mode = 1 - ins->texture.compute_lod;
5522     assert(lod_mode < 2);
5523 
5524     unsigned texture_index = ins->texture.texture_index;
5525     unsigned sampler_index = ins->texture.sampler_index;
5526     bi_write_staging_register(clause, ins);
5527     return 0xd8000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10);
5528 }
5529 
5530 static inline unsigned
pan_pack_add_s8_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5531 pan_pack_add_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5532 {
5533     unsigned src0 = bi_get_src(ins, regs, 0);
5534 
5535     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5536     unsigned lane0_temp = 0;
5537     if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5538     else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5539     else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
5540     else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
5541     else unreachable("Could not pattern match widen");
5542     unsigned lane0 = lane0_temp;
5543     assert(lane0 < 4);
5544 
5545     return 0x3cb40 | (src0 << 0) | (lane0 << 4);
5546 }
5547 
5548 static inline unsigned
pan_pack_fma_u8_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5549 pan_pack_fma_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5550 {
5551     unsigned src0 = bi_get_src(ins, regs, 0);
5552     assert((1 << src0) & 0xfb);
5553 
5554     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5555     unsigned lane0_temp = 0;
5556     if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5557     else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5558     else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
5559     else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
5560     else unreachable("Could not pattern match widen");
5561     unsigned lane0 = lane0_temp;
5562     assert(lane0 < 4);
5563 
5564     return 0x700b48 | (src0 << 0) | (lane0 << 4);
5565 }
5566 
5567 static inline unsigned
pan_pack_add_cube_tsel(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5568 pan_pack_add_cube_tsel(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5569 {
5570     unsigned src0 = bi_get_src(ins, regs, 0);
5571     unsigned src1 = bi_get_src(ins, regs, 1);
5572     unsigned src2 = bi_get_src(ins, regs, 2);
5573 
5574     unsigned neg0 = ins->src_neg[0];
5575     assert(neg0 < 2);
5576 
5577     unsigned neg1 = ins->src_neg[1];
5578     assert(neg1 < 2);
5579 
5580     unsigned derived_9 = 0;
5581     if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0;
5582     else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1;
5583     else unreachable("No pattern match at pos 9");
5584 
5585     return 0x3e400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
5586 }
5587 
5588 static inline unsigned
pan_pack_add_fpow_sc_det_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5589 pan_pack_add_fpow_sc_det_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5590 {
5591     unsigned src0 = bi_get_src(ins, regs, 0);
5592     assert((1 << src0) & 0xf7);
5593     unsigned src1 = bi_get_src(ins, regs, 1);
5594     assert((1 << src1) & 0xf7);
5595 
5596     unsigned func = 0;
5597 
5598     return 0x67640 | (src0 << 0) | (src1 << 3) | (func << 7);
5599 }
5600 
5601 static inline unsigned
pan_pack_fma_mkvec_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5602 pan_pack_fma_mkvec_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5603 {
5604     unsigned src0 = bi_get_src(ins, regs, 0);
5605     assert((1 << src0) & 0xfb);
5606     unsigned src1 = bi_get_src(ins, regs, 1);
5607     assert((1 << src1) & 0xfb);
5608     unsigned src2 = bi_get_src(ins, regs, 2);
5609     unsigned src3 = bi_get_src(ins, regs, 3);
5610 
5611     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5612     unsigned lane0_temp = 0;
5613     if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5614     else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 1;
5615     else unreachable("Could not pattern match widen");
5616     unsigned lane0 = lane0_temp;
5617     assert(lane0 < 2);
5618 
5619     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5620     unsigned lane1_temp = 0;
5621     if (lane1_sz == 8 && ins->swizzle[1][0] == 0) lane1_temp = 0;
5622     else if (lane1_sz == 8 && ins->swizzle[1][0] == 2) lane1_temp = 1;
5623     else unreachable("Could not pattern match widen");
5624     unsigned lane1 = lane1_temp;
5625     assert(lane1 < 2);
5626 
5627     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5628     unsigned lane2_temp = 0;
5629     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
5630     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
5631     else unreachable("Could not pattern match widen");
5632     unsigned lane2 = lane2_temp;
5633     assert(lane2 < 2);
5634 
5635     unsigned lane3_sz = nir_alu_type_get_type_size(ins->src_types[3]);
5636     unsigned lane3_temp = 0;
5637     if (lane3_sz == 8 && ins->swizzle[3][0] == 0) lane3_temp = 0;
5638     else if (lane3_sz == 8 && ins->swizzle[3][0] == 2) lane3_temp = 1;
5639     else unreachable("Could not pattern match widen");
5640     unsigned lane3 = lane3_temp;
5641     assert(lane3 < 2);
5642 
5643     return 0x710000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (lane0 << 12) | (lane1 << 13) | (lane2 << 14) | (lane3 << 15);
5644 }
5645 
5646 static inline unsigned
pan_pack_add_fmin_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5647 pan_pack_add_fmin_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5648 {
5649     unsigned src0 = bi_get_src(ins, regs, 0);
5650     unsigned src1 = bi_get_src(ins, regs, 1);
5651 
5652     unsigned abs1 = ins->src_abs[1];
5653     assert(abs1 < 2);
5654 
5655     unsigned neg0 = ins->src_neg[0];
5656     assert(neg0 < 2);
5657 
5658     unsigned neg1 = ins->src_neg[1];
5659     assert(neg1 < 2);
5660 
5661     unsigned clamp = ins->outmod;
5662     assert(clamp < 4);
5663 
5664     unsigned sem = 0;
5665 
5666     unsigned abs0 = ins->src_abs[0];
5667     assert(abs0 < 2);
5668 
5669     return 0x10000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15);
5670 }
5671 
5672 static inline unsigned
pan_pack_fma_fcmp_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5673 pan_pack_fma_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5674 {
5675     unsigned src0 = bi_get_src(ins, regs, 0);
5676     assert((1 << src0) & 0xfb);
5677     unsigned src1 = bi_get_src(ins, regs, 1);
5678     assert((1 << src1) & 0xfb);
5679 
5680     unsigned abs0 = ins->src_abs[0];
5681     assert(abs0 < 2);
5682 
5683     unsigned abs1 = ins->src_abs[1];
5684     assert(abs1 < 2);
5685 
5686     unsigned cmpf_table[] = {
5687         ~0, 4, 5, 2, 1, 0, 3
5688     };
5689     unsigned cmpf = cmpf_table[ins->cond];
5690     assert(cmpf < 8);
5691 
5692     unsigned neg0 = ins->src_neg[0];
5693     assert(neg0 < 2);
5694 
5695     unsigned neg1 = ins->src_neg[1];
5696     assert(neg1 < 2);
5697 
5698     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5699     unsigned swz0_temp = 0;
5700     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
5701     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
5702     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
5703     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
5704     else unreachable("Could not pattern match widen");
5705     unsigned swz0 = swz0_temp;
5706     assert(swz0 < 4);
5707 
5708     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5709     unsigned swz1_temp = 0;
5710     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
5711     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
5712     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
5713     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
5714     else unreachable("Could not pattern match widen");
5715     unsigned swz1 = swz1_temp;
5716     assert(swz1 < 4);
5717 
5718     unsigned result_type = 2;
5719 
5720     if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
5721         { unsigned temp = src0; src0 = src1; src1 = temp; }
5722         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
5723         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
5724         { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
5725         if (cmpf == 1) cmpf = 4;
5726         else if (cmpf == 5) cmpf = 2;
5727         else if (cmpf == 4) cmpf = 1;
5728         else if (cmpf == 2) cmpf = 5;
5729     }
5730 
5731     unsigned derived_6 = 0;
5732     if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
5733     else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
5734     else unreachable("No pattern match at pos 6");
5735 
5736     unsigned derived_13 = 0;
5737     if (cmpf == 0) derived_13 = 0;
5738     else if (cmpf == 1) derived_13 = 1;
5739     else if (cmpf == 2) derived_13 = 2;
5740     else if (cmpf == 3) derived_13 = 3;
5741     else if (cmpf == 4) derived_13 = 4;
5742     else if (cmpf == 5) derived_13 = 5;
5743     else if (cmpf == 6) derived_13 = 6;
5744     else if ((cmpf == 7) && (abs0 == 0) && (abs1 == 0)) derived_13 = 7;
5745     else unreachable("No pattern match at pos 13");
5746 
5747     return 0x640000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (result_type << 16) | (derived_6 << 6) | (derived_13 << 13);
5748 }
5749 
5750 static inline unsigned
pan_pack_add_acmpxchg_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5751 pan_pack_add_acmpxchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5752 {
5753     unsigned src0 = bi_get_src(ins, regs, 1);
5754     unsigned src1 = bi_get_src(ins, regs, 2);
5755 
5756     assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
5757     unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
5758     assert(seg < 2);
5759 
5760     bi_read_staging_register(clause, ins);
5761     assert(ins->src[0] == ins->dest);
5762     return 0x64500 | (src0 << 0) | (src1 << 3) | (seg << 9);
5763 }
5764 
5765 static inline unsigned
pan_pack_fma_rshift_and_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5766 pan_pack_fma_rshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5767 {
5768     unsigned src0 = bi_get_src(ins, regs, 0);
5769     assert((1 << src0) & 0xfb);
5770     unsigned src1 = bi_get_src(ins, regs, 1);
5771     assert((1 << src1) & 0xfb);
5772     unsigned src2 = bi_get_src(ins, regs, 2);
5773 
5774     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5775     unsigned lanes2_temp = 0;
5776     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
5777     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
5778     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
5779     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
5780     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
5781     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
5782     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
5783     else unreachable("Could not pattern match widen");
5784     unsigned lanes2 = lanes2_temp;
5785     assert(lanes2 < 8);
5786 
5787     unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
5788     assert(not1 < 2);
5789 
5790     unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
5791     assert(not_result < 2);
5792 
5793     if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
5794         unsigned derived_9 = 0;
5795         if (lanes2 == 0) derived_9 = 0;
5796         else if (lanes2 == 1) derived_9 = 1;
5797         else if (lanes2 == 2) derived_9 = 2;
5798         else if (lanes2 == 3) derived_9 = 3;
5799         else unreachable("No pattern match at pos 9");
5800 
5801         return 0x300800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5802     } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
5803         unsigned derived_9 = 0;
5804         if (lanes2 == 4) derived_9 = 1;
5805         else if (lanes2 == 5) derived_9 = 2;
5806         else if (lanes2 == 6) derived_9 = 3;
5807         else unreachable("No pattern match at pos 9");
5808 
5809         return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5810     } else {
5811         unreachable("No matching state found in fma_rshift_and_v2i16");
5812     }
5813 }
5814 
5815 static inline unsigned
pan_pack_add_fpow_sc_det_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5816 pan_pack_add_fpow_sc_det_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5817 {
5818     unsigned src0 = bi_get_src(ins, regs, 0);
5819     assert((1 << src0) & 0xf7);
5820     unsigned src1 = bi_get_src(ins, regs, 1);
5821     assert((1 << src1) & 0xf7);
5822 
5823     unsigned func = 0;
5824 
5825     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5826     unsigned lane1_temp = 0;
5827     if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
5828     else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
5829     else if (lane1_sz == 32) lane1_temp = 2;
5830     else unreachable("Could not pattern match widen");
5831     unsigned lane1 = lane1_temp;
5832     assert(lane1 < 4);
5833 
5834     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5835     unsigned lane0_temp = 0;
5836     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5837     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5838     else unreachable("Could not pattern match widen");
5839     unsigned lane0 = lane0_temp;
5840     assert(lane0 < 2);
5841 
5842     if ((func == 0) || (func == 1)) {
5843         unsigned derived_6 = 0;
5844         if ((lane1 == 2) || (lane1 == 0)) derived_6 = 0;
5845         else if (lane1 == 1) derived_6 = 1;
5846         else unreachable("No pattern match at pos 6");
5847 
5848         unsigned derived_8 = 0;
5849         if (func == 0) derived_8 = 0;
5850         else if (func == 1) derived_8 = 1;
5851         else unreachable("No pattern match at pos 8");
5852 
5853         return 0x67400 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_6 << 6) | (derived_8 << 8);
5854     } else if (((func == 2) || (func == 3)) && (lane1 == 2)) {
5855         unsigned derived_8 = 0;
5856         if (func == 2) derived_8 = 0;
5857         else if (func == 3) derived_8 = 1;
5858         else unreachable("No pattern match at pos 8");
5859 
5860         return 0x67600 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_8 << 8);
5861     } else {
5862         unreachable("No matching state found in add_fpow_sc_det_f16");
5863     }
5864 }
5865 
5866 static inline unsigned
pan_pack_add_iadd_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5867 pan_pack_add_iadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5868 {
5869     unsigned src0 = bi_get_src(ins, regs, 0);
5870     unsigned src1 = bi_get_src(ins, regs, 1);
5871 
5872     unsigned saturate = 0;
5873 
5874     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5875     unsigned lanes0_temp = 0;
5876     if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
5877     else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
5878     else unreachable("Could not pattern match widen");
5879     unsigned lanes0 = lanes0_temp;
5880     assert(lanes0 < 2);
5881 
5882     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5883     unsigned lanes1_temp = 0;
5884     if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
5885     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
5886     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
5887     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
5888     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
5889     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
5890     else unreachable("Could not pattern match widen");
5891     unsigned lanes1 = lanes1_temp;
5892     assert(lanes1 < 8);
5893 
5894     if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
5895         unsigned derived_9 = 0;
5896         if (lanes1 == 0) derived_9 = 0;
5897         else if (lanes1 == 1) derived_9 = 1;
5898         else unreachable("No pattern match at pos 9");
5899 
5900         unsigned derived_10 = 0;
5901         if (lanes0 == 0) derived_10 = 0;
5902         else if (lanes0 == 1) derived_10 = 1;
5903         else unreachable("No pattern match at pos 10");
5904 
5905         return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10);
5906     } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
5907         unsigned derived_9 = 0;
5908         if (lanes1 == 2) derived_9 = 0;
5909         else if (lanes1 == 3) derived_9 = 1;
5910         else unreachable("No pattern match at pos 9");
5911 
5912         return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
5913     } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
5914         unsigned derived_9 = 0;
5915         if (lanes1 == 4) derived_9 = 0;
5916         else if (lanes1 == 5) derived_9 = 1;
5917         else unreachable("No pattern match at pos 9");
5918 
5919         return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
5920     } else {
5921         unreachable("No matching state found in add_iadd_v2s16");
5922     }
5923 }
5924 
5925 static inline unsigned
pan_pack_fma_arshift_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5926 pan_pack_fma_arshift_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5927 {
5928     unsigned src0 = bi_get_src(ins, regs, 0);
5929     assert((1 << src0) & 0xfb);
5930     unsigned src1 = bi_get_src(ins, regs, 1);
5931     assert((1 << src1) & 0x8);
5932     unsigned src2 = bi_get_src(ins, regs, 2);
5933 
5934     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5935     unsigned lane2_temp = 0;
5936     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
5937     else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
5938     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
5939     else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
5940     else unreachable("Could not pattern match widen");
5941     unsigned lane2 = lane2_temp;
5942     assert(lane2 < 4);
5943 
5944     return 0x335018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9);
5945 }
5946 
5947 static inline unsigned
pan_pack_add_store_i128(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5948 pan_pack_add_store_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5949 {
5950     unsigned src0 = bi_get_src(ins, regs, 1);
5951     unsigned src1 = bi_get_src(ins, regs, 2);
5952 
5953     assert(ins->segment);
5954     unsigned seg = ins->segment;
5955     assert(seg < 8);
5956 
5957     bi_read_staging_register(clause, ins);
5958     return 0x61200 | (src0 << 0) | (src1 << 3) | (seg << 6);
5959 }
5960 
5961 static inline unsigned
pan_pack_add_fpclass_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5962 pan_pack_add_fpclass_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5963 {
5964     unsigned src0 = bi_get_src(ins, regs, 0);
5965     assert((1 << src0) & 0xf7);
5966 
5967     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5968     unsigned lane0_temp = 0;
5969     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5970     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5971     else unreachable("Could not pattern match widen");
5972     unsigned lane0 = lane0_temp;
5973     assert(lane0 < 2);
5974 
5975     return 0x67c40 | (src0 << 0) | (lane0 << 3);
5976 }
5977 
5978 static inline unsigned
pan_pack_add_u8_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5979 pan_pack_add_u8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5980 {
5981     unsigned src0 = bi_get_src(ins, regs, 0);
5982 
5983     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5984     unsigned lane0_temp = 0;
5985     if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5986     else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5987     else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
5988     else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
5989     else unreachable("Could not pattern match widen");
5990     unsigned lane0 = lane0_temp;
5991     assert(lane0 < 4);
5992 
5993     return 0x3cb88 | (src0 << 0) | (lane0 << 4);
5994 }
5995 
5996 static inline unsigned
pan_pack_fma_nop_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5997 pan_pack_fma_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5998 {
5999 
6000     return 0x701963;
6001 }
6002 
6003 static inline unsigned
pan_pack_add_lea_attr_tex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6004 pan_pack_add_lea_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6005 {
6006     unsigned src0 = bi_get_src(ins, regs, 0);
6007     unsigned src1 = bi_get_src(ins, regs, 1);
6008     unsigned src2 = bi_get_src(ins, regs, 2);
6009 
6010     unsigned register_format_temp = 0;
6011     if (ins->format == nir_type_float16) register_format_temp = 0;
6012     else if (ins->format == nir_type_float32) register_format_temp = 1;
6013     else if (ins->format == nir_type_int32) register_format_temp = 2;
6014     else if (ins->format == nir_type_uint32) register_format_temp = 3;
6015     else if (ins->format == nir_type_int16) register_format_temp = 4;
6016     else if (ins->format == nir_type_uint16) register_format_temp = 5;
6017     else if (ins->format == nir_type_float64) register_format_temp = 6;
6018     else if (ins->format == nir_type_int64) register_format_temp = 7;
6019     else unreachable("Could not pattern match register format");
6020     unsigned register_format = register_format_temp;
6021     assert(register_format < 16);
6022 
6023     bi_write_staging_register(clause, ins);
6024     if (register_format != 8) {
6025         unsigned derived_11 = 0;
6026         if (register_format == 0) derived_11 = 0;
6027         else if (register_format == 1) derived_11 = 1;
6028         else if (register_format == 2) derived_11 = 2;
6029         else if (register_format == 3) derived_11 = 3;
6030         else if (register_format == 4) derived_11 = 4;
6031         else if (register_format == 5) derived_11 = 5;
6032         else if (register_format == 6) derived_11 = 6;
6033         else if (register_format == 7) derived_11 = 7;
6034         else unreachable("No pattern match at pos 11");
6035 
6036         return 0xc0600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11);
6037     } else if (register_format == 8) {
6038         return 0xc8600 | (src0 << 0) | (src1 << 3) | (src2 << 6);
6039     } else {
6040         unreachable("No matching state found in add_lea_attr_tex");
6041     }
6042 }
6043 
6044 static inline unsigned
pan_pack_fma_mkvec_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6045 pan_pack_fma_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6046 {
6047     unsigned src0 = bi_get_src(ins, regs, 0);
6048     assert((1 << src0) & 0xfb);
6049     unsigned src1 = bi_get_src(ins, regs, 1);
6050     assert((1 << src1) & 0xfb);
6051 
6052     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6053     unsigned lane0_temp = 0;
6054     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6055     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6056     else unreachable("Could not pattern match widen");
6057     unsigned lane0 = lane0_temp;
6058     assert(lane0 < 2);
6059 
6060     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6061     unsigned lane1_temp = 0;
6062     if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
6063     else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
6064     else unreachable("Could not pattern match widen");
6065     unsigned lane1 = lane1_temp;
6066     assert(lane1 < 2);
6067 
6068     return 0x70f000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7);
6069 }
6070 
6071 static inline unsigned
pan_pack_fma_fadd_lscale_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6072 pan_pack_fma_fadd_lscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6073 {
6074     unsigned src0 = bi_get_src(ins, regs, 0);
6075     assert((1 << src0) & 0xfb);
6076     unsigned src1 = bi_get_src(ins, regs, 1);
6077     assert((1 << src1) & 0xfb);
6078 
6079     unsigned abs0 = ins->src_abs[0];
6080     assert(abs0 < 2);
6081 
6082     unsigned neg0 = ins->src_neg[0];
6083     assert(neg0 < 2);
6084 
6085     unsigned abs1 = ins->src_abs[1];
6086     assert(abs1 < 2);
6087 
6088     unsigned neg1 = ins->src_neg[1];
6089     assert(neg1 < 2);
6090 
6091     return 0x70f400 | (src0 << 0) | (src1 << 3) | (abs0 << 6) | (neg0 << 7) | (abs1 << 8) | (neg1 << 9);
6092 }
6093 
6094 static inline unsigned
pan_pack_add_v2f16_to_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6095 pan_pack_add_v2f16_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6096 {
6097     unsigned src0 = bi_get_src(ins, regs, 0);
6098 
6099     unsigned round = ins->roundmode;
6100     assert(round < 8);
6101 
6102     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6103     unsigned swz0_temp = 0;
6104     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6105     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6106     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
6107     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
6108     else unreachable("Could not pattern match widen");
6109     unsigned swz0 = swz0_temp;
6110     assert(swz0 < 4);
6111 
6112     if (round != 4) {
6113         unsigned derived_4 = 0;
6114         if (round == 0) derived_4 = 0;
6115         else if (round == 1) derived_4 = 1;
6116         else if (round == 2) derived_4 = 2;
6117         else if (round == 3) derived_4 = 3;
6118         else unreachable("No pattern match at pos 4");
6119 
6120         return 0x3c208 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
6121     } else if (round == 4) {
6122         return 0x3ca88 | (src0 << 0) | (swz0 << 4);
6123     } else {
6124         unreachable("No matching state found in add_v2f16_to_v2u16");
6125     }
6126 }
6127 
6128 static inline unsigned
pan_pack_fma_fcmp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6129 pan_pack_fma_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6130 {
6131     unsigned src0 = bi_get_src(ins, regs, 0);
6132     assert((1 << src0) & 0xfb);
6133     unsigned src1 = bi_get_src(ins, regs, 1);
6134     assert((1 << src1) & 0xfb);
6135 
6136     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6137     unsigned widen0_temp = 0;
6138     if (widen0_sz == 32) widen0_temp = 0;
6139     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6140     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6141     else unreachable("Could not pattern match widen");
6142     unsigned widen0 = widen0_temp;
6143     assert(widen0 < 4);
6144 
6145     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6146     unsigned widen1_temp = 0;
6147     if (widen1_sz == 32) widen1_temp = 0;
6148     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
6149     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
6150     else unreachable("Could not pattern match widen");
6151     unsigned widen1 = widen1_temp;
6152     assert(widen1 < 4);
6153 
6154     unsigned abs1 = ins->src_abs[1];
6155     assert(abs1 < 2);
6156 
6157     unsigned neg0 = ins->src_neg[0];
6158     assert(neg0 < 2);
6159 
6160     unsigned neg1 = ins->src_neg[1];
6161     assert(neg1 < 2);
6162 
6163     unsigned abs0 = ins->src_abs[0];
6164     assert(abs0 < 2);
6165 
6166     unsigned cmpf_table[] = {
6167         ~0, 4, 5, 2, 1, 0, 3
6168     };
6169     unsigned cmpf = cmpf_table[ins->cond];
6170     assert(cmpf < 8);
6171 
6172     unsigned result_type = 2;
6173 
6174     if ((widen0 == 2) && (widen1 == 1)) {
6175         { unsigned temp = src0; src0 = src1; src1 = temp; }
6176         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
6177         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
6178         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
6179         if (cmpf == 1) cmpf = 4;
6180         else if (cmpf == 5) cmpf = 2;
6181         else if (cmpf == 4) cmpf = 1;
6182         else if (cmpf == 2) cmpf = 5;
6183     }
6184 
6185     unsigned derived_9 = 0;
6186     if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
6187     else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
6188     else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
6189     else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
6190     else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4;
6191     else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5;
6192     else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6;
6193     else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7;
6194     else unreachable("No pattern match at pos 9");
6195 
6196     return 0x240000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (cmpf << 13) | (result_type << 16) | (derived_9 << 9);
6197 }
6198 
6199 static inline unsigned
pan_pack_add_fpclass_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6200 pan_pack_add_fpclass_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6201 {
6202     unsigned src0 = bi_get_src(ins, regs, 0);
6203     assert((1 << src0) & 0xf7);
6204 
6205     return 0x67c50 | (src0 << 0);
6206 }
6207 
6208 static inline unsigned
pan_pack_add_ld_attr(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6209 pan_pack_add_ld_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6210 {
6211     unsigned src0 = bi_get_src(ins, regs, 0);
6212     unsigned src1 = bi_get_src(ins, regs, 1);
6213     unsigned src2 = bi_get_src(ins, regs, 2);
6214 
6215     unsigned register_format_temp = 0;
6216     if (ins->format == nir_type_float16) register_format_temp = 0;
6217     else if (ins->format == nir_type_float32) register_format_temp = 1;
6218     else if (ins->format == nir_type_int32) register_format_temp = 2;
6219     else if (ins->format == nir_type_uint32) register_format_temp = 3;
6220     else if (ins->format == nir_type_int16) register_format_temp = 4;
6221     else if (ins->format == nir_type_uint16) register_format_temp = 5;
6222     else if (ins->format == nir_type_float64) register_format_temp = 6;
6223     else if (ins->format == nir_type_int64) register_format_temp = 7;
6224     else unreachable("Could not pattern match register format");
6225     unsigned register_format = register_format_temp;
6226     assert(register_format < 16);
6227 
6228     unsigned vecsize = ins->vector_channels - 1;
6229     assert(vecsize < 4);
6230 
6231     bi_write_staging_register(clause, ins);
6232     if (register_format != 8) {
6233         unsigned derived_13 = 0;
6234         if (register_format == 0) derived_13 = 0;
6235         else if (register_format == 1) derived_13 = 1;
6236         else if (register_format == 2) derived_13 = 2;
6237         else if (register_format == 3) derived_13 = 3;
6238         else if (register_format == 4) derived_13 = 4;
6239         else if (register_format == 5) derived_13 = 5;
6240         else if (register_format == 6) derived_13 = 6;
6241         else if (register_format == 7) derived_13 = 7;
6242         else unreachable("No pattern match at pos 13");
6243 
6244         return 0x40400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13);
6245     } else if (register_format == 8) {
6246         return 0xc4400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11);
6247     } else {
6248         unreachable("No matching state found in add_ld_attr");
6249     }
6250 }
6251 
6252 static inline unsigned
pan_pack_fma_rshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6253 pan_pack_fma_rshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6254 {
6255     unsigned src0 = bi_get_src(ins, regs, 0);
6256     assert((1 << src0) & 0xfb);
6257     unsigned src1 = bi_get_src(ins, regs, 1);
6258     assert((1 << src1) & 0xfb);
6259     unsigned src2 = bi_get_src(ins, regs, 2);
6260 
6261     unsigned bytes2 = 0;
6262 
6263     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
6264     unsigned lane2_temp = 0;
6265     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
6266     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
6267     else unreachable("Could not pattern match widen");
6268     unsigned lane2 = lane2_temp;
6269     assert(lane2 < 2);
6270 
6271     unsigned result_word = 0;
6272 
6273     return 0x33d000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
6274 }
6275 
6276 static inline unsigned
pan_pack_add_branchz_u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6277 pan_pack_add_branchz_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6278 {
6279     unsigned src0 = bi_get_src(ins, regs, 0);
6280     unsigned src1 = bi_get_src(ins, regs, 1);
6281     assert((1 << src1) & 0xf7);
6282 
6283     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6284     unsigned widen0_temp = 0;
6285     if (widen0_sz == 32) widen0_temp = 0;
6286     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6287     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6288     else unreachable("Could not pattern match widen");
6289     unsigned widen0 = widen0_temp;
6290     assert(widen0 < 4);
6291 
6292     unsigned cmpf_table[] = {
6293         ~0, 2, 3, 1, 0, ~0, ~0
6294     };
6295     unsigned cmpf = cmpf_table[ins->cond];
6296     assert(cmpf < 4);
6297 
6298     unsigned derived_4 = 0;
6299     if (widen0 == 2) derived_4 = 1;
6300     else if (widen0 == 1) derived_4 = 2;
6301     else unreachable("No pattern match at pos 4");
6302 
6303     unsigned derived_9 = 0;
6304     if (cmpf == 2) derived_9 = 0;
6305     else if (cmpf == 3) derived_9 = 1;
6306     else if (cmpf == 1) derived_9 = 2;
6307     else if (cmpf == 0) derived_9 = 3;
6308     else unreachable("No pattern match at pos 9");
6309 
6310     return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9);
6311 }
6312 
6313 static inline unsigned
pan_pack_fma_atom_c1_return_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6314 pan_pack_fma_atom_c1_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6315 {
6316     unsigned src0 = bi_get_src(ins, regs, 0);
6317     assert((1 << src0) & 0xf3);
6318     unsigned src1 = bi_get_src(ins, regs, 1);
6319     assert((1 << src1) & 0xf3);
6320 
6321     unsigned atom_opc = 2;
6322 
6323     return 0x2f7e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
6324 }
6325 
6326 static inline unsigned
pan_pack_add_store_i48(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6327 pan_pack_add_store_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6328 {
6329     unsigned src0 = bi_get_src(ins, regs, 1);
6330     unsigned src1 = bi_get_src(ins, regs, 2);
6331 
6332     assert(ins->segment);
6333     unsigned seg = ins->segment;
6334     assert(seg < 8);
6335 
6336     bi_read_staging_register(clause, ins);
6337     return 0x65a00 | (src0 << 0) | (src1 << 3) | (seg << 6);
6338 }
6339 
6340 static inline unsigned
pan_pack_fma_rshift_and_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6341 pan_pack_fma_rshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6342 {
6343     unsigned src0 = bi_get_src(ins, regs, 0);
6344     assert((1 << src0) & 0xfb);
6345     unsigned src1 = bi_get_src(ins, regs, 1);
6346     assert((1 << src1) & 0xfb);
6347     unsigned src2 = bi_get_src(ins, regs, 2);
6348 
6349     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
6350     unsigned lanes2_temp = 0;
6351     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
6352     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
6353     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
6354     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
6355     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
6356     else unreachable("Could not pattern match widen");
6357     unsigned lanes2 = lanes2_temp;
6358     assert(lanes2 < 8);
6359 
6360     unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
6361     assert(not1 < 2);
6362 
6363     unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
6364     assert(not_result < 2);
6365 
6366     if (lanes2 != 0) {
6367         unsigned derived_9 = 0;
6368         if (lanes2 == 1) derived_9 = 0;
6369         else if (lanes2 == 2) derived_9 = 1;
6370         else if (lanes2 == 3) derived_9 = 2;
6371         else if (lanes2 == 4) derived_9 = 3;
6372         else unreachable("No pattern match at pos 9");
6373 
6374         return 0x300000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
6375     } else if (lanes2 == 0) {
6376         return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
6377     } else {
6378         unreachable("No matching state found in fma_rshift_and_v4i8");
6379     }
6380 }
6381 
6382 static inline unsigned
pan_pack_add_frsq_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6383 pan_pack_add_frsq_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6384 {
6385     unsigned src0 = bi_get_src(ins, regs, 0);
6386     assert((1 << src0) & 0xf7);
6387 
6388     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6389     unsigned widen0_temp = 0;
6390     if (widen0_sz == 32) widen0_temp = 0;
6391     else unreachable("Could not pattern match widen");
6392     unsigned widen0 = widen0_temp;
6393     assert(widen0 < 4);
6394 
6395     unsigned neg = ins->src_neg[0];
6396     assert(neg < 2);
6397 
6398     unsigned abs0 = ins->src_abs[0];
6399     assert(abs0 < 2);
6400 
6401     unsigned derived_6 = 0;
6402     if (widen0 == 0) derived_6 = 0;
6403     else unreachable("No pattern match at pos 6");
6404 
6405     return 0x66100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6);
6406 }
6407 
6408 static inline unsigned
pan_pack_add_icmpf_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6409 pan_pack_add_icmpf_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6410 {
6411     unsigned src0 = bi_get_src(ins, regs, 0);
6412     unsigned src1 = bi_get_src(ins, regs, 1);
6413     unsigned src2 = bi_get_src(ins, regs, 2);
6414 
6415     return 0x7be00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
6416 }
6417 
6418 static inline unsigned
pan_pack_add_lea_tex_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6419 pan_pack_add_lea_tex_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6420 {
6421     unsigned src0 = bi_get_src(ins, regs, 0);
6422     unsigned src1 = bi_get_src(ins, regs, 1);
6423 
6424     unsigned format = 1;
6425 
6426     unsigned texture_index = ins->texture.texture_index;
6427     bi_write_staging_register(clause, ins);
6428     return 0xd6000 | (src0 << 0) | (src1 << 3) | (format << 11) | (texture_index << 6);
6429 }
6430 
6431 static inline unsigned
pan_pack_add_f16_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6432 pan_pack_add_f16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6433 {
6434     unsigned src0 = bi_get_src(ins, regs, 0);
6435 
6436     unsigned round = ins->roundmode;
6437     assert(round < 8);
6438 
6439     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6440     unsigned lane0_temp = 0;
6441     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6442     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6443     else unreachable("Could not pattern match widen");
6444     unsigned lane0 = lane0_temp;
6445     assert(lane0 < 2);
6446 
6447     if (round != 4) {
6448         unsigned derived_4 = 0;
6449         if (round == 0) derived_4 = 0;
6450         else if (round == 1) derived_4 = 1;
6451         else if (round == 2) derived_4 = 2;
6452         else if (round == 3) derived_4 = 3;
6453         else unreachable("No pattern match at pos 4");
6454 
6455         return 0x3c508 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4);
6456     } else if (round == 4) {
6457         return 0x3cc48 | (src0 << 0) | (lane0 << 5);
6458     } else {
6459         unreachable("No matching state found in add_f16_to_u32");
6460     }
6461 }
6462 
6463 static inline unsigned
pan_pack_add_isub_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6464 pan_pack_add_isub_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6465 {
6466     unsigned src0 = bi_get_src(ins, regs, 0);
6467     unsigned src1 = bi_get_src(ins, regs, 1);
6468 
6469     unsigned saturate = 0;
6470 
6471     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6472     unsigned lanes1_temp = 0;
6473     if (lanes1_sz == 32) lanes1_temp = 0;
6474     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
6475     else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
6476     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
6477     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
6478     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
6479     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
6480     else unreachable("Could not pattern match widen");
6481     unsigned lanes1 = lanes1_temp;
6482     assert(lanes1 < 8);
6483 
6484     if (lanes1 == 0) {
6485         unsigned derived_7 = 0;
6486         if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
6487         else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
6488         else unreachable("No pattern match at pos 7");
6489 
6490         return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
6491     } else if ((lanes1 == 1) || (lanes1 == 2)) {
6492         unsigned derived_7 = 0;
6493         if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
6494         else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
6495         else unreachable("No pattern match at pos 7");
6496 
6497         unsigned derived_9 = 0;
6498         if (lanes1 == 1) derived_9 = 0;
6499         else if (lanes1 == 2) derived_9 = 1;
6500         else unreachable("No pattern match at pos 9");
6501 
6502         return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
6503     } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
6504         unsigned derived_7 = 0;
6505         if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
6506         else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
6507         else unreachable("No pattern match at pos 7");
6508 
6509         unsigned derived_9 = 0;
6510         if (lanes1 == 3) derived_9 = 0;
6511         else if (lanes1 == 4) derived_9 = 1;
6512         else if (lanes1 == 5) derived_9 = 2;
6513         else if (lanes1 == 6) derived_9 = 3;
6514         else unreachable("No pattern match at pos 9");
6515 
6516         return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
6517     } else {
6518         unreachable("No matching state found in add_isub_u32");
6519     }
6520 }
6521 
6522 static inline unsigned
pan_pack_fma_v2f32_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6523 pan_pack_fma_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6524 {
6525     unsigned src0 = bi_get_src(ins, regs, 0);
6526     assert((1 << src0) & 0xfb);
6527     unsigned src1 = bi_get_src(ins, regs, 1);
6528     assert((1 << src1) & 0xfb);
6529 
6530     unsigned abs0 = ins->src_abs[0];
6531     assert(abs0 < 2);
6532 
6533     unsigned abs1 = ins->src_abs[1];
6534     assert(abs1 < 2);
6535 
6536     unsigned neg0 = ins->src_neg[0];
6537     assert(neg0 < 2);
6538 
6539     unsigned neg1 = ins->src_neg[1];
6540     assert(neg1 < 2);
6541 
6542     unsigned clamp = ins->outmod;
6543     assert(clamp < 4);
6544 
6545     unsigned round = ins->roundmode;
6546     assert(round < 8);
6547 
6548     unsigned derived_6 = 0;
6549     if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0;
6550     else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1;
6551     else unreachable("No pattern match at pos 6");
6552 
6553     unsigned derived_7 = 0;
6554     if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0;
6555     else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1;
6556     else unreachable("No pattern match at pos 7");
6557 
6558     return 0x6e8000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7);
6559 }
6560 
6561 static inline unsigned
pan_pack_add_u8_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6562 pan_pack_add_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6563 {
6564     unsigned src0 = bi_get_src(ins, regs, 0);
6565 
6566     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6567     unsigned lane0_temp = 0;
6568     if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6569     else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6570     else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
6571     else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
6572     else unreachable("Could not pattern match widen");
6573     unsigned lane0 = lane0_temp;
6574     assert(lane0 < 4);
6575 
6576     return 0x3cb48 | (src0 << 0) | (lane0 << 4);
6577 }
6578 
6579 static inline unsigned
pan_pack_add_kaboom(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6580 pan_pack_add_kaboom(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6581 {
6582     unsigned src0 = bi_get_src(ins, regs, 0);
6583 
6584     return 0xd7858 | (src0 << 0);
6585 }
6586 
6587 static inline unsigned
pan_pack_fma_mov_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6588 pan_pack_fma_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6589 {
6590     unsigned src0 = bi_get_src(ins, regs, 0);
6591     assert((1 << src0) & 0xfb);
6592 
6593     return 0x701968 | (src0 << 0);
6594 }
6595 
6596 static inline unsigned
pan_pack_add_nop_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6597 pan_pack_add_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6598 {
6599 
6600     return 0x3d964;
6601 }
6602 
6603 static inline unsigned
pan_pack_fma_frexpe_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6604 pan_pack_fma_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6605 {
6606     unsigned src0 = bi_get_src(ins, regs, 0);
6607     assert((1 << src0) & 0xfb);
6608 
6609     unsigned neg = ins->src_neg[0];
6610     assert(neg < 2);
6611 
6612     unsigned sqrt = 0;
6613 
6614     unsigned log = 1;
6615 
6616     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6617     unsigned swz0_temp = 0;
6618     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6619     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6620     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
6621     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
6622     else unreachable("Could not pattern match widen");
6623     unsigned swz0 = swz0_temp;
6624     assert(swz0 < 4);
6625 
6626     if (log == 0) {
6627         return 0x701c00 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (swz0 << 3);
6628     } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
6629         return 0x701e00 | (src0 << 0) | (swz0 << 3);
6630     } else {
6631         unreachable("No matching state found in fma_frexpe_v2f16");
6632     }
6633 }
6634 
6635 static inline unsigned
pan_pack_add_store_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6636 pan_pack_add_store_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6637 {
6638     unsigned src0 = bi_get_src(ins, regs, 1);
6639     unsigned src1 = bi_get_src(ins, regs, 2);
6640 
6641     assert(ins->segment);
6642     unsigned seg = ins->segment;
6643     assert(seg < 8);
6644 
6645     bi_read_staging_register(clause, ins);
6646     return 0x62e00 | (src0 << 0) | (src1 << 3) | (seg << 6);
6647 }
6648 
6649 static inline unsigned
pan_pack_add_frexpm_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6650 pan_pack_add_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6651 {
6652     unsigned src0 = bi_get_src(ins, regs, 0);
6653 
6654     unsigned abs0 = ins->src_abs[0];
6655     assert(abs0 < 2);
6656 
6657     unsigned sqrt = 0;
6658 
6659     unsigned log = 1;
6660 
6661     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6662     unsigned swz0_temp = 0;
6663     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6664     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6665     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
6666     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
6667     else unreachable("Could not pattern match widen");
6668     unsigned swz0 = swz0_temp;
6669     assert(swz0 < 4);
6670 
6671     unsigned neg0 = ins->src_neg[0];
6672     assert(neg0 < 2);
6673 
6674     if ((log == 0) && (neg0 == 0)) {
6675         return 0x3db00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3);
6676     } else if ((log == 1) && (sqrt == 0)) {
6677         return 0x3da00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7);
6678     } else {
6679         unreachable("No matching state found in add_frexpm_v2f16");
6680     }
6681 }
6682 
6683 static inline unsigned
pan_pack_add_branchz_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6684 pan_pack_add_branchz_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6685 {
6686     unsigned src0 = bi_get_src(ins, regs, 0);
6687     unsigned src1 = bi_get_src(ins, regs, 1);
6688     assert((1 << src1) & 0xf7);
6689 
6690     unsigned cmpf_table[] = {
6691         ~0, ~0, ~0, ~0, ~0, 0, 1
6692     };
6693     unsigned cmpf = cmpf_table[ins->cond];
6694     assert(cmpf < 2);
6695 
6696     unsigned derived_3 = 0;
6697     if (cmpf == 1) derived_3 = 0;
6698     else if (cmpf == 0) derived_3 = 1;
6699     else unreachable("No pattern match at pos 3");
6700 
6701     return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_3 << 3);
6702 }
6703 
6704 static inline unsigned
pan_pack_add_swz_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6705 pan_pack_add_swz_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6706 {
6707     unsigned src0 = bi_get_src(ins, regs, 0);
6708 
6709     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6710     unsigned swz0_temp = 0;
6711     if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 0 && ins->swizzle[0][3] == 0) swz0_temp = 0;
6712     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 1;
6713     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 2) swz0_temp = 2;
6714     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 3;
6715     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 4;
6716     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 5;
6717     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 2) swz0_temp = 6;
6718     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 0) swz0_temp = 7;
6719     else unreachable("Could not pattern match widen");
6720     unsigned swz0 = swz0_temp;
6721     assert(swz0 < 8);
6722 
6723     return 0x3df40 | (src0 << 0) | (swz0 << 3);
6724 }
6725 
6726 static inline unsigned
pan_pack_add_branchz_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6727 pan_pack_add_branchz_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6728 {
6729     unsigned src0 = bi_get_src(ins, regs, 0);
6730     unsigned src1 = bi_get_src(ins, regs, 1);
6731     assert((1 << src1) & 0xf7);
6732 
6733     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6734     unsigned widen0_temp = 0;
6735     if (widen0_sz == 32) widen0_temp = 0;
6736     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6737     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6738     else unreachable("Could not pattern match widen");
6739     unsigned widen0 = widen0_temp;
6740     assert(widen0 < 4);
6741 
6742     unsigned cmpf_table[] = {
6743         ~0, 4, 5, 2, 1, 0, 3
6744     };
6745     unsigned cmpf = cmpf_table[ins->cond];
6746     assert(cmpf < 8);
6747 
6748     unsigned derived_4 = 0;
6749     if (widen0 == 2) derived_4 = 1;
6750     else if (widen0 == 1) derived_4 = 2;
6751     else unreachable("No pattern match at pos 4");
6752 
6753     unsigned derived_3 = 0;
6754     if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0;
6755     else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1;
6756     else unreachable("No pattern match at pos 3");
6757 
6758     unsigned derived_9 = 0;
6759     if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5;
6760     else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6;
6761     else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7;
6762     else unreachable("No pattern match at pos 9");
6763 
6764     return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3) | (derived_9 << 9);
6765 }
6766 
6767 static inline unsigned
pan_pack_add_u16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6768 pan_pack_add_u16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6769 {
6770     unsigned src0 = bi_get_src(ins, regs, 0);
6771 
6772     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6773     unsigned lane0_temp = 0;
6774     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6775     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6776     else unreachable("Could not pattern match widen");
6777     unsigned lane0 = lane0_temp;
6778     assert(lane0 < 2);
6779 
6780     return 0x3cce8 | (src0 << 0) | (lane0 << 4);
6781 }
6782 
6783 static inline unsigned
pan_pack_add_icmp_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6784 pan_pack_add_icmp_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6785 {
6786     unsigned src0 = bi_get_src(ins, regs, 0);
6787     unsigned src1 = bi_get_src(ins, regs, 1);
6788 
6789     unsigned result_type = 1;
6790 
6791     unsigned cmpf_table[] = {
6792         ~0, 2, 3, 1, 0, ~0, ~0
6793     };
6794     unsigned cmpf = cmpf_table[ins->cond];
6795     assert(cmpf < 4);
6796 
6797     if ((cmpf == 2) || (cmpf == 3)) {
6798         { unsigned temp = src0; src0 = src1; src1 = temp; }
6799         if (cmpf == 3) cmpf = 1;
6800         else if (cmpf == 2) cmpf = 0;
6801     }
6802 
6803     unsigned derived_6 = 0;
6804     if (cmpf == 0) derived_6 = 0;
6805     else if (cmpf == 1) derived_6 = 1;
6806     else unreachable("No pattern match at pos 6");
6807 
6808     return 0x7b000 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
6809 }
6810 
6811 static inline unsigned
pan_pack_fma_frshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6812 pan_pack_fma_frshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6813 {
6814     unsigned src0 = bi_get_src(ins, regs, 0);
6815     assert((1 << src0) & 0xfb);
6816     unsigned src1 = bi_get_src(ins, regs, 1);
6817     assert((1 << src1) & 0xfb);
6818     unsigned src2 = bi_get_src(ins, regs, 2);
6819 
6820     unsigned bytes2 = 0;
6821 
6822     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
6823     unsigned lane2_temp = 0;
6824     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
6825     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
6826     else unreachable("Could not pattern match widen");
6827     unsigned lane2 = lane2_temp;
6828     assert(lane2 < 2);
6829 
6830     return 0x33f000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10);
6831 }
6832 
6833 static inline unsigned
pan_pack_add_frcbrt_approx_c_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6834 pan_pack_add_frcbrt_approx_c_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6835 {
6836     unsigned src0 = bi_get_src(ins, regs, 0);
6837     assert((1 << src0) & 0xf7);
6838 
6839     return 0x67ab8 | (src0 << 0);
6840 }
6841 
6842 static inline unsigned
pan_pack_add_hadd_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6843 pan_pack_add_hadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6844 {
6845     unsigned src0 = bi_get_src(ins, regs, 0);
6846     unsigned src1 = bi_get_src(ins, regs, 1);
6847 
6848     assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
6849     unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
6850     assert(round < 2);
6851 
6852     return 0xbc440 | (src0 << 0) | (src1 << 3) | (round << 12);
6853 }
6854 
6855 static inline unsigned
pan_pack_add_s16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6856 pan_pack_add_s16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6857 {
6858     unsigned src0 = bi_get_src(ins, regs, 0);
6859 
6860     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6861     unsigned lane0_temp = 0;
6862     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6863     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6864     else unreachable("Could not pattern match widen");
6865     unsigned lane0 = lane0_temp;
6866     assert(lane0 < 2);
6867 
6868     return 0x3cce0 | (src0 << 0) | (lane0 << 4);
6869 }
6870 
6871 static inline unsigned
pan_pack_add_v2u8_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6872 pan_pack_add_v2u8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6873 {
6874     unsigned src0 = bi_get_src(ins, regs, 0);
6875 
6876     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6877     unsigned swz0_temp = 0;
6878     if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6879     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6880     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
6881     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
6882     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
6883     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
6884     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
6885     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
6886     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
6887     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
6888     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
6889     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
6890     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
6891     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
6892     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
6893     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
6894     else unreachable("Could not pattern match widen");
6895     unsigned swz0 = swz0_temp;
6896     assert(swz0 < 16);
6897 
6898     return 0x3c808 | (src0 << 0) | (swz0 << 4);
6899 }
6900 
6901 static inline unsigned
pan_pack_add_branchz_s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6902 pan_pack_add_branchz_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6903 {
6904     unsigned src0 = bi_get_src(ins, regs, 0);
6905     unsigned src1 = bi_get_src(ins, regs, 1);
6906     assert((1 << src1) & 0xf7);
6907 
6908     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6909     unsigned widen0_temp = 0;
6910     if (widen0_sz == 32) widen0_temp = 0;
6911     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6912     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6913     else unreachable("Could not pattern match widen");
6914     unsigned widen0 = widen0_temp;
6915     assert(widen0 < 4);
6916 
6917     unsigned cmpf_table[] = {
6918         ~0, 2, 3, 1, 0, ~0, ~0
6919     };
6920     unsigned cmpf = cmpf_table[ins->cond];
6921     assert(cmpf < 4);
6922 
6923     unsigned derived_4 = 0;
6924     if (widen0 == 2) derived_4 = 1;
6925     else if (widen0 == 1) derived_4 = 2;
6926     else unreachable("No pattern match at pos 4");
6927 
6928     unsigned derived_9 = 0;
6929     if (cmpf == 2) derived_9 = 0;
6930     else if (cmpf == 3) derived_9 = 1;
6931     else if (cmpf == 1) derived_9 = 2;
6932     else if (cmpf == 0) derived_9 = 3;
6933     else unreachable("No pattern match at pos 9");
6934 
6935     return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9);
6936 }
6937 
6938 static inline unsigned
pan_pack_fma_imul_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6939 pan_pack_fma_imul_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6940 {
6941     unsigned src0 = bi_get_src(ins, regs, 0);
6942     assert((1 << src0) & 0xfb);
6943     unsigned src1 = bi_get_src(ins, regs, 1);
6944     assert((1 << src1) & 0xfb);
6945 
6946     unsigned replicate0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6947     unsigned replicate0_temp = 0;
6948     if (replicate0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) replicate0_temp = 0;
6949     else unreachable("Could not pattern match widen");
6950     unsigned replicate0 = replicate0_temp;
6951     assert(replicate0 < 8);
6952 
6953     unsigned replicate1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6954     unsigned replicate1_temp = 0;
6955     if (replicate1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) replicate1_temp = 0;
6956     else if (replicate1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) replicate1_temp = 1;
6957     else if (replicate1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) replicate1_temp = 2;
6958     else if (replicate1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) replicate1_temp = 3;
6959     else if (replicate1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) replicate1_temp = 4;
6960     else unreachable("Could not pattern match widen");
6961     unsigned replicate1 = replicate1_temp;
6962     assert(replicate1 < 8);
6963 
6964     if ((replicate0 == 0) && (replicate1 == 0)) {
6965         return 0x73e0c0 | (src0 << 0) | (src1 << 3);
6966     } else if ((replicate0 == 0) && (replicate1 != 0)) {
6967         unsigned derived_9 = 0;
6968         if (replicate1 == 1) derived_9 = 0;
6969         else if (replicate1 == 2) derived_9 = 1;
6970         else if (replicate1 == 3) derived_9 = 2;
6971         else if (replicate1 == 4) derived_9 = 3;
6972         else unreachable("No pattern match at pos 9");
6973 
6974         return 0x7380c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9);
6975     } else {
6976         unreachable("No matching state found in fma_imul_v4i8");
6977     }
6978 }
6979 
6980 static inline unsigned
pan_pack_add_s16_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6981 pan_pack_add_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6982 {
6983     unsigned src0 = bi_get_src(ins, regs, 0);
6984 
6985     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6986     unsigned lane0_temp = 0;
6987     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6988     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6989     else unreachable("Could not pattern match widen");
6990     unsigned lane0 = lane0_temp;
6991     assert(lane0 < 2);
6992 
6993     return 0x3ccc0 | (src0 << 0) | (lane0 << 4);
6994 }
6995 
6996 static inline unsigned
pan_pack_add_f32_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6997 pan_pack_add_f32_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6998 {
6999     unsigned src0 = bi_get_src(ins, regs, 0);
7000 
7001     unsigned round = ins->roundmode;
7002     assert(round < 8);
7003 
7004     if (round != 4) {
7005         unsigned derived_4 = 0;
7006         if (round == 0) derived_4 = 0;
7007         else if (round == 1) derived_4 = 1;
7008         else if (round == 2) derived_4 = 2;
7009         else if (round == 3) derived_4 = 3;
7010         else unreachable("No pattern match at pos 4");
7011 
7012         return 0x3c980 | (src0 << 0) | (derived_4 << 4);
7013     } else if (round == 4) {
7014         return 0x3cca0 | (src0 << 0);
7015     } else {
7016         unreachable("No matching state found in add_f32_to_s32");
7017     }
7018 }
7019 
7020 static inline unsigned
pan_pack_fma_rshift_xor_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7021 pan_pack_fma_rshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7022 {
7023     unsigned src0 = bi_get_src(ins, regs, 0);
7024     assert((1 << src0) & 0xfb);
7025     unsigned src1 = bi_get_src(ins, regs, 1);
7026     assert((1 << src1) & 0xfb);
7027     unsigned src2 = bi_get_src(ins, regs, 2);
7028 
7029     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7030     unsigned lane2_temp = 0;
7031     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
7032     else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
7033     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
7034     else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
7035     else unreachable("Could not pattern match widen");
7036     unsigned lane2 = lane2_temp;
7037     assert(lane2 < 4);
7038 
7039     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7040     assert(not_result < 2);
7041 
7042     return 0x321000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13);
7043 }
7044 
7045 static inline unsigned
pan_pack_add_fatan_assist_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7046 pan_pack_add_fatan_assist_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7047 {
7048     unsigned src0 = bi_get_src(ins, regs, 0);
7049     assert((1 << src0) & 0xf7);
7050     unsigned src1 = bi_get_src(ins, regs, 1);
7051     assert((1 << src1) & 0xf7);
7052 
7053     return 0x67a00 | (src0 << 0) | (src1 << 3);
7054 }
7055 
7056 static inline unsigned
pan_pack_add_mux_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7057 pan_pack_add_mux_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7058 {
7059     unsigned src0 = bi_get_src(ins, regs, 0);
7060     unsigned src1 = bi_get_src(ins, regs, 1);
7061     unsigned src2 = bi_get_src(ins, regs, 2);
7062 
7063     unsigned mux = 1;
7064 
7065     return 0x74800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9);
7066 }
7067 
7068 static inline unsigned
pan_pack_fma_lshift_xor_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7069 pan_pack_fma_lshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7070 {
7071     unsigned src0 = bi_get_src(ins, regs, 0);
7072     assert((1 << src0) & 0xfb);
7073     unsigned src1 = bi_get_src(ins, regs, 1);
7074     assert((1 << src1) & 0xfb);
7075     unsigned src2 = bi_get_src(ins, regs, 2);
7076 
7077     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7078     unsigned lanes2_temp = 0;
7079     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
7080     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
7081     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
7082     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
7083     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
7084     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
7085     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
7086     else unreachable("Could not pattern match widen");
7087     unsigned lanes2 = lanes2_temp;
7088     assert(lanes2 < 8);
7089 
7090     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7091     assert(not_result < 2);
7092 
7093     if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
7094         unsigned derived_9 = 0;
7095         if (lanes2 == 0) derived_9 = 0;
7096         else if (lanes2 == 1) derived_9 = 1;
7097         else if (lanes2 == 2) derived_9 = 2;
7098         else if (lanes2 == 3) derived_9 = 3;
7099         else unreachable("No pattern match at pos 9");
7100 
7101         return 0x324800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
7102     } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
7103         unsigned derived_9 = 0;
7104         if (lanes2 == 4) derived_9 = 1;
7105         else if (lanes2 == 5) derived_9 = 2;
7106         else if (lanes2 == 6) derived_9 = 3;
7107         else unreachable("No pattern match at pos 9");
7108 
7109         return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
7110     } else {
7111         unreachable("No matching state found in fma_lshift_xor_v2i16");
7112     }
7113 }
7114 
7115 static inline unsigned
pan_pack_add_load_i96(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7116 pan_pack_add_load_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7117 {
7118     unsigned src0 = bi_get_src(ins, regs, 0);
7119     unsigned src1 = bi_get_src(ins, regs, 1);
7120 
7121     assert(ins->segment);
7122     unsigned seg = ins->segment;
7123     assert(seg < 8);
7124 
7125     bi_write_staging_register(clause, ins);
7126     return 0x65400 | (src0 << 0) | (src1 << 3) | (seg << 6);
7127 }
7128 
7129 static inline unsigned
pan_pack_fma_lshift_or_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7130 pan_pack_fma_lshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7131 {
7132     unsigned src0 = bi_get_src(ins, regs, 0);
7133     assert((1 << src0) & 0xfb);
7134     unsigned src1 = bi_get_src(ins, regs, 1);
7135     assert((1 << src1) & 0xfb);
7136     unsigned src2 = bi_get_src(ins, regs, 2);
7137 
7138     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7139     unsigned lane2_temp = 0;
7140     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
7141     else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
7142     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
7143     else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
7144     else unreachable("Could not pattern match widen");
7145     unsigned lane2 = lane2_temp;
7146     assert(lane2 < 4);
7147 
7148     unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
7149     assert(not1 < 2);
7150 
7151     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7152     assert(not_result < 2);
7153 
7154     return 0x313000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
7155 }
7156 
7157 static inline unsigned
pan_pack_fma_lshift_or_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7158 pan_pack_fma_lshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7159 {
7160     unsigned src0 = bi_get_src(ins, regs, 0);
7161     assert((1 << src0) & 0xfb);
7162     unsigned src1 = bi_get_src(ins, regs, 1);
7163     assert((1 << src1) & 0xfb);
7164     unsigned src2 = bi_get_src(ins, regs, 2);
7165 
7166     unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7167     unsigned lanes2_temp = 0;
7168     if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
7169     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
7170     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
7171     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
7172     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
7173     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
7174     else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
7175     else unreachable("Could not pattern match widen");
7176     unsigned lanes2 = lanes2_temp;
7177     assert(lanes2 < 8);
7178 
7179     unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
7180     assert(not1 < 2);
7181 
7182     unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7183     assert(not_result < 2);
7184 
7185     if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
7186         unsigned derived_9 = 0;
7187         if (lanes2 == 0) derived_9 = 0;
7188         else if (lanes2 == 1) derived_9 = 1;
7189         else if (lanes2 == 2) derived_9 = 2;
7190         else if (lanes2 == 3) derived_9 = 3;
7191         else unreachable("No pattern match at pos 9");
7192 
7193         return 0x312800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
7194     } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
7195         unsigned derived_9 = 0;
7196         if (lanes2 == 4) derived_9 = 1;
7197         else if (lanes2 == 5) derived_9 = 2;
7198         else if (lanes2 == 6) derived_9 = 3;
7199         else unreachable("No pattern match at pos 9");
7200 
7201         return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
7202     } else {
7203         unreachable("No matching state found in fma_lshift_or_v2i16");
7204     }
7205 }
7206 
7207 static inline unsigned
pan_pack_add_ld_gclk_u64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7208 pan_pack_add_ld_gclk_u64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7209 {
7210 
7211     unsigned source = 7;
7212 
7213     bi_write_staging_register(clause, ins);
7214     return 0xd7800 | (source << 0);
7215 }
7216 
7217 static inline unsigned
pan_pack_add_seg_add(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7218 pan_pack_add_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7219 {
7220     unsigned src0 = bi_get_src(ins, regs, 0);
7221 
7222     assert(ins->segment);
7223     unsigned seg = ins->segment;
7224     assert(seg < 8);
7225 
7226     unsigned preserve_null = 0;
7227 
7228     return 0x3d500 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
7229 }
7230 
7231 static inline unsigned
pan_pack_add_axchg_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7232 pan_pack_add_axchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7233 {
7234     unsigned src0 = bi_get_src(ins, regs, 1);
7235     unsigned src1 = bi_get_src(ins, regs, 2);
7236 
7237     assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
7238     unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
7239     assert(seg < 2);
7240 
7241     bi_read_staging_register(clause, ins);
7242     assert(ins->src[0] == ins->dest);
7243     return 0x64100 | (src0 << 0) | (src1 << 3) | (seg << 9);
7244 }
7245 
7246 static inline unsigned
pan_pack_add_isub_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7247 pan_pack_add_isub_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7248 {
7249     unsigned src0 = bi_get_src(ins, regs, 0);
7250     unsigned src1 = bi_get_src(ins, regs, 1);
7251 
7252     unsigned saturate = 0;
7253 
7254     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7255     unsigned lanes0_temp = 0;
7256     if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
7257     else unreachable("Could not pattern match widen");
7258     unsigned lanes0 = lanes0_temp;
7259     assert(lanes0 < 8);
7260 
7261     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7262     unsigned lanes1_temp = 0;
7263     if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
7264     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
7265     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
7266     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
7267     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
7268     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
7269     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
7270     else unreachable("Could not pattern match widen");
7271     unsigned lanes1 = lanes1_temp;
7272     assert(lanes1 < 8);
7273 
7274     if ((lanes0 == 0) && (lanes1 == 0)) {
7275         return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8);
7276     } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
7277         unsigned derived_9 = 0;
7278         if (lanes1 == 1) derived_9 = 0;
7279         else if (lanes1 == 2) derived_9 = 1;
7280         else if (lanes1 == 3) derived_9 = 2;
7281         else if (lanes1 == 4) derived_9 = 3;
7282         else unreachable("No pattern match at pos 9");
7283 
7284         return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
7285     } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
7286         unsigned derived_9 = 0;
7287         if (lanes1 == 5) derived_9 = 0;
7288         else if (lanes1 == 6) derived_9 = 1;
7289         else unreachable("No pattern match at pos 9");
7290 
7291         return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
7292     } else {
7293         unreachable("No matching state found in add_isub_v4s8");
7294     }
7295 }
7296 
7297 static inline unsigned
pan_pack_fma_fma_rscale_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7298 pan_pack_fma_fma_rscale_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7299 {
7300     unsigned src0 = bi_get_src(ins, regs, 0);
7301     assert((1 << src0) & 0xfb);
7302     unsigned src1 = bi_get_src(ins, regs, 1);
7303     assert((1 << src1) & 0xfb);
7304     unsigned src2 = bi_get_src(ins, regs, 2);
7305     unsigned src3 = bi_get_src(ins, regs, 3);
7306 
7307     assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ);
7308     unsigned round = (ins->roundmode == BIFROST_RTZ) ? 1 : 0;
7309     assert(round < 2);
7310 
7311     unsigned clamp = ins->outmod;
7312     assert(clamp < 4);
7313 
7314     unsigned neg0 = ins->src_neg[0];
7315     assert(neg0 < 2);
7316 
7317     unsigned neg1 = ins->src_neg[1];
7318     assert(neg1 < 2);
7319 
7320     unsigned abs0 = ins->src_abs[0];
7321     assert(abs0 < 2);
7322 
7323     unsigned neg2 = ins->src_neg[2];
7324     assert(neg2 < 2);
7325 
7326     unsigned special = 0;
7327 
7328     unsigned derived_16 = 0;
7329     if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0;
7330     else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1;
7331     else unreachable("No pattern match at pos 16");
7332 
7333     unsigned derived_12 = 0;
7334     if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0;
7335     else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1;
7336     else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2;
7337     else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3;
7338     else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4;
7339     else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5;
7340     else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7;
7341     else unreachable("No pattern match at pos 12");
7342 
7343     return 0x680000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12);
7344 }
7345 
7346 static inline unsigned
pan_pack_add_fpow_sc_apply(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7347 pan_pack_add_fpow_sc_apply(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7348 {
7349     unsigned src0 = bi_get_src(ins, regs, 0);
7350     unsigned src1 = bi_get_src(ins, regs, 1);
7351 
7352     return 0x75080 | (src0 << 0) | (src1 << 3);
7353 }
7354 
7355 static inline unsigned
pan_pack_add_v2f16_to_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7356 pan_pack_add_v2f16_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7357 {
7358     unsigned src0 = bi_get_src(ins, regs, 0);
7359 
7360     unsigned round = ins->roundmode;
7361     assert(round < 8);
7362 
7363     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7364     unsigned swz0_temp = 0;
7365     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7366     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7367     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7368     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7369     else unreachable("Could not pattern match widen");
7370     unsigned swz0 = swz0_temp;
7371     assert(swz0 < 4);
7372 
7373     if (round != 4) {
7374         unsigned derived_4 = 0;
7375         if (round == 0) derived_4 = 0;
7376         else if (round == 1) derived_4 = 1;
7377         else if (round == 2) derived_4 = 2;
7378         else if (round == 3) derived_4 = 3;
7379         else unreachable("No pattern match at pos 4");
7380 
7381         return 0x3c200 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
7382     } else if (round == 4) {
7383         return 0x3ca80 | (src0 << 0) | (swz0 << 4);
7384     } else {
7385         unreachable("No matching state found in add_v2f16_to_v2s16");
7386     }
7387 }
7388 
7389 static inline unsigned
pan_pack_add_icmp_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7390 pan_pack_add_icmp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7391 {
7392     unsigned src0 = bi_get_src(ins, regs, 0);
7393     unsigned src1 = bi_get_src(ins, regs, 1);
7394 
7395     unsigned result_type = 1;
7396 
7397     unsigned cmpf_table[] = {
7398         ~0, ~0, ~0, ~0, ~0, 0, 1
7399     };
7400     unsigned cmpf = cmpf_table[ins->cond];
7401     assert(cmpf < 2);
7402 
7403     return 0x7b100 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
7404 }
7405 
7406 static inline unsigned
pan_pack_add_eureka(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7407 pan_pack_add_eureka(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7408 {
7409     unsigned src0 = bi_get_src(ins, regs, 0);
7410 
7411     return 0xd7850 | (src0 << 0);
7412 }
7413 
7414 static inline unsigned
pan_pack_add_branch_u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7415 pan_pack_add_branch_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7416 {
7417     unsigned src0 = bi_get_src(ins, regs, 0);
7418     unsigned src1 = bi_get_src(ins, regs, 1);
7419     unsigned src2 = bi_get_src(ins, regs, 2);
7420     assert((1 << src2) & 0xf7);
7421 
7422     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7423     unsigned widen0_temp = 0;
7424     if (widen0_sz == 32) widen0_temp = 0;
7425     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7426     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7427     else unreachable("Could not pattern match widen");
7428     unsigned widen0 = widen0_temp;
7429     assert(widen0 < 4);
7430 
7431     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7432     unsigned widen1_temp = 0;
7433     if (widen1_sz == 32) widen1_temp = 0;
7434     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
7435     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
7436     else unreachable("Could not pattern match widen");
7437     unsigned widen1 = widen1_temp;
7438     assert(widen1 < 4);
7439 
7440     unsigned cmpf_table[] = {
7441         ~0, 2, 3, 1, 0, ~0, ~0
7442     };
7443     unsigned cmpf = cmpf_table[ins->cond];
7444     assert(cmpf < 4);
7445 
7446     if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 < src1))) {
7447         { unsigned temp = src0; src0 = src1; src1 = temp; }
7448         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
7449         if (cmpf == 0) cmpf = 2;
7450         else if (cmpf == 3) cmpf = 1;
7451         else if (cmpf == 2) cmpf = 0;
7452         else if (cmpf == 1) cmpf = 3;
7453     }
7454 
7455     unsigned derived_12 = 0;
7456     if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
7457     else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
7458     else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 3;
7459     else unreachable("No pattern match at pos 12");
7460 
7461     unsigned derived_9 = 0;
7462     if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 0;
7463     else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 1;
7464     else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 2;
7465     else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 0))) derived_9 = 3;
7466     else unreachable("No pattern match at pos 9");
7467 
7468     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
7469 }
7470 
7471 static inline unsigned
pan_pack_add_v2f32_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7472 pan_pack_add_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7473 {
7474     unsigned src0 = bi_get_src(ins, regs, 0);
7475     unsigned src1 = bi_get_src(ins, regs, 1);
7476 
7477     unsigned abs0 = ins->src_abs[0];
7478     assert(abs0 < 2);
7479 
7480     unsigned abs1 = ins->src_abs[1];
7481     assert(abs1 < 2);
7482 
7483     unsigned neg0 = ins->src_neg[0];
7484     assert(neg0 < 2);
7485 
7486     unsigned neg1 = ins->src_neg[1];
7487     assert(neg1 < 2);
7488 
7489     unsigned clamp = ins->outmod;
7490     assert(clamp < 4);
7491 
7492     unsigned round = ins->roundmode;
7493     assert(round < 8);
7494 
7495     unsigned derived_6 = 0;
7496     if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0;
7497     else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1;
7498     else unreachable("No pattern match at pos 6");
7499 
7500     unsigned derived_7 = 0;
7501     if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0;
7502     else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1;
7503     else unreachable("No pattern match at pos 7");
7504 
7505     return 0x76000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7);
7506 }
7507 
7508 static inline unsigned
pan_pack_add_frcbrt_approx_a_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7509 pan_pack_add_frcbrt_approx_a_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7510 {
7511     unsigned src0 = bi_get_src(ins, regs, 0);
7512     assert((1 << src0) & 0xf7);
7513 
7514     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7515     unsigned widen0_temp = 0;
7516     if (widen0_sz == 32) widen0_temp = 0;
7517     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7518     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7519     else unreachable("Could not pattern match widen");
7520     unsigned widen0 = widen0_temp;
7521     assert(widen0 < 4);
7522 
7523     unsigned neg = ins->src_neg[0];
7524     assert(neg < 2);
7525 
7526     unsigned abs0 = ins->src_abs[0];
7527     assert(abs0 < 2);
7528 
7529     unsigned divzero = 0;
7530 
7531     if (widen0 == 0) {
7532         return 0x67200 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
7533     } else if (widen0 != 0) {
7534         unsigned derived_7 = 0;
7535         if (widen0 == 1) derived_7 = 0;
7536         else if (widen0 == 2) derived_7 = 1;
7537         else unreachable("No pattern match at pos 7");
7538 
7539         return 0x67240 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
7540     } else {
7541         unreachable("No matching state found in add_frcbrt_approx_a_f32");
7542     }
7543 }
7544 
7545 static inline unsigned
pan_pack_fma_atom_c_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7546 pan_pack_fma_atom_c_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7547 {
7548     unsigned src0 = bi_get_src(ins, regs, 0);
7549     assert((1 << src0) & 0xf3);
7550     unsigned src1 = bi_get_src(ins, regs, 1);
7551     assert((1 << src1) & 0xf3);
7552     unsigned src2 = bi_get_src(ins, regs, 2);
7553     assert((1 << src2) & 0xf7);
7554 
7555     unsigned atom_opc = 2;
7556 
7557     return 0x2f4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
7558 }
7559 
7560 static inline unsigned
pan_pack_fma_seg_add(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7561 pan_pack_fma_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7562 {
7563     unsigned src0 = bi_get_src(ins, regs, 0);
7564     assert((1 << src0) & 0xfb);
7565 
7566     assert(ins->segment);
7567     unsigned seg = ins->segment;
7568     assert(seg < 8);
7569 
7570     unsigned preserve_null = 0;
7571 
7572     return 0x701500 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
7573 }
7574 
7575 static inline unsigned
pan_pack_add_store_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7576 pan_pack_add_store_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7577 {
7578     unsigned src0 = bi_get_src(ins, regs, 1);
7579     unsigned src1 = bi_get_src(ins, regs, 2);
7580 
7581     assert(ins->segment);
7582     unsigned seg = ins->segment;
7583     assert(seg < 8);
7584 
7585     bi_read_staging_register(clause, ins);
7586     return 0x62c00 | (src0 << 0) | (src1 << 3) | (seg << 6);
7587 }
7588 
7589 static inline unsigned
pan_pack_add_fatan_assist_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7590 pan_pack_add_fatan_assist_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7591 {
7592     unsigned src0 = bi_get_src(ins, regs, 0);
7593     assert((1 << src0) & 0xf7);
7594     unsigned src1 = bi_get_src(ins, regs, 1);
7595     assert((1 << src1) & 0xf7);
7596 
7597     unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7598     unsigned lane1_temp = 0;
7599     if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
7600     else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
7601     else unreachable("Could not pattern match widen");
7602     unsigned lane1 = lane1_temp;
7603     assert(lane1 < 2);
7604 
7605     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7606     unsigned lane0_temp = 0;
7607     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
7608     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
7609     else unreachable("Could not pattern match widen");
7610     unsigned lane0 = lane0_temp;
7611     assert(lane0 < 2);
7612 
7613     return 0x67800 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7);
7614 }
7615 
7616 static inline unsigned
pan_pack_add_v2u16_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7617 pan_pack_add_v2u16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7618 {
7619     unsigned src0 = bi_get_src(ins, regs, 0);
7620 
7621     unsigned round = ins->roundmode;
7622     assert(round < 8);
7623 
7624     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7625     unsigned swz0_temp = 0;
7626     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7627     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7628     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7629     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7630     else unreachable("Could not pattern match widen");
7631     unsigned swz0 = swz0_temp;
7632     assert(swz0 < 4);
7633 
7634     if (round != 4) {
7635         unsigned derived_4 = 0;
7636         if (round == 0) derived_4 = 0;
7637         else if (round == 1) derived_4 = 1;
7638         else if (round == 2) derived_4 = 2;
7639         else if (round == 3) derived_4 = 3;
7640         else unreachable("No pattern match at pos 4");
7641 
7642         return 0x3c608 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
7643     } else if (round == 4) {
7644         return 0x3cb08 | (src0 << 0) | (swz0 << 4);
7645     } else {
7646         unreachable("No matching state found in add_v2u16_to_v2f16");
7647     }
7648 }
7649 
7650 static inline unsigned
pan_pack_add_iadd_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7651 pan_pack_add_iadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7652 {
7653     unsigned src0 = bi_get_src(ins, regs, 0);
7654     unsigned src1 = bi_get_src(ins, regs, 1);
7655 
7656     unsigned saturate = 0;
7657 
7658     unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7659     unsigned lanes0_temp = 0;
7660     if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
7661     else unreachable("Could not pattern match widen");
7662     unsigned lanes0 = lanes0_temp;
7663     assert(lanes0 < 8);
7664 
7665     unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7666     unsigned lanes1_temp = 0;
7667     if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
7668     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
7669     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
7670     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
7671     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
7672     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
7673     else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
7674     else unreachable("Could not pattern match widen");
7675     unsigned lanes1 = lanes1_temp;
7676     assert(lanes1 < 8);
7677 
7678     if ((lanes0 == 0) && (lanes1 == 0)) {
7679         unsigned derived_7 = 0;
7680         if (saturate == 0) derived_7 = 0;
7681         else if (saturate == 1) derived_7 = 1;
7682         else unreachable("No pattern match at pos 7");
7683 
7684         return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
7685     } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
7686         unsigned derived_7 = 0;
7687         if (saturate == 0) derived_7 = 0;
7688         else if (saturate == 1) derived_7 = 1;
7689         else unreachable("No pattern match at pos 7");
7690 
7691         unsigned derived_9 = 0;
7692         if (lanes1 == 1) derived_9 = 0;
7693         else if (lanes1 == 2) derived_9 = 1;
7694         else if (lanes1 == 3) derived_9 = 2;
7695         else if (lanes1 == 4) derived_9 = 3;
7696         else unreachable("No pattern match at pos 9");
7697 
7698         return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
7699     } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
7700         unsigned derived_7 = 0;
7701         if (saturate == 0) derived_7 = 0;
7702         else if (saturate == 1) derived_7 = 1;
7703         else unreachable("No pattern match at pos 7");
7704 
7705         unsigned derived_9 = 0;
7706         if (lanes1 == 5) derived_9 = 0;
7707         else if (lanes1 == 6) derived_9 = 1;
7708         else unreachable("No pattern match at pos 9");
7709 
7710         return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
7711     } else {
7712         unreachable("No matching state found in add_iadd_v4u8");
7713     }
7714 }
7715 
7716 static inline unsigned
pan_pack_add_store_i96(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7717 pan_pack_add_store_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7718 {
7719     unsigned src0 = bi_get_src(ins, regs, 1);
7720     unsigned src1 = bi_get_src(ins, regs, 2);
7721 
7722     assert(ins->segment);
7723     unsigned seg = ins->segment;
7724     assert(seg < 8);
7725 
7726     bi_read_staging_register(clause, ins);
7727     return 0x65c00 | (src0 << 0) | (src1 << 3) | (seg << 6);
7728 }
7729 
7730 static inline unsigned
pan_pack_fma_lshift_and_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7731 pan_pack_fma_lshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7732 {
7733     unsigned src0 = bi_get_src(ins, regs, 0);
7734     assert((1 << src0) & 0xfb);
7735     unsigned src1 = bi_get_src(ins, regs, 1);
7736     assert((1 << src1) & 0xfb);
7737     unsigned src2 = bi_get_src(ins, regs, 2);
7738 
7739     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7740     unsigned lane2_temp = 0;
7741     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
7742     else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
7743     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
7744     else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
7745     else unreachable("Could not pattern match widen");
7746     unsigned lane2 = lane2_temp;
7747     assert(lane2 < 4);
7748 
7749     unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
7750     assert(not1 < 2);
7751 
7752     unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
7753     assert(not_result < 2);
7754 
7755     return 0x311000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
7756 }
7757 
7758 static inline unsigned
pan_pack_fma_u16_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7759 pan_pack_fma_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7760 {
7761     unsigned src0 = bi_get_src(ins, regs, 0);
7762     assert((1 << src0) & 0xfb);
7763 
7764     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7765     unsigned lane0_temp = 0;
7766     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
7767     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
7768     else unreachable("Could not pattern match widen");
7769     unsigned lane0 = lane0_temp;
7770     assert(lane0 < 2);
7771 
7772     return 0x700cc8 | (src0 << 0) | (lane0 << 4);
7773 }
7774 
7775 static inline unsigned
pan_pack_add_wmask(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7776 pan_pack_add_wmask(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7777 {
7778     unsigned src0 = bi_get_src(ins, regs, 0);
7779 
7780     unsigned subgroup = 1;
7781 
7782     unsigned fill = 0;
7783     return 0x3d700 | (src0 << 0) | (subgroup << 4) | (fill << 3);
7784 }
7785 
7786 static inline unsigned
pan_pack_add_fadd_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7787 pan_pack_add_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7788 {
7789     unsigned src0 = bi_get_src(ins, regs, 0);
7790     unsigned src1 = bi_get_src(ins, regs, 1);
7791 
7792     unsigned abs1 = ins->src_abs[1];
7793     assert(abs1 < 2);
7794 
7795     unsigned neg0 = ins->src_neg[0];
7796     assert(neg0 < 2);
7797 
7798     unsigned neg1 = ins->src_neg[1];
7799     assert(neg1 < 2);
7800 
7801     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7802     unsigned swz0_temp = 0;
7803     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7804     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7805     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7806     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7807     else unreachable("Could not pattern match widen");
7808     unsigned swz0 = swz0_temp;
7809     assert(swz0 < 4);
7810 
7811     unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7812     unsigned swz1_temp = 0;
7813     if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
7814     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
7815     else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
7816     else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
7817     else unreachable("Could not pattern match widen");
7818     unsigned swz1 = swz1_temp;
7819     assert(swz1 < 4);
7820 
7821     unsigned round = ins->roundmode;
7822     assert(round < 4);
7823 
7824     unsigned abs0 = ins->src_abs[0];
7825     assert(abs0 < 2);
7826 
7827     return 0xa0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (abs0 << 15);
7828 }
7829 
7830 static inline unsigned
pan_pack_add_flog_table_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7831 pan_pack_add_flog_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7832 {
7833     unsigned src0 = bi_get_src(ins, regs, 0);
7834     assert((1 << src0) & 0xf7);
7835 
7836     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7837     unsigned widen0_temp = 0;
7838     if (widen0_sz == 32) widen0_temp = 0;
7839     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7840     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7841     else unreachable("Could not pattern match widen");
7842     unsigned widen0 = widen0_temp;
7843     assert(widen0 < 4);
7844 
7845     unsigned mode = 0;
7846 
7847     unsigned precision = 0;
7848 
7849     unsigned neg = ins->src_neg[0];
7850     assert(neg < 2);
7851 
7852     unsigned abs0 = ins->src_abs[0];
7853     assert(abs0 < 2);
7854 
7855     unsigned divzero = 0;
7856 
7857     if ((mode == 0) && (widen0 == 0) && (precision == 0)) {
7858         return 0x67300 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
7859     } else if ((mode == 0) && (widen0 != 0) && (precision == 0)) {
7860         unsigned derived_7 = 0;
7861         if (widen0 == 1) derived_7 = 0;
7862         else if (widen0 == 2) derived_7 = 1;
7863         else unreachable("No pattern match at pos 7");
7864 
7865         return 0x67340 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
7866     } else if ((mode != 0) && (widen0 == 0) && (precision == 0) && (divzero == 0)) {
7867         unsigned derived_5 = 0;
7868         if (mode == 1) derived_5 = 0;
7869         else if (mode == 2) derived_5 = 1;
7870         else unreachable("No pattern match at pos 5");
7871 
7872         return 0x67b00 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5);
7873     } else if ((mode != 0) && (widen0 != 0) && (precision == 0) && (divzero == 0)) {
7874         unsigned derived_5 = 0;
7875         if (mode == 1) derived_5 = 0;
7876         else if (mode == 2) derived_5 = 1;
7877         else unreachable("No pattern match at pos 5");
7878 
7879         unsigned derived_7 = 0;
7880         if (widen0 == 1) derived_7 = 0;
7881         else if (widen0 == 2) derived_7 = 1;
7882         else unreachable("No pattern match at pos 7");
7883 
7884         return 0x67b40 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5) | (derived_7 << 7);
7885     } else if ((mode != 0) && (widen0 == 0) && (precision != 0) && (divzero == 0) && (abs0 == 0) && (neg == 0)) {
7886         unsigned derived_3 = 0;
7887         if (mode == 2) derived_3 = 0;
7888         else if (mode == 1) derived_3 = 1;
7889         else unreachable("No pattern match at pos 3");
7890 
7891         unsigned derived_4 = 0;
7892         if (precision == 1) derived_4 = 0;
7893         else if (precision == 2) derived_4 = 1;
7894         else unreachable("No pattern match at pos 4");
7895 
7896         return 0x67ae0 | (src0 << 0) | (derived_3 << 3) | (derived_4 << 4);
7897     } else {
7898         unreachable("No matching state found in add_flog_table_f32");
7899     }
7900 }
7901 
7902 static inline unsigned
pan_pack_add_branchz_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7903 pan_pack_add_branchz_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7904 {
7905     unsigned src0 = bi_get_src(ins, regs, 0);
7906     unsigned src1 = bi_get_src(ins, regs, 1);
7907     assert((1 << src1) & 0xf7);
7908 
7909     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7910     unsigned widen0_temp = 0;
7911     if (widen0_sz == 32) widen0_temp = 0;
7912     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7913     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7914     else unreachable("Could not pattern match widen");
7915     unsigned widen0 = widen0_temp;
7916     assert(widen0 < 4);
7917 
7918     unsigned cmpf_table[] = {
7919         ~0, ~0, ~0, ~0, ~0, 0, 1
7920     };
7921     unsigned cmpf = cmpf_table[ins->cond];
7922     assert(cmpf < 2);
7923 
7924     unsigned derived_4 = 0;
7925     if (widen0 == 2) derived_4 = 1;
7926     else if (widen0 == 1) derived_4 = 2;
7927     else unreachable("No pattern match at pos 4");
7928 
7929     unsigned derived_3 = 0;
7930     if (cmpf == 1) derived_3 = 0;
7931     else if (cmpf == 0) derived_3 = 1;
7932     else unreachable("No pattern match at pos 3");
7933 
7934     return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3);
7935 }
7936 
7937 static inline unsigned
pan_pack_add_ilogb_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7938 pan_pack_add_ilogb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7939 {
7940     unsigned src0 = bi_get_src(ins, regs, 0);
7941 
7942     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7943     unsigned swz0_temp = 0;
7944     if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7945     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7946     else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7947     else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7948     else unreachable("Could not pattern match widen");
7949     unsigned swz0 = swz0_temp;
7950     assert(swz0 < 4);
7951 
7952     return 0x3d9c0 | (src0 << 0) | (swz0 << 3);
7953 }
7954 
7955 static inline unsigned
pan_pack_add_v2s8_to_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7956 pan_pack_add_v2s8_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7957 {
7958     unsigned src0 = bi_get_src(ins, regs, 0);
7959 
7960     unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7961     unsigned swz0_temp = 0;
7962     if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7963     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7964     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
7965     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
7966     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
7967     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
7968     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
7969     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
7970     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
7971     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
7972     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
7973     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
7974     else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
7975     else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
7976     else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
7977     else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
7978     else unreachable("Could not pattern match widen");
7979     unsigned swz0 = swz0_temp;
7980     assert(swz0 < 16);
7981 
7982     return 0x3c700 | (src0 << 0) | (swz0 << 4);
7983 }
7984 
7985 static inline unsigned
pan_pack_add_u32_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7986 pan_pack_add_u32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7987 {
7988     unsigned src0 = bi_get_src(ins, regs, 0);
7989 
7990     unsigned round = ins->roundmode;
7991     assert(round < 8);
7992 
7993     if (round != 4) {
7994         unsigned derived_4 = 0;
7995         if (round == 0) derived_4 = 0;
7996         else if (round == 1) derived_4 = 1;
7997         else if (round == 2) derived_4 = 2;
7998         else if (round == 3) derived_4 = 3;
7999         else unreachable("No pattern match at pos 4");
8000 
8001         return 0x3cbc8 | (src0 << 0) | (derived_4 << 4);
8002     } else if (round == 4) {
8003         return 0x3cd08 | (src0 << 0);
8004     } else {
8005         unreachable("No matching state found in add_u32_to_f32");
8006     }
8007 }
8008 
8009 static inline unsigned
pan_pack_add_blend(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8010 pan_pack_add_blend(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8011 {
8012     unsigned src0 = bi_get_src(ins, regs, 1);
8013     unsigned src1 = bi_get_src(ins, regs, 2);
8014     assert((1 << src1) & 0xf7);
8015     unsigned src2 = bi_get_src(ins, regs, 3);
8016     assert((1 << src2) & 0xf7);
8017 
8018     bi_read_staging_register(clause, ins);
8019     return 0xca800 | (src0 << 0) | (src1 << 3) | (src2 << 6);
8020 }
8021 
8022 static inline unsigned
pan_pack_fma_fma_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8023 pan_pack_fma_fma_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8024 {
8025     unsigned src0 = bi_get_src(ins, regs, 0);
8026     assert((1 << src0) & 0xfb);
8027     unsigned src1 = bi_get_src(ins, regs, 1);
8028     assert((1 << src1) & 0xfb);
8029     unsigned src2 = bi_get_src(ins, regs, 2);
8030 
8031     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8032     unsigned widen0_temp = 0;
8033     if (widen0_sz == 32) widen0_temp = 0;
8034     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
8035     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
8036     else unreachable("Could not pattern match widen");
8037     unsigned widen0 = widen0_temp;
8038     assert(widen0 < 4);
8039 
8040     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
8041     unsigned widen1_temp = 0;
8042     if (widen1_sz == 32) widen1_temp = 0;
8043     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
8044     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
8045     else unreachable("Could not pattern match widen");
8046     unsigned widen1 = widen1_temp;
8047     assert(widen1 < 4);
8048 
8049     unsigned neg0 = ins->src_neg[0];
8050     assert(neg0 < 2);
8051 
8052     unsigned neg1 = ins->src_neg[1];
8053     assert(neg1 < 2);
8054 
8055     unsigned abs0 = ins->src_abs[0];
8056     assert(abs0 < 2);
8057 
8058     unsigned round = ins->roundmode;
8059     assert(round < 4);
8060 
8061     unsigned clamp = ins->outmod;
8062     assert(clamp < 4);
8063 
8064     unsigned abs1 = ins->src_abs[1];
8065     assert(abs1 < 2);
8066 
8067     unsigned neg2 = ins->src_neg[2];
8068     assert(neg2 < 2);
8069 
8070     unsigned abs2 = ins->src_abs[2];
8071     assert(abs2 < 2);
8072 
8073     if ((widen0 == 2) && (widen1 == 1)) {
8074         { unsigned temp = src0; src0 = src1; src1 = temp; }
8075         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
8076         { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
8077         { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
8078     }
8079 
8080     unsigned derived_9 = 0;
8081     if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
8082     else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
8083     else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
8084     else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
8085     else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4;
8086     else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5;
8087     else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6;
8088     else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7;
8089     else unreachable("No pattern match at pos 9");
8090 
8091     unsigned derived_17 = 0;
8092     if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0;
8093     else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1;
8094     else unreachable("No pattern match at pos 17");
8095 
8096     return 0x0 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs0 << 12) | (round << 13) | (clamp << 15) | (abs1 << 19) | (neg2 << 18) | (abs2 << 20) | (derived_9 << 9) | (derived_17 << 17);
8097 }
8098 
8099 static inline unsigned
pan_pack_add_branchz_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8100 pan_pack_add_branchz_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8101 {
8102     unsigned src0 = bi_get_src(ins, regs, 0);
8103     unsigned src1 = bi_get_src(ins, regs, 1);
8104     assert((1 << src1) & 0xf7);
8105 
8106     unsigned cmpf_table[] = {
8107         ~0, 4, 5, 2, 1, 0, 3
8108     };
8109     unsigned cmpf = cmpf_table[ins->cond];
8110     assert(cmpf < 8);
8111 
8112     unsigned derived_3 = 0;
8113     if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0;
8114     else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1;
8115     else unreachable("No pattern match at pos 3");
8116 
8117     unsigned derived_9 = 0;
8118     if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5;
8119     else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6;
8120     else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7;
8121     else unreachable("No pattern match at pos 9");
8122 
8123     return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_3 << 3) | (derived_9 << 9);
8124 }
8125 
8126 static inline unsigned
pan_pack_add_lea_tex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8127 pan_pack_add_lea_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8128 {
8129     unsigned src0 = bi_get_src(ins, regs, 0);
8130     unsigned src1 = bi_get_src(ins, regs, 1);
8131     unsigned src2 = bi_get_src(ins, regs, 2);
8132 
8133     unsigned format = 1;
8134 
8135     bi_write_staging_register(clause, ins);
8136     return 0xd6600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (format << 11);
8137 }
8138 
8139 static inline unsigned
pan_pack_add_branch_diverg(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8140 pan_pack_add_branch_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8141 {
8142     unsigned src0 = bi_get_src(ins, regs, 0);
8143     assert((1 << src0) & 0xf7);
8144 
8145     return 0x6f83c | (src0 << 6);
8146 }
8147 
8148 static inline unsigned
pan_pack_fma_lrot_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8149 pan_pack_fma_lrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8150 {
8151     unsigned src0 = bi_get_src(ins, regs, 0);
8152     assert((1 << src0) & 0xfb);
8153     unsigned src1 = bi_get_src(ins, regs, 1);
8154     assert((1 << src1) & 0xfb);
8155     unsigned src2 = bi_get_src(ins, regs, 2);
8156 
8157     unsigned bytes2 = 0;
8158 
8159     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
8160     unsigned lane2_temp = 0;
8161     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
8162     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
8163     else unreachable("Could not pattern match widen");
8164     unsigned lane2 = lane2_temp;
8165     assert(lane2 < 2);
8166 
8167     unsigned result_word = 0;
8168 
8169     return 0x33b000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
8170 }
8171 
8172 static inline unsigned
pan_pack_fma_flshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8173 pan_pack_fma_flshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8174 {
8175     unsigned src0 = bi_get_src(ins, regs, 0);
8176     assert((1 << src0) & 0xfb);
8177     unsigned src1 = bi_get_src(ins, regs, 1);
8178     assert((1 << src1) & 0xfb);
8179     unsigned src2 = bi_get_src(ins, regs, 2);
8180 
8181     unsigned bytes2 = 0;
8182 
8183     unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
8184     unsigned lane2_temp = 0;
8185     if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
8186     else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
8187     else unreachable("Could not pattern match widen");
8188     unsigned lane2 = lane2_temp;
8189     assert(lane2 < 2);
8190 
8191     return 0x33f800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10);
8192 }
8193 
8194 static inline unsigned
pan_pack_fma_fmul_cslice(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8195 pan_pack_fma_fmul_cslice(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8196 {
8197     unsigned src0 = bi_get_src(ins, regs, 0);
8198     assert((1 << src0) & 0xfb);
8199     unsigned src1 = bi_get_src(ins, regs, 1);
8200     assert((1 << src1) & 0xfb);
8201 
8202     unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8203     unsigned lane0_temp = 0;
8204     if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
8205     else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
8206     else unreachable("Could not pattern match widen");
8207     unsigned lane0 = lane0_temp;
8208     assert(lane0 < 2);
8209 
8210     unsigned abs0 = ins->src_abs[0];
8211     assert(abs0 < 2);
8212 
8213     unsigned neg0 = ins->src_neg[0];
8214     assert(neg0 < 2);
8215 
8216     return 0x70d000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (abs0 << 7) | (neg0 << 8);
8217 }
8218 
8219 static inline unsigned
pan_pack_add_branch_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8220 pan_pack_add_branch_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8221 {
8222     unsigned src0 = bi_get_src(ins, regs, 0);
8223     unsigned src1 = bi_get_src(ins, regs, 1);
8224     unsigned src2 = bi_get_src(ins, regs, 2);
8225     assert((1 << src2) & 0xf7);
8226 
8227     unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8228     unsigned widen0_temp = 0;
8229     if (widen0_sz == 32) widen0_temp = 0;
8230     else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
8231     else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
8232     else unreachable("Could not pattern match widen");
8233     unsigned widen0 = widen0_temp;
8234     assert(widen0 < 4);
8235 
8236     unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
8237     unsigned widen1_temp = 0;
8238     if (widen1_sz == 32) widen1_temp = 0;
8239     else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
8240     else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
8241     else unreachable("Could not pattern match widen");
8242     unsigned widen1 = widen1_temp;
8243     assert(widen1 < 4);
8244 
8245     unsigned cmpf_table[] = {
8246         ~0, ~0, ~0, ~0, ~0, 0, 1
8247     };
8248     unsigned cmpf = cmpf_table[ins->cond];
8249     assert(cmpf < 2);
8250 
8251     if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) {
8252         { unsigned temp = src0; src0 = src1; src1 = temp; }
8253         { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
8254     }
8255 
8256     unsigned derived_12 = 0;
8257     if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
8258     else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
8259     else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) derived_12 = 3;
8260     else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) derived_12 = 4;
8261     else unreachable("No pattern match at pos 12");
8262 
8263     unsigned derived_9 = 0;
8264     if ((widen0 == widen1) && (src0 == src1) && (cmpf == 0)) derived_9 = 1;
8265     else if (((widen0 == 2) && (widen1 == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 4;
8266     else unreachable("No pattern match at pos 9");
8267 
8268     return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
8269 }
8270 
8271 static inline unsigned
pan_pack_add_f32_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8272 pan_pack_add_f32_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8273 {
8274     unsigned src0 = bi_get_src(ins, regs, 0);
8275 
8276     unsigned round = ins->roundmode;
8277     assert(round < 8);
8278 
8279     if (round != 4) {
8280         unsigned derived_4 = 0;
8281         if (round == 0) derived_4 = 0;
8282         else if (round == 1) derived_4 = 1;
8283         else if (round == 2) derived_4 = 2;
8284         else if (round == 3) derived_4 = 3;
8285         else unreachable("No pattern match at pos 4");
8286 
8287         return 0x3c988 | (src0 << 0) | (derived_4 << 4);
8288     } else if (round == 4) {
8289         return 0x3cca8 | (src0 << 0);
8290     } else {
8291         unreachable("No matching state found in add_f32_to_u32");
8292     }
8293 }
8294 
8295 static inline unsigned
pan_pack_add_load_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8296 pan_pack_add_load_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8297 {
8298     unsigned src0 = bi_get_src(ins, regs, 0);
8299     unsigned src1 = bi_get_src(ins, regs, 1);
8300 
8301     assert(ins->segment);
8302     unsigned seg = ins->segment;
8303     assert(seg < 8);
8304 
8305     unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8306     unsigned lane_temp = 0;
8307     if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 0;
8308     else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 1;
8309     else if (lane_sz == 32) lane_temp = 2;
8310     else if (lane_sz == 64) lane_temp = 3;
8311     else unreachable("Could not pattern match widen");
8312     unsigned lane = lane_temp;
8313     assert(lane < 4);
8314 
8315     ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
8316     bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
8317     unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0;
8318     assert(extend < 4);
8319 
8320     bi_write_staging_register(clause, ins);
8321     if ((extend == 0) && ((lane == 0) || (lane == 1))) {
8322         unsigned derived_9 = 0;
8323         if (lane == 0) derived_9 = 0;
8324         else if (lane == 1) derived_9 = 1;
8325         else unreachable("No pattern match at pos 9");
8326 
8327         return 0x60800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
8328     } else if ((extend != 0) && (lane == 2)) {
8329         unsigned derived_9 = 0;
8330         if (extend == 1) derived_9 = 0;
8331         else if (extend == 2) derived_9 = 1;
8332         else unreachable("No pattern match at pos 9");
8333 
8334         return 0x63000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
8335     } else if ((extend != 0) && (lane == 3)) {
8336         unsigned derived_9 = 0;
8337         if (extend == 1) derived_9 = 0;
8338         else if (extend == 2) derived_9 = 1;
8339         else unreachable("No pattern match at pos 9");
8340 
8341         return 0x61800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
8342     } else {
8343         unreachable("No matching state found in add_load_i16");
8344     }
8345 }
8346 
8347 static inline unsigned
pan_pack_add_mux_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8348 pan_pack_add_mux_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8349 {
8350     unsigned src0 = bi_get_src(ins, regs, 0);
8351     unsigned src1 = bi_get_src(ins, regs, 1);
8352     unsigned src2 = bi_get_src(ins, regs, 2);
8353 
8354     unsigned mux = 1;
8355 
8356     unsigned swap2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
8357     unsigned swap2_temp = 0;
8358     if (swap2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swap2_temp = 0;
8359     else if (swap2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swap2_temp = 1;
8360     else unreachable("Could not pattern match widen");
8361     unsigned swap2 = swap2_temp;
8362     assert(swap2 < 2);
8363 
8364     unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
8365     unsigned swap1_temp = 0;
8366     if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0;
8367     else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1;
8368     else unreachable("Could not pattern match widen");
8369     unsigned swap1 = swap1_temp;
8370     assert(swap1 < 2);
8371 
8372     unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8373     unsigned swap0_temp = 0;
8374     if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0;
8375     else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1;
8376     else unreachable("Could not pattern match widen");
8377     unsigned swap0 = swap0_temp;
8378     assert(swap0 < 2);
8379 
8380     return 0x70000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9) | (swap2 << 11) | (swap1 << 12) | (swap0 << 13);
8381 }
8382 
8383 #endif
8384