1 /*
2 * Copyright (C) 2020 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /* Autogenerated file, do not edit */
25
26 #ifndef _BI_GENERATED_PACK_H
27 #define _BI_GENERATED_PACK_H
28
29 #include "compiler.h"
30 #include "bi_pack_helpers.h"
31
32 static inline unsigned
pan_pack_fma_rshift_and_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)33 pan_pack_fma_rshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
34 {
35 unsigned src0 = bi_get_src(ins, regs, 0);
36 assert((1 << src0) & 0xfb);
37 unsigned src1 = bi_get_src(ins, regs, 1);
38 assert((1 << src1) & 0xfb);
39 unsigned src2 = bi_get_src(ins, regs, 2);
40
41 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
42 unsigned lane2_temp = 0;
43 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
44 else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
45 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
46 else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
47 else unreachable("Could not pattern match widen");
48 unsigned lane2 = lane2_temp;
49 assert(lane2 < 4);
50
51 unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
52 assert(not1 < 2);
53
54 unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
55 assert(not_result < 2);
56
57 return 0x301000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
58 }
59
60 static inline unsigned
pan_pack_add_iadd_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)61 pan_pack_add_iadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
62 {
63 unsigned src0 = bi_get_src(ins, regs, 0);
64 unsigned src1 = bi_get_src(ins, regs, 1);
65
66 unsigned saturate = 0;
67
68 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
69 unsigned lanes1_temp = 0;
70 if (lanes1_sz == 32) lanes1_temp = 0;
71 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
72 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
73 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
74 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
75 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
76 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
77 else unreachable("Could not pattern match widen");
78 unsigned lanes1 = lanes1_temp;
79 assert(lanes1 < 8);
80
81 if (lanes1 == 0) {
82 unsigned derived_7 = 0;
83 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
84 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
85 else unreachable("No pattern match at pos 7");
86
87 return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
88 } else if ((lanes1 == 1) || (lanes1 == 2)) {
89 unsigned derived_7 = 0;
90 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
91 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
92 else unreachable("No pattern match at pos 7");
93
94 unsigned derived_9 = 0;
95 if (lanes1 == 1) derived_9 = 0;
96 else if (lanes1 == 2) derived_9 = 1;
97 else unreachable("No pattern match at pos 9");
98
99 return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
100 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
101 unsigned derived_7 = 0;
102 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
103 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
104 else unreachable("No pattern match at pos 7");
105
106 unsigned derived_9 = 0;
107 if (lanes1 == 3) derived_9 = 0;
108 else if (lanes1 == 4) derived_9 = 1;
109 else if (lanes1 == 5) derived_9 = 2;
110 else if (lanes1 == 6) derived_9 = 3;
111 else unreachable("No pattern match at pos 9");
112
113 return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
114 } else {
115 unreachable("No matching state found in add_iadd_u32");
116 }
117 }
118
119 static inline unsigned
pan_pack_add_ld_var_flat(bi_clause * clause,bi_instruction * ins,bi_registers * regs)120 pan_pack_add_ld_var_flat(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
121 {
122 unsigned src0 = bi_get_src(ins, regs, 0);
123
124 unsigned vecsize = ins->vector_channels - 1;
125 assert(vecsize < 4);
126
127 unsigned register_format_temp = 0;
128 if (ins->format == nir_type_float32) register_format_temp = 0;
129 else if (ins->format == nir_type_float16) register_format_temp = 1;
130 else if (ins->format == nir_type_uint32) register_format_temp = 2;
131 else if (ins->format == nir_type_int32) register_format_temp = 3;
132 else unreachable("Could not pattern match register format");
133 unsigned register_format = register_format_temp;
134 assert(register_format < 8);
135
136 unsigned function = 3;
137
138 bi_write_staging_register(clause, ins);
139 if (register_format != 4) {
140 unsigned derived_10 = 0;
141 if ((register_format == 0) || (register_format == 1)) derived_10 = 0;
142 else if ((register_format == 2) || (register_format == 3)) derived_10 = 1;
143 else unreachable("No pattern match at pos 10");
144
145 unsigned derived_19 = 0;
146 if ((register_format == 0) || (register_format == 2)) derived_19 = 0;
147 else if ((register_format == 1) || (register_format == 3)) derived_19 = 1;
148 else unreachable("No pattern match at pos 19");
149
150 return 0x538c0 | (src0 << 3) | (vecsize << 8) | (function << 0) | (derived_10 << 10) | (derived_19 << 19);
151 } else if (register_format == 4) {
152 return 0xcf8c0 | (src0 << 3) | (vecsize << 8) | (function << 0);
153 } else {
154 unreachable("No matching state found in add_ld_var_flat");
155 }
156 }
157
158 static inline unsigned
pan_pack_add_store_i24(bi_clause * clause,bi_instruction * ins,bi_registers * regs)159 pan_pack_add_store_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
160 {
161 unsigned src0 = bi_get_src(ins, regs, 1);
162 unsigned src1 = bi_get_src(ins, regs, 2);
163
164 assert(ins->segment);
165 unsigned seg = ins->segment;
166 assert(seg < 8);
167
168 bi_read_staging_register(clause, ins);
169 return 0x65800 | (src0 << 0) | (src1 << 3) | (seg << 6);
170 }
171
172 static inline unsigned
pan_pack_fma_clz_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)173 pan_pack_fma_clz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
174 {
175 unsigned src0 = bi_get_src(ins, regs, 0);
176 assert((1 << src0) & 0xfb);
177
178 unsigned mask = 0;
179
180 return 0x701fd0 | (src0 << 0) | (mask << 3);
181 }
182
183 static inline unsigned
pan_pack_fma_clz_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)184 pan_pack_fma_clz_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
185 {
186 unsigned src0 = bi_get_src(ins, regs, 0);
187 assert((1 << src0) & 0xfb);
188
189 unsigned mask = 0;
190
191 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
192 unsigned swz0_temp = 0;
193 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
194 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
195 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
196 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
197 else unreachable("Could not pattern match widen");
198 unsigned swz0 = swz0_temp;
199 assert(swz0 < 4);
200
201 return 0x701ec0 | (src0 << 0) | (mask << 3) | (swz0 << 4);
202 }
203
204 static inline unsigned
pan_pack_fma_popcount_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)205 pan_pack_fma_popcount_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
206 {
207 unsigned src0 = bi_get_src(ins, regs, 0);
208 assert((1 << src0) & 0xfb);
209
210 return 0x73c6d8 | (src0 << 0);
211 }
212
213 static inline unsigned
pan_pack_add_fatan_table_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)214 pan_pack_add_fatan_table_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
215 {
216 unsigned src0 = bi_get_src(ins, regs, 0);
217 assert((1 << src0) & 0xf7);
218 unsigned src1 = bi_get_src(ins, regs, 1);
219 assert((1 << src1) & 0xf7);
220
221 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
222 unsigned lane1_temp = 0;
223 if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
224 else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
225 else unreachable("Could not pattern match widen");
226 unsigned lane1 = lane1_temp;
227 assert(lane1 < 2);
228
229 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
230 unsigned lane0_temp = 0;
231 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
232 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
233 else unreachable("Could not pattern match widen");
234 unsigned lane0 = lane0_temp;
235 assert(lane0 < 2);
236
237 return 0x67900 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7);
238 }
239
240 static inline unsigned
pan_pack_fma_rrot_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)241 pan_pack_fma_rrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
242 {
243 unsigned src0 = bi_get_src(ins, regs, 0);
244 assert((1 << src0) & 0xfb);
245 unsigned src1 = bi_get_src(ins, regs, 1);
246 assert((1 << src1) & 0xfb);
247 unsigned src2 = bi_get_src(ins, regs, 2);
248
249 unsigned bytes2 = 0;
250
251 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
252 unsigned lane2_temp = 0;
253 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
254 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
255 else unreachable("Could not pattern match widen");
256 unsigned lane2 = lane2_temp;
257 assert(lane2 < 2);
258
259 unsigned result_word = 0;
260
261 return 0x33a000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
262 }
263
264 static inline unsigned
pan_pack_fma_isubb_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)265 pan_pack_fma_isubb_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
266 {
267 unsigned src0 = bi_get_src(ins, regs, 0);
268 assert((1 << src0) & 0xfb);
269 unsigned src1 = bi_get_src(ins, regs, 1);
270 assert((1 << src1) & 0xfb);
271 unsigned src2 = bi_get_src(ins, regs, 2);
272
273 return 0x27fe00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
274 }
275
276 static inline unsigned
pan_pack_add_frcbrt_approx_b_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)277 pan_pack_add_frcbrt_approx_b_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
278 {
279 unsigned src0 = bi_get_src(ins, regs, 0);
280 assert((1 << src0) & 0xf7);
281
282 return 0x67ab0 | (src0 << 0);
283 }
284
285 static inline unsigned
pan_pack_fma_lshift_xor_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)286 pan_pack_fma_lshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
287 {
288 unsigned src0 = bi_get_src(ins, regs, 0);
289 assert((1 << src0) & 0xfb);
290 unsigned src1 = bi_get_src(ins, regs, 1);
291 assert((1 << src1) & 0xfb);
292 unsigned src2 = bi_get_src(ins, regs, 2);
293
294 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
295 unsigned lanes2_temp = 0;
296 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
297 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
298 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
299 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
300 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
301 else unreachable("Could not pattern match widen");
302 unsigned lanes2 = lanes2_temp;
303 assert(lanes2 < 8);
304
305 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
306 assert(not_result < 2);
307
308 if (lanes2 != 0) {
309 unsigned derived_9 = 0;
310 if (lanes2 == 1) derived_9 = 0;
311 else if (lanes2 == 2) derived_9 = 1;
312 else if (lanes2 == 3) derived_9 = 2;
313 else if (lanes2 == 4) derived_9 = 3;
314 else unreachable("No pattern match at pos 9");
315
316 return 0x324000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
317 } else if (lanes2 == 0) {
318 return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13);
319 } else {
320 unreachable("No matching state found in fma_lshift_xor_v4i8");
321 }
322 }
323
324 static inline unsigned
pan_pack_add_texs_cube_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)325 pan_pack_add_texs_cube_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
326 {
327 unsigned src0 = bi_get_src(ins, regs, 0);
328 unsigned src1 = bi_get_src(ins, regs, 1);
329 unsigned src2 = bi_get_src(ins, regs, 2);
330
331 unsigned skip = ins->skip;
332 assert(skip < 2);
333
334 unsigned sampler_index = ins->texture.sampler_index;
335 unsigned texture_index = ins->texture.texture_index;
336 bi_write_staging_register(clause, ins);
337 return 0x5c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12);
338 }
339
340 static inline unsigned
pan_pack_add_fround_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)341 pan_pack_add_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
342 {
343 unsigned src0 = bi_get_src(ins, regs, 0);
344
345 unsigned abs0 = ins->src_abs[0];
346 assert(abs0 < 2);
347
348 unsigned neg0 = ins->src_neg[0];
349 assert(neg0 < 2);
350
351 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
352 unsigned widen0_temp = 0;
353 if (widen0_sz == 32) widen0_temp = 1;
354 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
355 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
356 else unreachable("Could not pattern match widen");
357 unsigned widen0 = widen0_temp;
358 assert(widen0 < 4);
359
360 unsigned round = ins->roundmode;
361 assert(round < 4);
362
363 return 0x3e820 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (round << 9);
364 }
365
366 static inline unsigned
pan_pack_add_fexp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)367 pan_pack_add_fexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
368 {
369 unsigned src0 = bi_get_src(ins, regs, 0);
370 assert((1 << src0) & 0xf7);
371 unsigned src1 = bi_get_src(ins, regs, 1);
372 assert((1 << src1) & 0xf7);
373
374 return 0x66ac0 | (src0 << 0) | (src1 << 3);
375 }
376
377 static inline unsigned
pan_pack_add_doorbell(bi_clause * clause,bi_instruction * ins,bi_registers * regs)378 pan_pack_add_doorbell(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
379 {
380 unsigned src0 = bi_get_src(ins, regs, 0);
381
382 return 0xd7860 | (src0 << 0);
383 }
384
385 static inline unsigned
pan_pack_add_logb_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)386 pan_pack_add_logb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
387 {
388 unsigned src0 = bi_get_src(ins, regs, 0);
389
390 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
391 unsigned swz0_temp = 0;
392 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
393 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
394 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
395 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
396 else unreachable("Could not pattern match widen");
397 unsigned swz0 = swz0_temp;
398 assert(swz0 < 4);
399
400 return 0x3d980 | (src0 << 0) | (swz0 << 3);
401 }
402
403 static inline unsigned
pan_pack_add_store_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)404 pan_pack_add_store_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
405 {
406 unsigned src0 = bi_get_src(ins, regs, 1);
407 unsigned src1 = bi_get_src(ins, regs, 2);
408
409 assert(ins->segment);
410 unsigned seg = ins->segment;
411 assert(seg < 8);
412
413 bi_read_staging_register(clause, ins);
414 return 0x62800 | (src0 << 0) | (src1 << 3) | (seg << 6);
415 }
416
417 static inline unsigned
pan_pack_fma_arshift_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)418 pan_pack_fma_arshift_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
419 {
420 unsigned src0 = bi_get_src(ins, regs, 0);
421 assert((1 << src0) & 0xfb);
422 unsigned src1 = bi_get_src(ins, regs, 1);
423 assert((1 << src1) & 0x8);
424 unsigned src2 = bi_get_src(ins, regs, 2);
425
426 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
427 unsigned lanes2_temp = 0;
428 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
429 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
430 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
431 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
432 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
433 else unreachable("Could not pattern match widen");
434 unsigned lanes2 = lanes2_temp;
435 assert(lanes2 < 8);
436
437 if (lanes2 != 0) {
438 unsigned derived_9 = 0;
439 if (lanes2 == 1) derived_9 = 0;
440 else if (lanes2 == 2) derived_9 = 1;
441 else if (lanes2 == 3) derived_9 = 2;
442 else if (lanes2 == 4) derived_9 = 3;
443 else unreachable("No pattern match at pos 9");
444
445 return 0x334018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
446 } else if (lanes2 == 0) {
447 return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6);
448 } else {
449 unreachable("No matching state found in fma_arshift_v4i8");
450 }
451 }
452
453 static inline unsigned
pan_pack_fma_vn_asst1_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)454 pan_pack_fma_vn_asst1_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
455 {
456 unsigned src0 = bi_get_src(ins, regs, 0);
457 assert((1 << src0) & 0xfb);
458 unsigned src1 = bi_get_src(ins, regs, 1);
459 assert((1 << src1) & 0xfb);
460 unsigned src2 = bi_get_src(ins, regs, 2);
461 unsigned src3 = bi_get_src(ins, regs, 3);
462
463 unsigned neg2 = ins->src_neg[2];
464 assert(neg2 < 2);
465
466 return 0x27c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (neg2 << 12);
467 }
468
469 static inline unsigned
pan_pack_add_ldexp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)470 pan_pack_add_ldexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
471 {
472 unsigned src0 = bi_get_src(ins, regs, 0);
473 unsigned src1 = bi_get_src(ins, regs, 1);
474
475 unsigned round = ins->roundmode;
476 assert(round < 8);
477
478 return 0x74c00 | (src0 << 0) | (src1 << 3) | (round << 6);
479 }
480
481 static inline unsigned
pan_pack_add_isub_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)482 pan_pack_add_isub_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
483 {
484 unsigned src0 = bi_get_src(ins, regs, 0);
485 unsigned src1 = bi_get_src(ins, regs, 1);
486
487 unsigned saturate = 0;
488
489 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
490 unsigned lanes0_temp = 0;
491 if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
492 else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
493 else unreachable("Could not pattern match widen");
494 unsigned lanes0 = lanes0_temp;
495 assert(lanes0 < 2);
496
497 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
498 unsigned lanes1_temp = 0;
499 if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
500 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
501 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
502 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
503 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
504 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
505 else unreachable("Could not pattern match widen");
506 unsigned lanes1 = lanes1_temp;
507 assert(lanes1 < 8);
508
509 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
510 unsigned derived_7 = 0;
511 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
512 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
513 else unreachable("No pattern match at pos 7");
514
515 unsigned derived_9 = 0;
516 if (lanes1 == 0) derived_9 = 0;
517 else if (lanes1 == 1) derived_9 = 1;
518 else unreachable("No pattern match at pos 9");
519
520 unsigned derived_10 = 0;
521 if (lanes0 == 0) derived_10 = 0;
522 else if (lanes0 == 1) derived_10 = 1;
523 else unreachable("No pattern match at pos 10");
524
525 return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10);
526 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
527 unsigned derived_7 = 0;
528 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
529 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
530 else unreachable("No pattern match at pos 7");
531
532 unsigned derived_9 = 0;
533 if (lanes1 == 2) derived_9 = 0;
534 else if (lanes1 == 3) derived_9 = 1;
535 else unreachable("No pattern match at pos 9");
536
537 return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
538 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
539 unsigned derived_7 = 0;
540 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
541 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
542 else unreachable("No pattern match at pos 7");
543
544 unsigned derived_9 = 0;
545 if (lanes1 == 4) derived_9 = 0;
546 else if (lanes1 == 5) derived_9 = 1;
547 else unreachable("No pattern match at pos 9");
548
549 return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
550 } else {
551 unreachable("No matching state found in add_isub_v2u16");
552 }
553 }
554
555 static inline unsigned
pan_pack_add_branchc_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)556 pan_pack_add_branchc_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
557 {
558 unsigned src0 = bi_get_src(ins, regs, 0);
559 unsigned src1 = bi_get_src(ins, regs, 1);
560 assert((1 << src1) & 0xf7);
561
562 unsigned combine = 0;
563
564 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
565 unsigned lane0_temp = 0;
566 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
567 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
568 else unreachable("Could not pattern match widen");
569 unsigned lane0 = lane0_temp;
570 assert(lane0 < 2);
571
572 unsigned derived_9 = 0;
573 if (lane0 == 0) derived_9 = 0;
574 else if (lane0 == 1) derived_9 = 1;
575 else unreachable("No pattern match at pos 9");
576
577 unsigned derived_3 = 0;
578 if (lane0 == 1) derived_3 = 0;
579 else if (lane0 == 0) derived_3 = 1;
580 else unreachable("No pattern match at pos 3");
581
582 return 0x6f030 | (src0 << 0) | (src1 << 6) | (combine << 10) | (derived_9 << 9) | (derived_3 << 3);
583 }
584
585 static inline unsigned
pan_pack_fma_fround_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)586 pan_pack_fma_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
587 {
588 unsigned src0 = bi_get_src(ins, regs, 0);
589 assert((1 << src0) & 0xfb);
590
591 unsigned abs0 = ins->src_abs[0];
592 assert(abs0 < 2);
593
594 unsigned neg0 = ins->src_neg[0];
595 assert(neg0 < 2);
596
597 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
598 unsigned widen0_temp = 0;
599 if (widen0_sz == 32) widen0_temp = 1;
600 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
601 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
602 else unreachable("Could not pattern match widen");
603 unsigned widen0 = widen0_temp;
604 assert(widen0 < 4);
605
606 unsigned round = ins->roundmode;
607 assert(round < 8);
608
609 if (round != 4) {
610 unsigned derived_9 = 0;
611 if (round == 0) derived_9 = 0;
612 else if (round == 1) derived_9 = 1;
613 else if (round == 2) derived_9 = 2;
614 else if (round == 3) derived_9 = 3;
615 else unreachable("No pattern match at pos 9");
616
617 return 0x70c020 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (derived_9 << 9);
618 } else if (round == 4) {
619 return 0x707620 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3);
620 } else {
621 unreachable("No matching state found in fma_fround_f32");
622 }
623 }
624
625 static inline unsigned
pan_pack_add_vn_asst2_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)626 pan_pack_add_vn_asst2_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
627 {
628 unsigned src0 = bi_get_src(ins, regs, 0);
629
630 unsigned neg0 = ins->src_neg[0];
631 assert(neg0 < 2);
632
633 return 0x3dfa0 | (src0 << 0) | (neg0 << 3);
634 }
635
636 static inline unsigned
pan_pack_add_fround_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)637 pan_pack_add_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
638 {
639 unsigned src0 = bi_get_src(ins, regs, 0);
640
641 unsigned abs0 = ins->src_abs[0];
642 assert(abs0 < 2);
643
644 unsigned neg0 = ins->src_neg[0];
645 assert(neg0 < 2);
646
647 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
648 unsigned swz0_temp = 0;
649 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
650 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
651 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
652 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
653 else unreachable("Could not pattern match widen");
654 unsigned swz0 = swz0_temp;
655 assert(swz0 < 4);
656
657 unsigned round = ins->roundmode;
658 assert(round < 4);
659
660 return 0x3e800 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (round << 9);
661 }
662
663 static inline unsigned
pan_pack_fma_atom_c_return_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)664 pan_pack_fma_atom_c_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
665 {
666 unsigned src0 = bi_get_src(ins, regs, 0);
667 assert((1 << src0) & 0xf3);
668 unsigned src1 = bi_get_src(ins, regs, 1);
669 assert((1 << src1) & 0xf3);
670 unsigned src2 = bi_get_src(ins, regs, 2);
671 assert((1 << src2) & 0xf7);
672
673 unsigned atom_opc = 2;
674
675 return 0x2f2000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
676 }
677
678 static inline unsigned
pan_pack_add_icmpi_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)679 pan_pack_add_icmpi_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
680 {
681 unsigned src0 = bi_get_src(ins, regs, 0);
682 unsigned src1 = bi_get_src(ins, regs, 1);
683
684 unsigned result_type = 1;
685
686 unsigned cmpf_table[] = {
687 ~0, ~0, ~0, 1, 0, ~0, ~0
688 };
689 unsigned cmpf = cmpf_table[ins->cond];
690 assert(cmpf < 2);
691
692 return 0x7b800 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
693 }
694
695 static inline unsigned
pan_pack_fma_fma_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)696 pan_pack_fma_fma_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
697 {
698 unsigned src0 = bi_get_src(ins, regs, 0);
699 assert((1 << src0) & 0xfb);
700 unsigned src1 = bi_get_src(ins, regs, 1);
701 assert((1 << src1) & 0xfb);
702 unsigned src2 = bi_get_src(ins, regs, 2);
703
704 unsigned neg0 = ins->src_neg[0];
705 assert(neg0 < 2);
706
707 unsigned neg1 = ins->src_neg[1];
708 assert(neg1 < 2);
709
710 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
711 unsigned swz0_temp = 0;
712 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
713 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
714 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
715 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
716 else unreachable("Could not pattern match widen");
717 unsigned swz0 = swz0_temp;
718 assert(swz0 < 4);
719
720 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
721 unsigned swz1_temp = 0;
722 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
723 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
724 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
725 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
726 else unreachable("Could not pattern match widen");
727 unsigned swz1 = swz1_temp;
728 assert(swz1 < 4);
729
730 unsigned round = ins->roundmode;
731 assert(round < 4);
732
733 unsigned clamp = ins->outmod;
734 assert(clamp < 4);
735
736 unsigned neg2 = ins->src_neg[2];
737 assert(neg2 < 2);
738
739 unsigned swz2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
740 unsigned swz2_temp = 0;
741 if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) swz2_temp = 0;
742 else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swz2_temp = 1;
743 else if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swz2_temp = 2;
744 else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) swz2_temp = 3;
745 else unreachable("Could not pattern match widen");
746 unsigned swz2 = swz2_temp;
747 assert(swz2 < 4);
748
749 unsigned derived_17 = 0;
750 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0;
751 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1;
752 else unreachable("No pattern match at pos 17");
753
754 return 0x400000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (neg2 << 18) | (swz2 << 19) | (derived_17 << 17);
755 }
756
757 static inline unsigned
pan_pack_fma_arshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)758 pan_pack_fma_arshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
759 {
760 unsigned src0 = bi_get_src(ins, regs, 0);
761 assert((1 << src0) & 0xfb);
762 unsigned src1 = bi_get_src(ins, regs, 1);
763 assert((1 << src1) & 0xfb);
764 unsigned src2 = bi_get_src(ins, regs, 2);
765
766 unsigned bytes2 = 0;
767
768 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
769 unsigned lane2_temp = 0;
770 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
771 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
772 else unreachable("Could not pattern match widen");
773 unsigned lane2 = lane2_temp;
774 assert(lane2 < 2);
775
776 unsigned result_word = 0;
777
778 return 0x33e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
779 }
780
781 static inline unsigned
pan_pack_fma_fmul_slice_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)782 pan_pack_fma_fmul_slice_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
783 {
784 unsigned src0 = bi_get_src(ins, regs, 0);
785 assert((1 << src0) & 0xfb);
786 unsigned src1 = bi_get_src(ins, regs, 1);
787 assert((1 << src1) & 0xfb);
788
789 return 0x70cb40 | (src0 << 0) | (src1 << 3);
790 }
791
792 static inline unsigned
pan_pack_add_ld_var_flat_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)793 pan_pack_add_ld_var_flat_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
794 {
795
796 unsigned vecsize = ins->vector_channels - 1;
797 assert(vecsize < 4);
798
799 unsigned register_format_temp = 0;
800 if (ins->format == nir_type_float32) register_format_temp = 0;
801 else if (ins->format == nir_type_float16) register_format_temp = 1;
802 else if (ins->format == nir_type_uint32) register_format_temp = 2;
803 else if (ins->format == nir_type_int32) register_format_temp = 3;
804 else unreachable("Could not pattern match register format");
805 unsigned register_format = register_format_temp;
806 assert(register_format < 8);
807
808 unsigned function = 3;
809
810 unsigned index = bi_get_immediate(ins, 0);
811 bi_write_staging_register(clause, ins);
812 if (register_format != 4) {
813 unsigned derived_10 = 0;
814 if ((register_format == 0) || (register_format == 1)) derived_10 = 0;
815 else if ((register_format == 2) || (register_format == 3)) derived_10 = 1;
816 else unreachable("No pattern match at pos 10");
817
818 unsigned derived_19 = 0;
819 if ((register_format == 0) || (register_format == 2)) derived_19 = 0;
820 else if ((register_format == 1) || (register_format == 3)) derived_19 = 1;
821 else unreachable("No pattern match at pos 19");
822
823 return 0x53800 | (vecsize << 8) | (function << 0) | (index << 3) | (derived_10 << 10) | (derived_19 << 19);
824 } else if (register_format == 4) {
825 return 0xcf800 | (vecsize << 8) | (function << 0) | (index << 3);
826 } else {
827 unreachable("No matching state found in add_ld_var_flat_imm");
828 }
829 }
830
831 static inline unsigned
pan_pack_fma_csel_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)832 pan_pack_fma_csel_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
833 {
834 unsigned src0 = bi_get_src(ins, regs, 0);
835 assert((1 << src0) & 0xfb);
836 unsigned src1 = bi_get_src(ins, regs, 1);
837 assert((1 << src1) & 0xfb);
838 unsigned src2 = bi_get_src(ins, regs, 2);
839 unsigned src3 = bi_get_src(ins, regs, 3);
840
841 unsigned cmpf_table[] = {
842 ~0, 4, 5, 2, 1, 0, 3
843 };
844 unsigned cmpf = cmpf_table[ins->cond];
845 assert(cmpf < 8);
846
847 if ((cmpf == 4) || (cmpf == 5)) {
848 { unsigned temp = src0; src0 = src1; src1 = temp; }
849 if (cmpf == 5) cmpf = 2;
850 else if (cmpf == 4) cmpf = 1;
851 }
852
853 if (cmpf == 3) {
854 { unsigned temp = src2; src2 = src3; src3 = temp; }
855 if (cmpf == 3) cmpf = 0;
856 }
857
858 unsigned derived_12 = 0;
859 if (cmpf == 0) derived_12 = 0;
860 else if (cmpf == 1) derived_12 = 1;
861 else if (cmpf == 2) derived_12 = 2;
862 else unreachable("No pattern match at pos 12");
863
864 return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
865 }
866
867 static inline unsigned
pan_pack_fma_csel_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)868 pan_pack_fma_csel_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
869 {
870 unsigned src0 = bi_get_src(ins, regs, 0);
871 assert((1 << src0) & 0xfb);
872 unsigned src1 = bi_get_src(ins, regs, 1);
873 assert((1 << src1) & 0xfb);
874 unsigned src2 = bi_get_src(ins, regs, 2);
875 unsigned src3 = bi_get_src(ins, regs, 3);
876
877 unsigned cmpf_table[] = {
878 ~0, ~0, ~0, ~0, ~0, 0, 1
879 };
880 unsigned cmpf = cmpf_table[ins->cond];
881 assert(cmpf < 2);
882
883 if (cmpf == 1) {
884 { unsigned temp = src2; src2 = src3; src3 = temp; }
885 if (cmpf == 1) cmpf = 0;
886 }
887
888 unsigned derived_12 = 0;
889 if (cmpf == 0) derived_12 = 3;
890 else unreachable("No pattern match at pos 12");
891
892 return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
893 }
894
895 static inline unsigned
pan_pack_add_load_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)896 pan_pack_add_load_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
897 {
898 unsigned src0 = bi_get_src(ins, regs, 0);
899 unsigned src1 = bi_get_src(ins, regs, 1);
900
901 assert(ins->segment);
902 unsigned seg = ins->segment;
903 assert(seg < 8);
904
905 unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]);
906 unsigned lane_temp = 0;
907 if (lane_sz == 32) lane_temp = 0;
908 else if (lane_sz == 64) lane_temp = 1;
909 else unreachable("Could not pattern match widen");
910 unsigned lane = lane_temp;
911 assert(lane < 2);
912
913 ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
914 bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
915 unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0;
916 assert(extend < 4);
917
918 bi_write_staging_register(clause, ins);
919 if ((extend == 0) && (lane == 0)) {
920 return 0x60c00 | (src0 << 0) | (src1 << 3) | (seg << 6);
921 } else if ((extend != 0) && (lane == 1)) {
922 unsigned derived_9 = 0;
923 if (extend == 1) derived_9 = 0;
924 else if (extend == 2) derived_9 = 1;
925 else unreachable("No pattern match at pos 9");
926
927 return 0x61c00 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
928 } else {
929 unreachable("No matching state found in add_load_i32");
930 }
931 }
932
933 static inline unsigned
pan_pack_add_st_tile(bi_clause * clause,bi_instruction * ins,bi_registers * regs)934 pan_pack_add_st_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
935 {
936 unsigned src0 = bi_get_src(ins, regs, 1);
937 unsigned src1 = bi_get_src(ins, regs, 2);
938 unsigned src2 = bi_get_src(ins, regs, 3);
939 assert((1 << src2) & 0xf7);
940
941 unsigned vecsize = ins->vector_channels - 1;
942 assert(vecsize < 4);
943
944 bi_read_staging_register(clause, ins);
945 return 0xcb800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
946 }
947
948 static inline unsigned
pan_pack_fma_rshift_or_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)949 pan_pack_fma_rshift_or_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
950 {
951 unsigned src0 = bi_get_src(ins, regs, 0);
952 assert((1 << src0) & 0xfb);
953 unsigned src1 = bi_get_src(ins, regs, 1);
954 assert((1 << src1) & 0xfb);
955 unsigned src2 = bi_get_src(ins, regs, 2);
956
957 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
958 unsigned lanes2_temp = 0;
959 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
960 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
961 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
962 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
963 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
964 else unreachable("Could not pattern match widen");
965 unsigned lanes2 = lanes2_temp;
966 assert(lanes2 < 8);
967
968 unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
969 assert(not1 < 2);
970
971 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
972 assert(not_result < 2);
973
974 if (lanes2 != 0) {
975 unsigned derived_9 = 0;
976 if (lanes2 == 1) derived_9 = 0;
977 else if (lanes2 == 2) derived_9 = 1;
978 else if (lanes2 == 3) derived_9 = 2;
979 else if (lanes2 == 4) derived_9 = 3;
980 else unreachable("No pattern match at pos 9");
981
982 return 0x302000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
983 } else if (lanes2 == 0) {
984 return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
985 } else {
986 unreachable("No matching state found in fma_rshift_or_v4i8");
987 }
988 }
989
990 static inline unsigned
pan_pack_fma_csel_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)991 pan_pack_fma_csel_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
992 {
993 unsigned src0 = bi_get_src(ins, regs, 0);
994 assert((1 << src0) & 0xfb);
995 unsigned src1 = bi_get_src(ins, regs, 1);
996 assert((1 << src1) & 0xfb);
997 unsigned src2 = bi_get_src(ins, regs, 2);
998 unsigned src3 = bi_get_src(ins, regs, 3);
999
1000 unsigned cmpf_table[] = {
1001 ~0, 2, 3, 1, 0, ~0, ~0
1002 };
1003 unsigned cmpf = cmpf_table[ins->cond];
1004 assert(cmpf < 4);
1005
1006 if ((cmpf == 2) || (cmpf == 3)) {
1007 { unsigned temp = src0; src0 = src1; src1 = temp; }
1008 if (cmpf == 3) cmpf = 1;
1009 else if (cmpf == 2) cmpf = 0;
1010 }
1011
1012 unsigned derived_12 = 0;
1013 if (cmpf == 0) derived_12 = 0;
1014 else if (cmpf == 1) derived_12 = 1;
1015 else unreachable("No pattern match at pos 12");
1016
1017 return 0x2e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
1018 }
1019
1020 static inline unsigned
pan_pack_add_mkvec_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1021 pan_pack_add_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1022 {
1023 unsigned src0 = bi_get_src(ins, regs, 0);
1024 unsigned src1 = bi_get_src(ins, regs, 1);
1025
1026 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1027 unsigned lane0_temp = 0;
1028 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
1029 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
1030 else unreachable("Could not pattern match widen");
1031 unsigned lane0 = lane0_temp;
1032 assert(lane0 < 2);
1033
1034 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1035 unsigned lane1_temp = 0;
1036 if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
1037 else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
1038 else unreachable("Could not pattern match widen");
1039 unsigned lane1 = lane1_temp;
1040 assert(lane1 < 2);
1041
1042 return 0x75300 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7);
1043 }
1044
1045 static inline unsigned
pan_pack_fma_atom_pre_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1046 pan_pack_fma_atom_pre_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1047 {
1048 unsigned src0 = bi_get_src(ins, regs, 0);
1049 assert((1 << src0) & 0xfb);
1050 unsigned src1 = bi_get_src(ins, regs, 1);
1051 assert((1 << src1) & 0xfb);
1052 unsigned src2 = bi_get_src(ins, regs, 2);
1053
1054 unsigned atom_opc = 2;
1055
1056 return 0x6ec000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
1057 }
1058
1059 static inline unsigned
pan_pack_fma_shaddxl_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1060 pan_pack_fma_shaddxl_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1061 {
1062 unsigned src0 = bi_get_src(ins, regs, 0);
1063 assert((1 << src0) & 0xfb);
1064 unsigned src1 = bi_get_src(ins, regs, 1);
1065 assert((1 << src1) & 0xfb);
1066
1067 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1068 unsigned lane1_temp = 0;
1069 if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
1070 else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
1071 else if (lane1_sz == 32) lane1_temp = 2;
1072 else unreachable("Could not pattern match widen");
1073 unsigned lane1 = lane1_temp;
1074 assert(lane1 < 4);
1075
1076 unsigned shift = 0;
1077 return 0x70e000 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6);
1078 }
1079
1080 static inline unsigned
pan_pack_add_branch_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1081 pan_pack_add_branch_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1082 {
1083 unsigned src0 = bi_get_src(ins, regs, 0);
1084 unsigned src1 = bi_get_src(ins, regs, 1);
1085 unsigned src2 = bi_get_src(ins, regs, 2);
1086 assert((1 << src2) & 0xf7);
1087
1088 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1089 unsigned widen0_temp = 0;
1090 if (widen0_sz == 32) widen0_temp = 0;
1091 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
1092 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
1093 else unreachable("Could not pattern match widen");
1094 unsigned widen0 = widen0_temp;
1095 assert(widen0 < 4);
1096
1097 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1098 unsigned widen1_temp = 0;
1099 if (widen1_sz == 32) widen1_temp = 0;
1100 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
1101 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
1102 else unreachable("Could not pattern match widen");
1103 unsigned widen1 = widen1_temp;
1104 assert(widen1 < 4);
1105
1106 unsigned cmpf_table[] = {
1107 ~0, ~0, ~0, ~0, ~0, 0, 1
1108 };
1109 unsigned cmpf = cmpf_table[ins->cond];
1110 assert(cmpf < 2);
1111
1112 if (((src0 > src1) && (cmpf == 0)) || ((src0 < src1) && (cmpf == 1))) {
1113 { unsigned temp = src0; src0 = src1; src1 = temp; }
1114 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
1115 }
1116
1117 unsigned derived_12 = 0;
1118 if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
1119 else unreachable("No pattern match at pos 12");
1120
1121 unsigned derived_9 = 0;
1122 if ((src0 == src1) && (cmpf == 0)) derived_9 = 1;
1123 else if (((src0 < src1) && (cmpf == 0)) || ((src0 >= src1) && (cmpf == 1))) derived_9 = 4;
1124 else unreachable("No pattern match at pos 9");
1125
1126 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
1127 }
1128
1129 static inline unsigned
pan_pack_fma_quiet_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1130 pan_pack_fma_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1131 {
1132 unsigned src0 = bi_get_src(ins, regs, 0);
1133 assert((1 << src0) & 0xfb);
1134
1135 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1136 unsigned swz0_temp = 0;
1137 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1138 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1139 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
1140 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
1141 else unreachable("Could not pattern match widen");
1142 unsigned swz0 = swz0_temp;
1143 assert(swz0 < 4);
1144
1145 return 0x701900 | (src0 << 0) | (swz0 << 4);
1146 }
1147
1148 static inline unsigned
pan_pack_fma_lshift_or_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1149 pan_pack_fma_lshift_or_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1150 {
1151 unsigned src0 = bi_get_src(ins, regs, 0);
1152 assert((1 << src0) & 0xfb);
1153 unsigned src1 = bi_get_src(ins, regs, 1);
1154 assert((1 << src1) & 0xfb);
1155 unsigned src2 = bi_get_src(ins, regs, 2);
1156
1157 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
1158 unsigned lanes2_temp = 0;
1159 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
1160 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
1161 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
1162 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
1163 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
1164 else unreachable("Could not pattern match widen");
1165 unsigned lanes2 = lanes2_temp;
1166 assert(lanes2 < 8);
1167
1168 unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
1169 assert(not1 < 2);
1170
1171 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
1172 assert(not_result < 2);
1173
1174 if (lanes2 != 0) {
1175 unsigned derived_9 = 0;
1176 if (lanes2 == 1) derived_9 = 0;
1177 else if (lanes2 == 2) derived_9 = 1;
1178 else if (lanes2 == 3) derived_9 = 2;
1179 else if (lanes2 == 4) derived_9 = 3;
1180 else unreachable("No pattern match at pos 9");
1181
1182 return 0x312000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
1183 } else if (lanes2 == 0) {
1184 return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
1185 } else {
1186 unreachable("No matching state found in fma_lshift_or_v4i8");
1187 }
1188 }
1189
1190 static inline unsigned
pan_pack_add_ld_var_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1191 pan_pack_add_ld_var_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1192 {
1193 unsigned src0 = bi_get_src(ins, regs, 1);
1194
1195 unsigned vecsize = ins->vector_channels - 1;
1196 assert(vecsize < 4);
1197
1198 unsigned update = (ins->constant.u64 >= 20) ? 3 : 0;
1199 assert(update < 4);
1200
1201 unsigned register_format_temp = 0;
1202 if (ins->format == nir_type_float32) register_format_temp = 0;
1203 else if (ins->format == nir_type_float16) register_format_temp = 1;
1204 else unreachable("Could not pattern match register format");
1205 unsigned register_format = register_format_temp;
1206 assert(register_format < 4);
1207
1208 unsigned sample = ins->load_vary.interp_mode;
1209 assert(sample < 8);
1210
1211 unsigned index = bi_get_immediate(ins, 0);
1212 bi_write_staging_register(clause, ins);
1213 if (register_format != 2) {
1214 unsigned derived_19 = 0;
1215 if (register_format == 0) derived_19 = 0;
1216 else if (register_format == 1) derived_19 = 1;
1217 else unreachable("No pattern match at pos 19");
1218
1219 unsigned derived_10 = 0;
1220 if ((sample == 0) && (update == 0)) derived_10 = 0;
1221 else if ((sample == 1) && (update == 0)) derived_10 = 1;
1222 else if ((sample == 2) && (update == 0)) derived_10 = 2;
1223 else if ((sample == 3) && (update == 0)) derived_10 = 3;
1224 else if ((sample == 4) && (update == 1)) derived_10 = 4;
1225 else if ((sample == 0) && (update == 2)) derived_10 = 8;
1226 else if ((sample == 1) && (update == 2)) derived_10 = 9;
1227 else if ((sample == 0) && (update == 3)) derived_10 = 10;
1228 else if ((sample == 1) && (update == 3)) derived_10 = 11;
1229 else if ((sample == 2) && (update == 3)) derived_10 = 12;
1230 else if ((sample == 3) && (update == 3)) derived_10 = 13;
1231 else unreachable("No pattern match at pos 10");
1232
1233 return 0x50000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_19 << 19) | (derived_10 << 10);
1234 } else if (register_format == 2) {
1235 unsigned derived_10 = 0;
1236 if ((sample == 0) && (update == 0)) derived_10 = 0;
1237 else if ((sample == 1) && (update == 0)) derived_10 = 1;
1238 else if ((sample == 2) && (update == 0)) derived_10 = 2;
1239 else if ((sample == 3) && (update == 0)) derived_10 = 3;
1240 else if ((sample == 4) && (update == 1)) derived_10 = 4;
1241 else if ((sample == 0) && (update == 2)) derived_10 = 8;
1242 else if ((sample == 1) && (update == 2)) derived_10 = 9;
1243 else if ((sample == 0) && (update == 3)) derived_10 = 10;
1244 else if ((sample == 1) && (update == 3)) derived_10 = 11;
1245 else if ((sample == 2) && (update == 3)) derived_10 = 12;
1246 else if ((sample == 3) && (update == 3)) derived_10 = 13;
1247 else unreachable("No pattern match at pos 10");
1248
1249 return 0xcc000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_10 << 10);
1250 } else {
1251 unreachable("No matching state found in add_ld_var_imm");
1252 }
1253 }
1254
1255 static inline unsigned
pan_pack_fma_atom_c_return_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1256 pan_pack_fma_atom_c_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1257 {
1258 unsigned src0 = bi_get_src(ins, regs, 0);
1259 assert((1 << src0) & 0xf3);
1260 unsigned src1 = bi_get_src(ins, regs, 1);
1261 assert((1 << src1) & 0xf3);
1262 unsigned src2 = bi_get_src(ins, regs, 2);
1263 assert((1 << src2) & 0xf7);
1264
1265 unsigned atom_opc = 2;
1266
1267 return 0x2f6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
1268 }
1269
1270 static inline unsigned
pan_pack_add_barrier(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1271 pan_pack_add_barrier(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1272 {
1273
1274 return 0xd7874;
1275 }
1276
1277 static inline unsigned
pan_pack_add_quiet_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1278 pan_pack_add_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1279 {
1280 unsigned src0 = bi_get_src(ins, regs, 0);
1281
1282 return 0x3d970 | (src0 << 0);
1283 }
1284
1285 static inline unsigned
pan_pack_fma_atom_c_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1286 pan_pack_fma_atom_c_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1287 {
1288 unsigned src0 = bi_get_src(ins, regs, 0);
1289 assert((1 << src0) & 0xf3);
1290 unsigned src1 = bi_get_src(ins, regs, 1);
1291 assert((1 << src1) & 0xf3);
1292 unsigned src2 = bi_get_src(ins, regs, 2);
1293 assert((1 << src2) & 0xf7);
1294
1295 unsigned atom_opc = 2;
1296
1297 return 0x2f0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
1298 }
1299
1300 static inline unsigned
pan_pack_add_v2s8_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1301 pan_pack_add_v2s8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1302 {
1303 unsigned src0 = bi_get_src(ins, regs, 0);
1304
1305 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1306 unsigned swz0_temp = 0;
1307 if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1308 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1309 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
1310 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
1311 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
1312 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
1313 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
1314 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
1315 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
1316 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
1317 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
1318 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
1319 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
1320 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
1321 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
1322 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
1323 else unreachable("Could not pattern match widen");
1324 unsigned swz0 = swz0_temp;
1325 assert(swz0 < 16);
1326
1327 return 0x3c800 | (src0 << 0) | (swz0 << 4);
1328 }
1329
1330 static inline unsigned
pan_pack_add_fsincos_offset_u6(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1331 pan_pack_add_fsincos_offset_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1332 {
1333 unsigned src0 = bi_get_src(ins, regs, 0);
1334 assert((1 << src0) & 0xf7);
1335
1336 unsigned scale = 0;
1337
1338 return 0x67aa0 | (src0 << 0) | (scale << 3);
1339 }
1340
1341 static inline unsigned
pan_pack_add_lea_attr(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1342 pan_pack_add_lea_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1343 {
1344 unsigned src0 = bi_get_src(ins, regs, 0);
1345 unsigned src1 = bi_get_src(ins, regs, 1);
1346 unsigned src2 = bi_get_src(ins, regs, 2);
1347
1348 unsigned register_format_temp = 0;
1349 if (ins->format == nir_type_float16) register_format_temp = 0;
1350 else if (ins->format == nir_type_float32) register_format_temp = 1;
1351 else if (ins->format == nir_type_int32) register_format_temp = 2;
1352 else if (ins->format == nir_type_uint32) register_format_temp = 3;
1353 else if (ins->format == nir_type_int16) register_format_temp = 4;
1354 else if (ins->format == nir_type_uint16) register_format_temp = 5;
1355 else if (ins->format == nir_type_float64) register_format_temp = 6;
1356 else if (ins->format == nir_type_int64) register_format_temp = 7;
1357 else unreachable("Could not pattern match register format");
1358 unsigned register_format = register_format_temp;
1359 assert(register_format < 16);
1360
1361 bi_write_staging_register(clause, ins);
1362 if (register_format != 8) {
1363 unsigned derived_11 = 0;
1364 if (register_format == 0) derived_11 = 0;
1365 else if (register_format == 1) derived_11 = 1;
1366 else if (register_format == 2) derived_11 = 2;
1367 else if (register_format == 3) derived_11 = 3;
1368 else if (register_format == 4) derived_11 = 4;
1369 else if (register_format == 5) derived_11 = 5;
1370 else if (register_format == 6) derived_11 = 6;
1371 else if (register_format == 7) derived_11 = 7;
1372 else unreachable("No pattern match at pos 11");
1373
1374 return 0xc0400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11);
1375 } else if (register_format == 8) {
1376 return 0xc8400 | (src0 << 0) | (src1 << 3) | (src2 << 6);
1377 } else {
1378 unreachable("No matching state found in add_lea_attr");
1379 }
1380 }
1381
1382 static inline unsigned
pan_pack_add_fadd_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1383 pan_pack_add_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1384 {
1385 unsigned src0 = bi_get_src(ins, regs, 0);
1386 unsigned src1 = bi_get_src(ins, regs, 1);
1387
1388 unsigned round = ins->roundmode;
1389 assert(round < 8);
1390
1391 unsigned abs1 = ins->src_abs[1];
1392 assert(abs1 < 2);
1393
1394 unsigned neg0 = ins->src_neg[0];
1395 assert(neg0 < 2);
1396
1397 unsigned neg1 = ins->src_neg[1];
1398 assert(neg1 < 2);
1399
1400 unsigned clamp = ins->outmod;
1401 assert(clamp < 4);
1402
1403 unsigned abs0 = ins->src_abs[0];
1404 assert(abs0 < 2);
1405
1406 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1407 unsigned widen0_temp = 0;
1408 if (widen0_sz == 32) widen0_temp = 0;
1409 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
1410 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
1411 else unreachable("Could not pattern match widen");
1412 unsigned widen0 = widen0_temp;
1413 assert(widen0 < 4);
1414
1415 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1416 unsigned widen1_temp = 0;
1417 if (widen1_sz == 32) widen1_temp = 0;
1418 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
1419 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
1420 else unreachable("Could not pattern match widen");
1421 unsigned widen1 = widen1_temp;
1422 assert(widen1 < 4);
1423
1424 if (((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) {
1425 { unsigned temp = src0; src0 = src1; src1 = temp; }
1426 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
1427 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
1428 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
1429 }
1430
1431 if (round != 4) {
1432 unsigned derived_13 = 0;
1433 if (round == 0) derived_13 = 0;
1434 else if (round == 1) derived_13 = 1;
1435 else if (round == 2) derived_13 = 2;
1436 else if (round == 3) derived_13 = 3;
1437 else unreachable("No pattern match at pos 13");
1438
1439 unsigned derived_9 = 0;
1440 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
1441 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
1442 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
1443 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
1444 else unreachable("No pattern match at pos 9");
1445
1446 return 0x20000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (abs0 << 15) | (derived_13 << 13) | (derived_9 << 9);
1447 } else if ((round == 4) && (widen0 == 0) && (widen1 == 0) && (abs0 == 0) && (abs1 == 0) && (neg0 == 0) && (neg1 == 0) && (clamp == 0)) {
1448 return 0x75200 | (src0 << 0) | (src1 << 3);
1449 } else {
1450 unreachable("No matching state found in add_fadd_f32");
1451 }
1452 }
1453
1454 static inline unsigned
pan_pack_fma_atom_post_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1455 pan_pack_fma_atom_post_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1456 {
1457 unsigned src0 = bi_get_src(ins, regs, 0);
1458 assert((1 << src0) & 0xfb);
1459 unsigned src1 = bi_get_src(ins, regs, 1);
1460 assert((1 << src1) & 0xfb);
1461
1462 unsigned atom_opc = 2;
1463
1464 return 0x6ee000 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
1465 }
1466
1467 static inline unsigned
pan_pack_fma_seg_sub(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1468 pan_pack_fma_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1469 {
1470 unsigned src0 = bi_get_src(ins, regs, 0);
1471 assert((1 << src0) & 0xfb);
1472
1473 assert(ins->segment);
1474 unsigned seg = ins->segment;
1475 assert(seg < 8);
1476
1477 unsigned preserve_null = 0;
1478
1479 return 0x701540 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
1480 }
1481
1482 static inline unsigned
pan_pack_add_seg_sub(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1483 pan_pack_add_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1484 {
1485 unsigned src0 = bi_get_src(ins, regs, 0);
1486
1487 assert(ins->segment);
1488 unsigned seg = ins->segment;
1489 assert(seg < 8);
1490
1491 unsigned preserve_null = 0;
1492
1493 return 0x3d540 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
1494 }
1495
1496 static inline unsigned
pan_pack_fma_frexpe_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1497 pan_pack_fma_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1498 {
1499 unsigned src0 = bi_get_src(ins, regs, 0);
1500 assert((1 << src0) & 0xfb);
1501
1502 unsigned neg = ins->src_neg[0];
1503 assert(neg < 2);
1504
1505 unsigned sqrt = 0;
1506
1507 unsigned log = 1;
1508
1509 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1510 unsigned widen0_temp = 0;
1511 if (widen0_sz == 32) widen0_temp = 1;
1512 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
1513 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
1514 else unreachable("Could not pattern match widen");
1515 unsigned widen0 = widen0_temp;
1516 assert(widen0 < 4);
1517
1518 if (log == 0) {
1519 return 0x701c20 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (widen0 << 3);
1520 } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
1521 return 0x701e20 | (src0 << 0) | (widen0 << 3);
1522 } else {
1523 unreachable("No matching state found in fma_frexpe_f32");
1524 }
1525 }
1526
1527 static inline unsigned
pan_pack_add_frsq_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1528 pan_pack_add_frsq_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1529 {
1530 unsigned src0 = bi_get_src(ins, regs, 0);
1531 assert((1 << src0) & 0xf7);
1532
1533 unsigned neg = ins->src_neg[0];
1534 assert(neg < 2);
1535
1536 unsigned abs0 = ins->src_abs[0];
1537 assert(abs0 < 2);
1538
1539 unsigned divzero = 0;
1540
1541 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1542 unsigned lane0_temp = 0;
1543 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
1544 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
1545 else unreachable("Could not pattern match widen");
1546 unsigned lane0 = lane0_temp;
1547 assert(lane0 < 2);
1548
1549 return 0x67280 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8);
1550 }
1551
1552 static inline unsigned
pan_pack_fma_lshift_and_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1553 pan_pack_fma_lshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1554 {
1555 unsigned src0 = bi_get_src(ins, regs, 0);
1556 assert((1 << src0) & 0xfb);
1557 unsigned src1 = bi_get_src(ins, regs, 1);
1558 assert((1 << src1) & 0xfb);
1559 unsigned src2 = bi_get_src(ins, regs, 2);
1560
1561 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
1562 unsigned lanes2_temp = 0;
1563 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
1564 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
1565 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
1566 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
1567 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
1568 else unreachable("Could not pattern match widen");
1569 unsigned lanes2 = lanes2_temp;
1570 assert(lanes2 < 8);
1571
1572 unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
1573 assert(not1 < 2);
1574
1575 unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
1576 assert(not_result < 2);
1577
1578 if (lanes2 != 0) {
1579 unsigned derived_9 = 0;
1580 if (lanes2 == 1) derived_9 = 0;
1581 else if (lanes2 == 2) derived_9 = 1;
1582 else if (lanes2 == 3) derived_9 = 2;
1583 else if (lanes2 == 4) derived_9 = 3;
1584 else unreachable("No pattern match at pos 9");
1585
1586 return 0x310000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
1587 } else if (lanes2 == 0) {
1588 return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
1589 } else {
1590 unreachable("No matching state found in fma_lshift_and_v4i8");
1591 }
1592 }
1593
1594 static inline unsigned
pan_pack_add_branch_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1595 pan_pack_add_branch_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1596 {
1597 unsigned src0 = bi_get_src(ins, regs, 0);
1598 unsigned src1 = bi_get_src(ins, regs, 1);
1599 unsigned src2 = bi_get_src(ins, regs, 2);
1600 assert((1 << src2) & 0xf7);
1601
1602 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1603 unsigned widen0_temp = 0;
1604 if (widen0_sz == 32) widen0_temp = 0;
1605 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
1606 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
1607 else unreachable("Could not pattern match widen");
1608 unsigned widen0 = widen0_temp;
1609 assert(widen0 < 4);
1610
1611 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1612 unsigned widen1_temp = 0;
1613 if (widen1_sz == 32) widen1_temp = 0;
1614 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
1615 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
1616 else unreachable("Could not pattern match widen");
1617 unsigned widen1 = widen1_temp;
1618 assert(widen1 < 4);
1619
1620 unsigned cmpf_table[] = {
1621 ~0, 4, 5, 2, 1, 0, 3
1622 };
1623 unsigned cmpf = cmpf_table[ins->cond];
1624 assert(cmpf < 8);
1625
1626 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == widen1) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
1627 { unsigned temp = src0; src0 = src1; src1 = temp; }
1628 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
1629 if (cmpf == 1) cmpf = 4;
1630 else if (cmpf == 5) cmpf = 2;
1631 else if (cmpf == 4) cmpf = 1;
1632 else if (cmpf == 2) cmpf = 5;
1633 }
1634
1635 unsigned derived_12 = 0;
1636 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
1637 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
1638 else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5))) derived_12 = 3;
1639 else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) derived_12 = 4;
1640 else unreachable("No pattern match at pos 12");
1641
1642 unsigned derived_9 = 0;
1643 if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
1644 else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 3))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
1645 else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 1) || (cmpf == 2))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
1646 else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 4) || (cmpf == 5))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 4)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == widen1) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
1647 else unreachable("No pattern match at pos 9");
1648
1649 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
1650 }
1651
1652 static inline unsigned
pan_pack_add_clper_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1653 pan_pack_add_clper_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1654 {
1655 unsigned src0 = bi_get_src(ins, regs, 0);
1656 assert((1 << src0) & 0x7);
1657 unsigned src1 = bi_get_src(ins, regs, 1);
1658
1659 unsigned lane_op = 0;
1660
1661 unsigned subgroup = 1;
1662
1663 unsigned inactive_result = 0;
1664
1665 return 0x7c000 | (src0 << 0) | (src1 << 3) | (lane_op << 6) | (subgroup << 8) | (inactive_result << 10);
1666 }
1667
1668 static inline unsigned
pan_pack_add_v2s16_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1669 pan_pack_add_v2s16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1670 {
1671 unsigned src0 = bi_get_src(ins, regs, 0);
1672
1673 unsigned round = ins->roundmode;
1674 assert(round < 8);
1675
1676 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1677 unsigned swz0_temp = 0;
1678 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1679 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1680 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
1681 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
1682 else unreachable("Could not pattern match widen");
1683 unsigned swz0 = swz0_temp;
1684 assert(swz0 < 4);
1685
1686 if (round != 4) {
1687 unsigned derived_4 = 0;
1688 if (round == 0) derived_4 = 0;
1689 else if (round == 1) derived_4 = 1;
1690 else if (round == 2) derived_4 = 2;
1691 else if (round == 3) derived_4 = 3;
1692 else unreachable("No pattern match at pos 4");
1693
1694 return 0x3c600 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
1695 } else if (round == 4) {
1696 return 0x3cb00 | (src0 << 0) | (swz0 << 4);
1697 } else {
1698 unreachable("No matching state found in add_v2s16_to_v2f16");
1699 }
1700 }
1701
1702 static inline unsigned
pan_pack_fma_atom_c1_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1703 pan_pack_fma_atom_c1_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1704 {
1705 unsigned src0 = bi_get_src(ins, regs, 0);
1706 assert((1 << src0) & 0xf3);
1707 unsigned src1 = bi_get_src(ins, regs, 1);
1708 assert((1 << src1) & 0xf3);
1709
1710 unsigned atom_opc = 2;
1711
1712 return 0x2f5e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
1713 }
1714
1715 static inline unsigned
pan_pack_add_axchg_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1716 pan_pack_add_axchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1717 {
1718 unsigned src0 = bi_get_src(ins, regs, 1);
1719 unsigned src1 = bi_get_src(ins, regs, 2);
1720
1721 assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
1722 unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
1723 assert(seg < 2);
1724
1725 bi_read_staging_register(clause, ins);
1726 assert(ins->src[0] == ins->dest);
1727 return 0x640c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
1728 }
1729
1730 static inline unsigned
pan_pack_fma_vn_asst1_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1731 pan_pack_fma_vn_asst1_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1732 {
1733 unsigned src0 = bi_get_src(ins, regs, 0);
1734 assert((1 << src0) & 0xfb);
1735 unsigned src1 = bi_get_src(ins, regs, 1);
1736 assert((1 << src1) & 0xfb);
1737 unsigned src2 = bi_get_src(ins, regs, 2);
1738
1739 unsigned h = 0;
1740
1741 unsigned l = 0;
1742
1743 unsigned neg2 = ins->src_neg[2];
1744 assert(neg2 < 2);
1745
1746 return 0x6eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (h << 9) | (l << 10) | (neg2 << 11);
1747 }
1748
1749 static inline unsigned
pan_pack_fma_fma_rscale_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1750 pan_pack_fma_fma_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1751 {
1752 unsigned src0 = bi_get_src(ins, regs, 0);
1753 assert((1 << src0) & 0xfb);
1754 unsigned src1 = bi_get_src(ins, regs, 1);
1755 assert((1 << src1) & 0xfb);
1756 unsigned src2 = bi_get_src(ins, regs, 2);
1757 unsigned src3 = bi_get_src(ins, regs, 3);
1758
1759 assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ);
1760 unsigned round = (ins->roundmode == BIFROST_RTZ) ? 1 : 0;
1761 assert(round < 2);
1762
1763 unsigned clamp = ins->outmod;
1764 assert(clamp < 4);
1765
1766 unsigned neg0 = ins->src_neg[0];
1767 assert(neg0 < 2);
1768
1769 unsigned neg1 = ins->src_neg[1];
1770 assert(neg1 < 2);
1771
1772 unsigned abs0 = ins->src_abs[0];
1773 assert(abs0 < 2);
1774
1775 unsigned neg2 = ins->src_neg[2];
1776 assert(neg2 < 2);
1777
1778 unsigned special = 0;
1779
1780 unsigned derived_16 = 0;
1781 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0;
1782 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1;
1783 else unreachable("No pattern match at pos 16");
1784
1785 unsigned derived_12 = 0;
1786 if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0;
1787 else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1;
1788 else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2;
1789 else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3;
1790 else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4;
1791 else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5;
1792 else if ((clamp == 0) && (special == 3) && (round == 0)) derived_12 = 6;
1793 else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7;
1794 else unreachable("No pattern match at pos 12");
1795
1796 return 0x280000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12);
1797 }
1798
1799 static inline unsigned
pan_pack_add_hadd_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1800 pan_pack_add_hadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1801 {
1802 unsigned src0 = bi_get_src(ins, regs, 0);
1803 unsigned src1 = bi_get_src(ins, regs, 1);
1804
1805 assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
1806 unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
1807 assert(round < 2);
1808
1809 return 0xbc6c0 | (src0 << 0) | (src1 << 3) | (round << 12);
1810 }
1811
1812 static inline unsigned
pan_pack_fma_imul_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1813 pan_pack_fma_imul_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1814 {
1815 unsigned src0 = bi_get_src(ins, regs, 0);
1816 assert((1 << src0) & 0xfb);
1817 unsigned src1 = bi_get_src(ins, regs, 1);
1818 assert((1 << src1) & 0xfb);
1819
1820 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
1821 unsigned swz0_temp = 0;
1822 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
1823 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
1824 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
1825 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
1826 else unreachable("Could not pattern match widen");
1827 unsigned swz0 = swz0_temp;
1828 assert(swz0 < 4);
1829
1830 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1831 unsigned swz1_temp = 0;
1832 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
1833 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
1834 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
1835 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
1836 else unreachable("Could not pattern match widen");
1837 unsigned swz1 = swz1_temp;
1838 assert(swz1 < 4);
1839
1840 return 0x7240c0 | (src0 << 0) | (src1 << 3) | (swz0 << 9) | (swz1 << 11);
1841 }
1842
1843 static inline unsigned
pan_pack_add_load_i48(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1844 pan_pack_add_load_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1845 {
1846 unsigned src0 = bi_get_src(ins, regs, 0);
1847 unsigned src1 = bi_get_src(ins, regs, 1);
1848
1849 assert(ins->segment);
1850 unsigned seg = ins->segment;
1851 assert(seg < 8);
1852
1853 bi_write_staging_register(clause, ins);
1854 return 0x65200 | (src0 << 0) | (src1 << 3) | (seg << 6);
1855 }
1856
1857 static inline unsigned
pan_pack_add_hadd_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1858 pan_pack_add_hadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1859 {
1860 unsigned src0 = bi_get_src(ins, regs, 0);
1861 unsigned src1 = bi_get_src(ins, regs, 1);
1862
1863 assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
1864 unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
1865 assert(round < 2);
1866
1867 return 0xbc640 | (src0 << 0) | (src1 << 3) | (round << 12);
1868 }
1869
1870 static inline unsigned
pan_pack_add_imov_fma(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1871 pan_pack_add_imov_fma(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1872 {
1873
1874 unsigned threads = 0;
1875
1876 return 0xd7820 | (threads << 3);
1877 }
1878
1879 static inline unsigned
pan_pack_add_icmpi_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1880 pan_pack_add_icmpi_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1881 {
1882 unsigned src0 = bi_get_src(ins, regs, 0);
1883 unsigned src1 = bi_get_src(ins, regs, 1);
1884
1885 unsigned result_type = 1;
1886
1887 unsigned cmpf_table[] = {
1888 ~0, ~0, ~0, 1, 0, ~0, ~0
1889 };
1890 unsigned cmpf = cmpf_table[ins->cond];
1891 assert(cmpf < 2);
1892
1893 return 0x7b880 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
1894 }
1895
1896 static inline unsigned
pan_pack_add_store_i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1897 pan_pack_add_store_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1898 {
1899 unsigned src0 = bi_get_src(ins, regs, 1);
1900 unsigned src1 = bi_get_src(ins, regs, 2);
1901
1902 assert(ins->segment);
1903 unsigned seg = ins->segment;
1904 assert(seg < 8);
1905
1906 bi_read_staging_register(clause, ins);
1907 return 0x62000 | (src0 << 0) | (src1 << 3) | (seg << 6);
1908 }
1909
1910 static inline unsigned
pan_pack_fma_jump_ex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1911 pan_pack_fma_jump_ex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1912 {
1913 unsigned src0 = bi_get_src(ins, regs, 0);
1914 assert((1 << src0) & 0xfb);
1915 unsigned src1 = bi_get_src(ins, regs, 1);
1916 assert((1 << src1) & 0xfb);
1917 unsigned src2 = bi_get_src(ins, regs, 2);
1918
1919 unsigned test_mode = 0;
1920
1921 unsigned stack_mode = 2;
1922
1923 return 0x2eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (test_mode << 9) | (stack_mode << 10);
1924 }
1925
1926 static inline unsigned
pan_pack_add_iadd_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1927 pan_pack_add_iadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1928 {
1929 unsigned src0 = bi_get_src(ins, regs, 0);
1930 unsigned src1 = bi_get_src(ins, regs, 1);
1931
1932 unsigned saturate = 0;
1933
1934 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
1935 unsigned lanes1_temp = 0;
1936 if (lanes1_sz == 32) lanes1_temp = 0;
1937 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
1938 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
1939 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
1940 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
1941 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
1942 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
1943 else unreachable("Could not pattern match widen");
1944 unsigned lanes1 = lanes1_temp;
1945 assert(lanes1 < 8);
1946
1947 if (lanes1 == 0) {
1948 return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8);
1949 } else if ((lanes1 == 1) || (lanes1 == 2)) {
1950 unsigned derived_9 = 0;
1951 if (lanes1 == 1) derived_9 = 0;
1952 else if (lanes1 == 2) derived_9 = 1;
1953 else unreachable("No pattern match at pos 9");
1954
1955 return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
1956 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
1957 unsigned derived_9 = 0;
1958 if (lanes1 == 3) derived_9 = 0;
1959 else if (lanes1 == 4) derived_9 = 1;
1960 else if (lanes1 == 5) derived_9 = 2;
1961 else if (lanes1 == 6) derived_9 = 3;
1962 else unreachable("No pattern match at pos 9");
1963
1964 return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
1965 } else {
1966 unreachable("No matching state found in add_iadd_s32");
1967 }
1968 }
1969
1970 static inline unsigned
pan_pack_fma_rshift_xor_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)1971 pan_pack_fma_rshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
1972 {
1973 unsigned src0 = bi_get_src(ins, regs, 0);
1974 assert((1 << src0) & 0xfb);
1975 unsigned src1 = bi_get_src(ins, regs, 1);
1976 assert((1 << src1) & 0xfb);
1977 unsigned src2 = bi_get_src(ins, regs, 2);
1978
1979 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
1980 unsigned lanes2_temp = 0;
1981 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
1982 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
1983 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
1984 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
1985 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
1986 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
1987 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
1988 else unreachable("Could not pattern match widen");
1989 unsigned lanes2 = lanes2_temp;
1990 assert(lanes2 < 8);
1991
1992 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
1993 assert(not_result < 2);
1994
1995 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
1996 unsigned derived_9 = 0;
1997 if (lanes2 == 0) derived_9 = 0;
1998 else if (lanes2 == 1) derived_9 = 1;
1999 else if (lanes2 == 2) derived_9 = 2;
2000 else if (lanes2 == 3) derived_9 = 3;
2001 else unreachable("No pattern match at pos 9");
2002
2003 return 0x320800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
2004 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
2005 unsigned derived_9 = 0;
2006 if (lanes2 == 4) derived_9 = 1;
2007 else if (lanes2 == 5) derived_9 = 2;
2008 else if (lanes2 == 6) derived_9 = 3;
2009 else unreachable("No pattern match at pos 9");
2010
2011 return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
2012 } else {
2013 unreachable("No matching state found in fma_rshift_xor_v2i16");
2014 }
2015 }
2016
2017 static inline unsigned
pan_pack_fma_csel_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2018 pan_pack_fma_csel_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2019 {
2020 unsigned src0 = bi_get_src(ins, regs, 0);
2021 assert((1 << src0) & 0xfb);
2022 unsigned src1 = bi_get_src(ins, regs, 1);
2023 assert((1 << src1) & 0xfb);
2024 unsigned src2 = bi_get_src(ins, regs, 2);
2025 unsigned src3 = bi_get_src(ins, regs, 3);
2026
2027 unsigned cmpf_table[] = {
2028 ~0, 2, 3, 1, 0, ~0, ~0
2029 };
2030 unsigned cmpf = cmpf_table[ins->cond];
2031 assert(cmpf < 4);
2032
2033 if ((cmpf == 2) || (cmpf == 3)) {
2034 { unsigned temp = src0; src0 = src1; src1 = temp; }
2035 if (cmpf == 3) cmpf = 1;
2036 else if (cmpf == 2) cmpf = 0;
2037 }
2038
2039 unsigned derived_12 = 0;
2040 if (cmpf == 0) derived_12 = 0;
2041 else if (cmpf == 1) derived_12 = 1;
2042 else unreachable("No pattern match at pos 12");
2043
2044 return 0x6e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
2045 }
2046
2047 static inline unsigned
pan_pack_add_shaddxh_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2048 pan_pack_add_shaddxh_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2049 {
2050 unsigned src0 = bi_get_src(ins, regs, 0);
2051 unsigned src1 = bi_get_src(ins, regs, 1);
2052
2053 return 0x3f8c0 | (src0 << 0) | (src1 << 3);
2054 }
2055
2056 static inline unsigned
pan_pack_add_isub_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2057 pan_pack_add_isub_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2058 {
2059 unsigned src0 = bi_get_src(ins, regs, 0);
2060 unsigned src1 = bi_get_src(ins, regs, 1);
2061
2062 unsigned saturate = 0;
2063
2064 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2065 unsigned lanes0_temp = 0;
2066 if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
2067 else unreachable("Could not pattern match widen");
2068 unsigned lanes0 = lanes0_temp;
2069 assert(lanes0 < 8);
2070
2071 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2072 unsigned lanes1_temp = 0;
2073 if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
2074 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
2075 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
2076 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
2077 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
2078 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
2079 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
2080 else unreachable("Could not pattern match widen");
2081 unsigned lanes1 = lanes1_temp;
2082 assert(lanes1 < 8);
2083
2084 if ((lanes0 == 0) && (lanes1 == 0)) {
2085 unsigned derived_7 = 0;
2086 if (saturate == 0) derived_7 = 0;
2087 else if (saturate == 1) derived_7 = 1;
2088 else unreachable("No pattern match at pos 7");
2089
2090 return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
2091 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
2092 unsigned derived_7 = 0;
2093 if (saturate == 0) derived_7 = 0;
2094 else if (saturate == 1) derived_7 = 1;
2095 else unreachable("No pattern match at pos 7");
2096
2097 unsigned derived_9 = 0;
2098 if (lanes1 == 1) derived_9 = 0;
2099 else if (lanes1 == 2) derived_9 = 1;
2100 else if (lanes1 == 3) derived_9 = 2;
2101 else if (lanes1 == 4) derived_9 = 3;
2102 else unreachable("No pattern match at pos 9");
2103
2104 return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
2105 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
2106 unsigned derived_7 = 0;
2107 if (saturate == 0) derived_7 = 0;
2108 else if (saturate == 1) derived_7 = 1;
2109 else unreachable("No pattern match at pos 7");
2110
2111 unsigned derived_9 = 0;
2112 if (lanes1 == 5) derived_9 = 0;
2113 else if (lanes1 == 6) derived_9 = 1;
2114 else unreachable("No pattern match at pos 9");
2115
2116 return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
2117 } else {
2118 unreachable("No matching state found in add_isub_v4u8");
2119 }
2120 }
2121
2122 static inline unsigned
pan_pack_add_frexpm_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2123 pan_pack_add_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2124 {
2125 unsigned src0 = bi_get_src(ins, regs, 0);
2126
2127 unsigned abs0 = ins->src_abs[0];
2128 assert(abs0 < 2);
2129
2130 unsigned sqrt = 0;
2131
2132 unsigned log = 1;
2133
2134 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2135 unsigned widen0_temp = 0;
2136 if (widen0_sz == 32) widen0_temp = 1;
2137 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
2138 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
2139 else unreachable("Could not pattern match widen");
2140 unsigned widen0 = widen0_temp;
2141 assert(widen0 < 4);
2142
2143 unsigned neg0 = ins->src_neg[0];
2144 assert(neg0 < 2);
2145
2146 if ((log == 0) && (neg0 == 0)) {
2147 return 0x3db20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3);
2148 } else if ((log == 1) && (sqrt == 0)) {
2149 return 0x3da20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7);
2150 } else {
2151 unreachable("No matching state found in add_frexpm_f32");
2152 }
2153 }
2154
2155 static inline unsigned
pan_pack_add_frexpe_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2156 pan_pack_add_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2157 {
2158 unsigned src0 = bi_get_src(ins, regs, 0);
2159
2160 unsigned neg = ins->src_neg[0];
2161 assert(neg < 2);
2162
2163 unsigned sqrt = 0;
2164
2165 unsigned log = 1;
2166
2167 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2168 unsigned swz0_temp = 0;
2169 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2170 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2171 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2172 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2173 else unreachable("Could not pattern match widen");
2174 unsigned swz0 = swz0_temp;
2175 assert(swz0 < 4);
2176
2177 if (log == 0) {
2178 return 0x3dc00 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (swz0 << 3);
2179 } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
2180 return 0x3de00 | (src0 << 0) | (swz0 << 3);
2181 } else {
2182 unreachable("No matching state found in add_frexpe_v2f16");
2183 }
2184 }
2185
2186 static inline unsigned
pan_pack_add_logb_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2187 pan_pack_add_logb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2188 {
2189 unsigned src0 = bi_get_src(ins, regs, 0);
2190
2191 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2192 unsigned widen0_temp = 0;
2193 if (widen0_sz == 32) widen0_temp = 1;
2194 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
2195 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
2196 else unreachable("Could not pattern match widen");
2197 unsigned widen0 = widen0_temp;
2198 assert(widen0 < 4);
2199
2200 return 0x3d9a0 | (src0 << 0) | (widen0 << 3);
2201 }
2202
2203 static inline unsigned
pan_pack_fma_rshift_or_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2204 pan_pack_fma_rshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2205 {
2206 unsigned src0 = bi_get_src(ins, regs, 0);
2207 assert((1 << src0) & 0xfb);
2208 unsigned src1 = bi_get_src(ins, regs, 1);
2209 assert((1 << src1) & 0xfb);
2210 unsigned src2 = bi_get_src(ins, regs, 2);
2211
2212 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
2213 unsigned lanes2_temp = 0;
2214 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
2215 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
2216 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
2217 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
2218 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
2219 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
2220 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
2221 else unreachable("Could not pattern match widen");
2222 unsigned lanes2 = lanes2_temp;
2223 assert(lanes2 < 8);
2224
2225 unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
2226 assert(not1 < 2);
2227
2228 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
2229 assert(not_result < 2);
2230
2231 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
2232 unsigned derived_9 = 0;
2233 if (lanes2 == 0) derived_9 = 0;
2234 else if (lanes2 == 1) derived_9 = 1;
2235 else if (lanes2 == 2) derived_9 = 2;
2236 else if (lanes2 == 3) derived_9 = 3;
2237 else unreachable("No pattern match at pos 9");
2238
2239 return 0x302800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
2240 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
2241 unsigned derived_9 = 0;
2242 if (lanes2 == 4) derived_9 = 1;
2243 else if (lanes2 == 5) derived_9 = 2;
2244 else if (lanes2 == 6) derived_9 = 3;
2245 else unreachable("No pattern match at pos 9");
2246
2247 return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
2248 } else {
2249 unreachable("No matching state found in fma_rshift_or_v2i16");
2250 }
2251 }
2252
2253 static inline unsigned
pan_pack_fma_shaddxl_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2254 pan_pack_fma_shaddxl_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2255 {
2256 unsigned src0 = bi_get_src(ins, regs, 0);
2257 assert((1 << src0) & 0xfb);
2258 unsigned src1 = bi_get_src(ins, regs, 1);
2259 assert((1 << src1) & 0xfb);
2260
2261 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2262 unsigned lane1_temp = 0;
2263 if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
2264 else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
2265 else if (lane1_sz == 32) lane1_temp = 2;
2266 else unreachable("Could not pattern match widen");
2267 unsigned lane1 = lane1_temp;
2268 assert(lane1 < 4);
2269
2270 unsigned shift = 0;
2271 return 0x70e800 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6);
2272 }
2273
2274 static inline unsigned
pan_pack_add_fcmp_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2275 pan_pack_add_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2276 {
2277 unsigned src0 = bi_get_src(ins, regs, 0);
2278 unsigned src1 = bi_get_src(ins, regs, 1);
2279
2280 unsigned neg0 = ins->src_neg[0];
2281 assert(neg0 < 2);
2282
2283 unsigned neg1 = ins->src_neg[1];
2284 assert(neg1 < 2);
2285
2286 unsigned cmpf_table[] = {
2287 ~0, 4, 5, 2, 1, 0, 3
2288 };
2289 unsigned cmpf = cmpf_table[ins->cond];
2290 assert(cmpf < 8);
2291
2292 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2293 unsigned swz0_temp = 0;
2294 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2295 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2296 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2297 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2298 else unreachable("Could not pattern match widen");
2299 unsigned swz0 = swz0_temp;
2300 assert(swz0 < 4);
2301
2302 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2303 unsigned swz1_temp = 0;
2304 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
2305 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
2306 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
2307 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
2308 else unreachable("Could not pattern match widen");
2309 unsigned swz1 = swz1_temp;
2310 assert(swz1 < 4);
2311
2312 unsigned result_type = 2;
2313
2314 if ((neg0 == 0) && (neg1 == 1)) {
2315 { unsigned temp = src0; src0 = src1; src1 = temp; }
2316 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
2317 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
2318 if (cmpf == 1) cmpf = 4;
2319 else if (cmpf == 5) cmpf = 2;
2320 else if (cmpf == 4) cmpf = 1;
2321 else if (cmpf == 2) cmpf = 5;
2322 }
2323
2324 unsigned derived_13 = 0;
2325 if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0;
2326 else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1;
2327 else unreachable("No pattern match at pos 13");
2328
2329 return 0xb0000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (swz0 << 9) | (swz1 << 11) | (result_type << 14) | (derived_13 << 13);
2330 }
2331
2332 static inline unsigned
pan_pack_fma_clz_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2333 pan_pack_fma_clz_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2334 {
2335 unsigned src0 = bi_get_src(ins, regs, 0);
2336 assert((1 << src0) & 0xfb);
2337
2338 unsigned mask = 0;
2339
2340 return 0x701f90 | (src0 << 0) | (mask << 3);
2341 }
2342
2343 static inline unsigned
pan_pack_add_frexpe_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2344 pan_pack_add_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2345 {
2346 unsigned src0 = bi_get_src(ins, regs, 0);
2347
2348 unsigned neg = ins->src_neg[0];
2349 assert(neg < 2);
2350
2351 unsigned sqrt = 0;
2352
2353 unsigned log = 1;
2354
2355 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2356 unsigned widen0_temp = 0;
2357 if (widen0_sz == 32) widen0_temp = 1;
2358 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
2359 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
2360 else unreachable("Could not pattern match widen");
2361 unsigned widen0 = widen0_temp;
2362 assert(widen0 < 4);
2363
2364 if (log == 0) {
2365 return 0x3dc20 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (widen0 << 3);
2366 } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
2367 return 0x3de20 | (src0 << 0) | (widen0 << 3);
2368 } else {
2369 unreachable("No matching state found in add_frexpe_f32");
2370 }
2371 }
2372
2373 static inline unsigned
pan_pack_fma_quiet_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2374 pan_pack_fma_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2375 {
2376 unsigned src0 = bi_get_src(ins, regs, 0);
2377 assert((1 << src0) & 0xfb);
2378
2379 return 0x701970 | (src0 << 0);
2380 }
2381
2382 static inline unsigned
pan_pack_add_fmin_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2383 pan_pack_add_fmin_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2384 {
2385 unsigned src0 = bi_get_src(ins, regs, 0);
2386 unsigned src1 = bi_get_src(ins, regs, 1);
2387
2388 unsigned abs0 = ins->src_abs[0];
2389 assert(abs0 < 2);
2390
2391 unsigned abs1 = ins->src_abs[1];
2392 assert(abs1 < 2);
2393
2394 unsigned neg0 = ins->src_neg[0];
2395 assert(neg0 < 2);
2396
2397 unsigned neg1 = ins->src_neg[1];
2398 assert(neg1 < 2);
2399
2400 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2401 unsigned swz0_temp = 0;
2402 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2403 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2404 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2405 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2406 else unreachable("Could not pattern match widen");
2407 unsigned swz0 = swz0_temp;
2408 assert(swz0 < 4);
2409
2410 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2411 unsigned swz1_temp = 0;
2412 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
2413 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
2414 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
2415 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
2416 else unreachable("Could not pattern match widen");
2417 unsigned swz1 = swz1_temp;
2418 assert(swz1 < 4);
2419
2420 unsigned sem = 0;
2421
2422 if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
2423 { unsigned temp = src0; src0 = src1; src1 = temp; }
2424 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
2425 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
2426 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
2427 if (sem == 3) sem = 2;
2428 else if (sem == 2) sem = 3;
2429 }
2430
2431 unsigned derived_6 = 0;
2432 if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
2433 else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
2434 else unreachable("No pattern match at pos 6");
2435
2436 return 0x90000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6);
2437 }
2438
2439 static inline unsigned
pan_pack_add_var_tex_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2440 pan_pack_add_var_tex_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2441 {
2442
2443 unsigned update = 0;
2444
2445 unsigned skip = ins->skip;
2446 assert(skip < 2);
2447
2448 unsigned lod_mode = 1 - ins->texture.compute_lod;
2449 assert(lod_mode < 2);
2450
2451 unsigned sample = ins->load_vary.interp_mode;
2452 assert(sample < 2);
2453
2454 unsigned varying_index = bi_get_immediate(ins, 0);
2455 unsigned texture_index = ins->texture.texture_index;
2456 bi_write_staging_register(clause, ins);
2457 unsigned derived_5 = 0;
2458 if ((sample == 0) && (update == 0)) derived_5 = 0;
2459 else if ((sample == 1) && (update == 1)) derived_5 = 1;
2460 else unreachable("No pattern match at pos 5");
2461
2462 return 0xca000 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5);
2463 }
2464
2465 static inline unsigned
pan_pack_add_branch_lowbits_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2466 pan_pack_add_branch_lowbits_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2467 {
2468 unsigned src0 = bi_get_src(ins, regs, 0);
2469 unsigned src1 = bi_get_src(ins, regs, 1);
2470 assert((1 << src1) & 0xf7);
2471
2472 return 0x6fa38 | (src0 << 0) | (src1 << 6);
2473 }
2474
2475 static inline unsigned
pan_pack_fma_lshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2476 pan_pack_fma_lshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2477 {
2478 unsigned src0 = bi_get_src(ins, regs, 0);
2479 assert((1 << src0) & 0xfb);
2480 unsigned src1 = bi_get_src(ins, regs, 1);
2481 assert((1 << src1) & 0xfb);
2482 unsigned src2 = bi_get_src(ins, regs, 2);
2483
2484 unsigned bytes2 = 0;
2485
2486 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
2487 unsigned lane2_temp = 0;
2488 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
2489 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
2490 else unreachable("Could not pattern match widen");
2491 unsigned lane2 = lane2_temp;
2492 assert(lane2 < 2);
2493
2494 unsigned result_word = 0;
2495
2496 return 0x33c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
2497 }
2498
2499 static inline unsigned
pan_pack_fma_idp_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2500 pan_pack_fma_idp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2501 {
2502 unsigned src0 = bi_get_src(ins, regs, 0);
2503 assert((1 << src0) & 0xfb);
2504 unsigned src1 = bi_get_src(ins, regs, 1);
2505 assert((1 << src1) & 0xfb);
2506
2507 ASSERTED bool sign0_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
2508 bool sign0_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
2509 assert(sign0_small);
2510 unsigned sign0 = sign0_signed ? 1 : 0;
2511 assert(sign0 < 2);
2512
2513 ASSERTED bool sign1_small = nir_alu_type_get_type_size(ins->src_types[1]) <= 16;
2514 bool sign1_signed = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int;
2515 assert(sign1_small);
2516 unsigned sign1 = sign1_signed ? 1 : 0;
2517 assert(sign1 < 2);
2518
2519 return 0x73e8c0 | (src0 << 0) | (src1 << 3) | (sign0 << 9) | (sign1 << 10);
2520 }
2521
2522 static inline unsigned
pan_pack_add_icmp_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2523 pan_pack_add_icmp_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2524 {
2525 unsigned src0 = bi_get_src(ins, regs, 0);
2526 unsigned src1 = bi_get_src(ins, regs, 1);
2527
2528 unsigned result_type = 1;
2529
2530 unsigned cmpf_table[] = {
2531 ~0, 2, 3, 1, 0, ~0, ~0
2532 };
2533 unsigned cmpf = cmpf_table[ins->cond];
2534 assert(cmpf < 4);
2535
2536 if ((cmpf == 2) || (cmpf == 3)) {
2537 { unsigned temp = src0; src0 = src1; src1 = temp; }
2538 if (cmpf == 3) cmpf = 1;
2539 else if (cmpf == 2) cmpf = 0;
2540 }
2541
2542 unsigned derived_6 = 0;
2543 if (cmpf == 0) derived_6 = 0;
2544 else if (cmpf == 1) derived_6 = 1;
2545 else unreachable("No pattern match at pos 6");
2546
2547 return 0x7b080 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
2548 }
2549
2550 static inline unsigned
pan_pack_add_acmpstore_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2551 pan_pack_add_acmpstore_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2552 {
2553 unsigned src0 = bi_get_src(ins, regs, 1);
2554 unsigned src1 = bi_get_src(ins, regs, 2);
2555
2556 assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
2557 unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
2558 assert(seg < 2);
2559
2560 bi_read_staging_register(clause, ins);
2561 return 0x64900 | (src0 << 0) | (src1 << 3) | (seg << 9);
2562 }
2563
2564 static inline unsigned
pan_pack_add_hadd_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2565 pan_pack_add_hadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2566 {
2567 unsigned src0 = bi_get_src(ins, regs, 0);
2568 unsigned src1 = bi_get_src(ins, regs, 1);
2569
2570 assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
2571 unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
2572 assert(round < 2);
2573
2574 return 0xbc4c0 | (src0 << 0) | (src1 << 3) | (round << 12);
2575 }
2576
2577 static inline unsigned
pan_pack_add_cubeface2(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2578 pan_pack_add_cubeface2(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2579 {
2580 unsigned src0 = bi_get_src(ins, regs, 0);
2581
2582 return 0x3de58 | (src0 << 0);
2583 }
2584
2585 static inline unsigned
pan_pack_fma_s16_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2586 pan_pack_fma_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2587 {
2588 unsigned src0 = bi_get_src(ins, regs, 0);
2589 assert((1 << src0) & 0xfb);
2590
2591 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2592 unsigned lane0_temp = 0;
2593 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2594 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2595 else unreachable("Could not pattern match widen");
2596 unsigned lane0 = lane0_temp;
2597 assert(lane0 < 2);
2598
2599 return 0x700cc0 | (src0 << 0) | (lane0 << 4);
2600 }
2601
2602 static inline unsigned
pan_pack_fma_fround_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2603 pan_pack_fma_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2604 {
2605 unsigned src0 = bi_get_src(ins, regs, 0);
2606 assert((1 << src0) & 0xfb);
2607
2608 unsigned abs0 = ins->src_abs[0];
2609 assert(abs0 < 2);
2610
2611 unsigned neg0 = ins->src_neg[0];
2612 assert(neg0 < 2);
2613
2614 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2615 unsigned swz0_temp = 0;
2616 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2617 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2618 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2619 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2620 else unreachable("Could not pattern match widen");
2621 unsigned swz0 = swz0_temp;
2622 assert(swz0 < 4);
2623
2624 unsigned round = ins->roundmode;
2625 assert(round < 8);
2626
2627 if (round != 4) {
2628 unsigned derived_9 = 0;
2629 if (round == 0) derived_9 = 0;
2630 else if (round == 1) derived_9 = 1;
2631 else if (round == 2) derived_9 = 2;
2632 else if (round == 3) derived_9 = 3;
2633 else unreachable("No pattern match at pos 9");
2634
2635 return 0x70c000 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (derived_9 << 9);
2636 } else if (round == 4) {
2637 return 0x707600 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3);
2638 } else {
2639 unreachable("No matching state found in fma_fround_v2f16");
2640 }
2641 }
2642
2643 static inline unsigned
pan_pack_add_v2u8_to_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2644 pan_pack_add_v2u8_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2645 {
2646 unsigned src0 = bi_get_src(ins, regs, 0);
2647
2648 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2649 unsigned swz0_temp = 0;
2650 if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2651 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2652 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
2653 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
2654 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
2655 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
2656 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
2657 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
2658 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
2659 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
2660 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
2661 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
2662 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
2663 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
2664 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
2665 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
2666 else unreachable("Could not pattern match widen");
2667 unsigned swz0 = swz0_temp;
2668 assert(swz0 < 16);
2669
2670 return 0x3c708 | (src0 << 0) | (swz0 << 4);
2671 }
2672
2673 static inline unsigned
pan_pack_add_fmax_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2674 pan_pack_add_fmax_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2675 {
2676 unsigned src0 = bi_get_src(ins, regs, 0);
2677 unsigned src1 = bi_get_src(ins, regs, 1);
2678
2679 unsigned abs0 = ins->src_abs[0];
2680 assert(abs0 < 2);
2681
2682 unsigned abs1 = ins->src_abs[1];
2683 assert(abs1 < 2);
2684
2685 unsigned neg0 = ins->src_neg[0];
2686 assert(neg0 < 2);
2687
2688 unsigned neg1 = ins->src_neg[1];
2689 assert(neg1 < 2);
2690
2691 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2692 unsigned swz0_temp = 0;
2693 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
2694 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
2695 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
2696 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
2697 else unreachable("Could not pattern match widen");
2698 unsigned swz0 = swz0_temp;
2699 assert(swz0 < 4);
2700
2701 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2702 unsigned swz1_temp = 0;
2703 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
2704 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
2705 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
2706 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
2707 else unreachable("Could not pattern match widen");
2708 unsigned swz1 = swz1_temp;
2709 assert(swz1 < 4);
2710
2711 unsigned sem = 0;
2712
2713 if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
2714 { unsigned temp = src0; src0 = src1; src1 = temp; }
2715 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
2716 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
2717 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
2718 if (sem == 3) sem = 2;
2719 else if (sem == 2) sem = 3;
2720 }
2721
2722 unsigned derived_6 = 0;
2723 if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
2724 else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
2725 else unreachable("No pattern match at pos 6");
2726
2727 return 0x80000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (derived_6 << 6);
2728 }
2729
2730 static inline unsigned
pan_pack_fma_f16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2731 pan_pack_fma_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2732 {
2733 unsigned src0 = bi_get_src(ins, regs, 0);
2734 assert((1 << src0) & 0xfb);
2735
2736 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2737 unsigned lane0_temp = 0;
2738 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2739 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2740 else unreachable("Could not pattern match widen");
2741 unsigned lane0 = lane0_temp;
2742 assert(lane0 < 2);
2743
2744 return 0x700d10 | (src0 << 0) | (lane0 << 3);
2745 }
2746
2747 static inline unsigned
pan_pack_add_branchc_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2748 pan_pack_add_branchc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2749 {
2750 unsigned src0 = bi_get_src(ins, regs, 0);
2751 unsigned src1 = bi_get_src(ins, regs, 1);
2752 assert((1 << src1) & 0xf7);
2753
2754 unsigned combine = 0;
2755
2756 return 0x6f238 | (src0 << 0) | (src1 << 6) | (combine << 10);
2757 }
2758
2759 static inline unsigned
pan_pack_fma_dtsel_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2760 pan_pack_fma_dtsel_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2761 {
2762 unsigned src0 = bi_get_src(ins, regs, 0);
2763 assert((1 << src0) & 0xfb);
2764
2765 unsigned table = 63;
2766 return 0x70f200 | (src0 << 0) | (table << 3);
2767 }
2768
2769 static inline unsigned
pan_pack_add_iadd_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2770 pan_pack_add_iadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2771 {
2772 unsigned src0 = bi_get_src(ins, regs, 0);
2773 unsigned src1 = bi_get_src(ins, regs, 1);
2774
2775 unsigned saturate = 0;
2776
2777 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2778 unsigned lanes0_temp = 0;
2779 if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
2780 else unreachable("Could not pattern match widen");
2781 unsigned lanes0 = lanes0_temp;
2782 assert(lanes0 < 8);
2783
2784 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
2785 unsigned lanes1_temp = 0;
2786 if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
2787 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
2788 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
2789 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
2790 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
2791 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
2792 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
2793 else unreachable("Could not pattern match widen");
2794 unsigned lanes1 = lanes1_temp;
2795 assert(lanes1 < 8);
2796
2797 if ((lanes0 == 0) && (lanes1 == 0)) {
2798 return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8);
2799 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
2800 unsigned derived_9 = 0;
2801 if (lanes1 == 1) derived_9 = 0;
2802 else if (lanes1 == 2) derived_9 = 1;
2803 else if (lanes1 == 3) derived_9 = 2;
2804 else if (lanes1 == 4) derived_9 = 3;
2805 else unreachable("No pattern match at pos 9");
2806
2807 return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
2808 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
2809 unsigned derived_9 = 0;
2810 if (lanes1 == 5) derived_9 = 0;
2811 else if (lanes1 == 6) derived_9 = 1;
2812 else unreachable("No pattern match at pos 9");
2813
2814 return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
2815 } else {
2816 unreachable("No matching state found in add_iadd_v4s8");
2817 }
2818 }
2819
2820 static inline unsigned
pan_pack_add_texs_2d_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2821 pan_pack_add_texs_2d_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2822 {
2823 unsigned src0 = bi_get_src(ins, regs, 0);
2824 unsigned src1 = bi_get_src(ins, regs, 1);
2825
2826 unsigned skip = ins->skip;
2827 assert(skip < 2);
2828
2829 unsigned lod_mode = 1 - ins->texture.compute_lod;
2830 assert(lod_mode < 2);
2831
2832 unsigned texture_index = ins->texture.texture_index;
2833 unsigned sampler_index = ins->texture.sampler_index;
2834 bi_write_staging_register(clause, ins);
2835 return 0x58000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10);
2836 }
2837
2838 static inline unsigned
pan_pack_add_vn_asst2_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2839 pan_pack_add_vn_asst2_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2840 {
2841 unsigned src0 = bi_get_src(ins, regs, 0);
2842
2843 unsigned scale = 0;
2844
2845 unsigned neg0 = ins->src_neg[0];
2846 assert(neg0 < 2);
2847
2848 if (scale == 0) {
2849 return 0x3df80 | (src0 << 0) | (neg0 << 3);
2850 } else if (scale == 1) {
2851 return 0x3de80 | (src0 << 0) | (neg0 << 4);
2852 } else {
2853 unreachable("No matching state found in add_vn_asst2_f32");
2854 }
2855 }
2856
2857 static inline unsigned
pan_pack_add_texc(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2858 pan_pack_add_texc(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2859 {
2860 unsigned src0 = bi_get_src(ins, regs, 1);
2861 unsigned src1 = bi_get_src(ins, regs, 2);
2862 unsigned src2 = bi_get_src(ins, regs, 3);
2863 assert((1 << src2) & 0xf7);
2864
2865 unsigned skip = ins->skip;
2866 assert(skip < 2);
2867
2868 bi_read_staging_register(clause, ins);
2869 assert(ins->src[0] == ins->dest);
2870 return 0xd7000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9);
2871 }
2872
2873 static inline unsigned
pan_pack_add_lea_attr_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2874 pan_pack_add_lea_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2875 {
2876 unsigned src0 = bi_get_src(ins, regs, 1);
2877 unsigned src1 = bi_get_src(ins, regs, 2);
2878
2879 unsigned register_format_temp = 0;
2880 if (ins->format == nir_type_float16) register_format_temp = 0;
2881 else if (ins->format == nir_type_float32) register_format_temp = 1;
2882 else if (ins->format == nir_type_int32) register_format_temp = 2;
2883 else if (ins->format == nir_type_uint32) register_format_temp = 3;
2884 else if (ins->format == nir_type_int16) register_format_temp = 4;
2885 else if (ins->format == nir_type_uint16) register_format_temp = 5;
2886 else if (ins->format == nir_type_float64) register_format_temp = 6;
2887 else if (ins->format == nir_type_int64) register_format_temp = 7;
2888 else unreachable("Could not pattern match register format");
2889 unsigned register_format = register_format_temp;
2890 assert(register_format < 16);
2891
2892 unsigned attribute_index = bi_get_immediate(ins, 0);
2893 bi_write_staging_register(clause, ins);
2894 if (register_format != 8) {
2895 unsigned derived_11 = 0;
2896 if (register_format == 0) derived_11 = 0;
2897 else if (register_format == 1) derived_11 = 1;
2898 else if (register_format == 2) derived_11 = 2;
2899 else if (register_format == 3) derived_11 = 3;
2900 else if (register_format == 4) derived_11 = 4;
2901 else if (register_format == 5) derived_11 = 5;
2902 else if (register_format == 6) derived_11 = 6;
2903 else if (register_format == 7) derived_11 = 7;
2904 else unreachable("No pattern match at pos 11");
2905
2906 return 0xc0000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6) | (derived_11 << 11);
2907 } else if (register_format == 8) {
2908 return 0xc8000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6);
2909 } else {
2910 unreachable("No matching state found in add_lea_attr_imm");
2911 }
2912 }
2913
2914 static inline unsigned
pan_pack_add_f16_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2915 pan_pack_add_f16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2916 {
2917 unsigned src0 = bi_get_src(ins, regs, 0);
2918
2919 unsigned round = ins->roundmode;
2920 assert(round < 8);
2921
2922 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2923 unsigned lane0_temp = 0;
2924 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2925 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2926 else unreachable("Could not pattern match widen");
2927 unsigned lane0 = lane0_temp;
2928 assert(lane0 < 2);
2929
2930 if (round != 4) {
2931 unsigned derived_4 = 0;
2932 if (round == 0) derived_4 = 0;
2933 else if (round == 1) derived_4 = 1;
2934 else if (round == 2) derived_4 = 2;
2935 else if (round == 3) derived_4 = 3;
2936 else unreachable("No pattern match at pos 4");
2937
2938 return 0x3c500 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4);
2939 } else if (round == 4) {
2940 return 0x3cc40 | (src0 << 0) | (lane0 << 5);
2941 } else {
2942 unreachable("No matching state found in add_f16_to_s32");
2943 }
2944 }
2945
2946 static inline unsigned
pan_pack_add_st_cvt(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2947 pan_pack_add_st_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2948 {
2949 unsigned src0 = bi_get_src(ins, regs, 1);
2950 unsigned src1 = bi_get_src(ins, regs, 2);
2951 unsigned src2 = bi_get_src(ins, regs, 3);
2952 assert((1 << src2) & 0xf7);
2953
2954 unsigned vecsize = ins->vector_channels - 1;
2955 assert(vecsize < 4);
2956
2957 bi_read_staging_register(clause, ins);
2958 return 0xc9800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
2959 }
2960
2961 static inline unsigned
pan_pack_add_load_i24(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2962 pan_pack_add_load_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2963 {
2964 unsigned src0 = bi_get_src(ins, regs, 0);
2965 unsigned src1 = bi_get_src(ins, regs, 1);
2966
2967 assert(ins->segment);
2968 unsigned seg = ins->segment;
2969 assert(seg < 8);
2970
2971 bi_write_staging_register(clause, ins);
2972 return 0x65000 | (src0 << 0) | (src1 << 3) | (seg << 6);
2973 }
2974
2975 static inline unsigned
pan_pack_fma_s8_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2976 pan_pack_fma_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2977 {
2978 unsigned src0 = bi_get_src(ins, regs, 0);
2979 assert((1 << src0) & 0xfb);
2980
2981 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
2982 unsigned lane0_temp = 0;
2983 if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
2984 else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
2985 else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
2986 else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
2987 else unreachable("Could not pattern match widen");
2988 unsigned lane0 = lane0_temp;
2989 assert(lane0 < 4);
2990
2991 return 0x700b40 | (src0 << 0) | (lane0 << 4);
2992 }
2993
2994 static inline unsigned
pan_pack_fma_cubeface1(bi_clause * clause,bi_instruction * ins,bi_registers * regs)2995 pan_pack_fma_cubeface1(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
2996 {
2997 unsigned src0 = bi_get_src(ins, regs, 0);
2998 assert((1 << src0) & 0xfb);
2999 unsigned src1 = bi_get_src(ins, regs, 1);
3000 assert((1 << src1) & 0xfb);
3001 unsigned src2 = bi_get_src(ins, regs, 2);
3002
3003 unsigned neg0 = ins->src_neg[0];
3004 assert(neg0 < 2);
3005
3006 unsigned neg1 = ins->src_neg[1];
3007 assert(neg1 < 2);
3008
3009 unsigned neg2 = ins->src_neg[2];
3010 assert(neg2 < 2);
3011
3012 unsigned derived_9 = 0;
3013 if ((neg0 == 0) && (neg1 == 0) && (neg2 == 0)) derived_9 = 0;
3014 else if ((neg0 == 1) && (neg1 == 1) && (neg2 == 1)) derived_9 = 1;
3015 else unreachable("No pattern match at pos 9");
3016
3017 return 0x706800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3018 }
3019
3020 static inline unsigned
pan_pack_add_branch_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3021 pan_pack_add_branch_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3022 {
3023 unsigned src0 = bi_get_src(ins, regs, 0);
3024 unsigned src1 = bi_get_src(ins, regs, 1);
3025 unsigned src2 = bi_get_src(ins, regs, 2);
3026 assert((1 << src2) & 0xf7);
3027
3028 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3029 unsigned widen0_temp = 0;
3030 if (widen0_sz == 32) widen0_temp = 0;
3031 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
3032 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
3033 else unreachable("Could not pattern match widen");
3034 unsigned widen0 = widen0_temp;
3035 assert(widen0 < 4);
3036
3037 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3038 unsigned widen1_temp = 0;
3039 if (widen1_sz == 32) widen1_temp = 0;
3040 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
3041 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
3042 else unreachable("Could not pattern match widen");
3043 unsigned widen1 = widen1_temp;
3044 assert(widen1 < 4);
3045
3046 unsigned cmpf_table[] = {
3047 ~0, 2, 3, 1, 0, ~0, ~0
3048 };
3049 unsigned cmpf = cmpf_table[ins->cond];
3050 assert(cmpf < 4);
3051
3052 if (src0 > src1) {
3053 { unsigned temp = src0; src0 = src1; src1 = temp; }
3054 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
3055 if (cmpf == 0) cmpf = 2;
3056 else if (cmpf == 3) cmpf = 1;
3057 else if (cmpf == 2) cmpf = 0;
3058 else if (cmpf == 1) cmpf = 3;
3059 }
3060
3061 unsigned derived_12 = 0;
3062 if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
3063 else unreachable("No pattern match at pos 12");
3064
3065 unsigned derived_9 = 0;
3066 if ((src0 < src1) && (cmpf == 2)) derived_9 = 0;
3067 else if (((src0 < src1) && (cmpf == 3)) || ((src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
3068 else if ((src0 < src1) && (cmpf == 1)) derived_9 = 2;
3069 else if ((src0 < src1) && (cmpf == 0)) derived_9 = 3;
3070 else if ((src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
3071 else unreachable("No pattern match at pos 9");
3072
3073 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
3074 }
3075
3076 static inline unsigned
pan_pack_add_iabs_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3077 pan_pack_add_iabs_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3078 {
3079 unsigned src0 = bi_get_src(ins, regs, 0);
3080
3081 return 0x3dea0 | (src0 << 0);
3082 }
3083
3084 static inline unsigned
pan_pack_add_iadd_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3085 pan_pack_add_iadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3086 {
3087 unsigned src0 = bi_get_src(ins, regs, 0);
3088 unsigned src1 = bi_get_src(ins, regs, 1);
3089
3090 unsigned saturate = 0;
3091
3092 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3093 unsigned lanes0_temp = 0;
3094 if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
3095 else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
3096 else unreachable("Could not pattern match widen");
3097 unsigned lanes0 = lanes0_temp;
3098 assert(lanes0 < 2);
3099
3100 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3101 unsigned lanes1_temp = 0;
3102 if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
3103 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
3104 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
3105 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
3106 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
3107 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
3108 else unreachable("Could not pattern match widen");
3109 unsigned lanes1 = lanes1_temp;
3110 assert(lanes1 < 8);
3111
3112 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
3113 unsigned derived_7 = 0;
3114 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
3115 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
3116 else unreachable("No pattern match at pos 7");
3117
3118 unsigned derived_9 = 0;
3119 if (lanes1 == 0) derived_9 = 0;
3120 else if (lanes1 == 1) derived_9 = 1;
3121 else unreachable("No pattern match at pos 9");
3122
3123 unsigned derived_10 = 0;
3124 if (lanes0 == 0) derived_10 = 0;
3125 else if (lanes0 == 1) derived_10 = 1;
3126 else unreachable("No pattern match at pos 10");
3127
3128 return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10);
3129 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
3130 unsigned derived_7 = 0;
3131 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
3132 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
3133 else unreachable("No pattern match at pos 7");
3134
3135 unsigned derived_9 = 0;
3136 if (lanes1 == 2) derived_9 = 0;
3137 else if (lanes1 == 3) derived_9 = 1;
3138 else unreachable("No pattern match at pos 9");
3139
3140 return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
3141 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
3142 unsigned derived_7 = 0;
3143 if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0;
3144 else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1;
3145 else unreachable("No pattern match at pos 7");
3146
3147 unsigned derived_9 = 0;
3148 if (lanes1 == 4) derived_9 = 0;
3149 else if (lanes1 == 5) derived_9 = 1;
3150 else unreachable("No pattern match at pos 9");
3151
3152 return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
3153 } else {
3154 unreachable("No matching state found in add_iadd_v2u16");
3155 }
3156 }
3157
3158 static inline unsigned
pan_pack_add_icmp_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3159 pan_pack_add_icmp_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3160 {
3161 unsigned src0 = bi_get_src(ins, regs, 0);
3162 unsigned src1 = bi_get_src(ins, regs, 1);
3163
3164 unsigned result_type = 1;
3165
3166 unsigned cmpf_table[] = {
3167 ~0, 2, 3, 1, 0, ~0, ~0
3168 };
3169 unsigned cmpf = cmpf_table[ins->cond];
3170 assert(cmpf < 4);
3171
3172 if ((cmpf == 2) || (cmpf == 3)) {
3173 { unsigned temp = src0; src0 = src1; src1 = temp; }
3174 if (cmpf == 3) cmpf = 1;
3175 else if (cmpf == 2) cmpf = 0;
3176 }
3177
3178 unsigned derived_6 = 0;
3179 if (cmpf == 0) derived_6 = 0;
3180 else if (cmpf == 1) derived_6 = 1;
3181 else unreachable("No pattern match at pos 6");
3182
3183 return 0x7b200 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
3184 }
3185
3186 static inline unsigned
pan_pack_add_fsin_table_u6(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3187 pan_pack_add_fsin_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3188 {
3189 unsigned src0 = bi_get_src(ins, regs, 0);
3190 assert((1 << src0) & 0xf7);
3191
3192 unsigned offset = 0;
3193
3194 return 0x67a80 | (src0 << 0) | (offset << 4);
3195 }
3196
3197 static inline unsigned
pan_pack_add_cube_ssel(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3198 pan_pack_add_cube_ssel(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3199 {
3200 unsigned src0 = bi_get_src(ins, regs, 0);
3201 unsigned src1 = bi_get_src(ins, regs, 1);
3202 unsigned src2 = bi_get_src(ins, regs, 2);
3203
3204 unsigned neg0 = ins->src_neg[0];
3205 assert(neg0 < 2);
3206
3207 unsigned neg1 = ins->src_neg[1];
3208 assert(neg1 < 2);
3209
3210 unsigned derived_9 = 0;
3211 if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0;
3212 else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1;
3213 else unreachable("No pattern match at pos 9");
3214
3215 return 0x3e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3216 }
3217
3218 static inline unsigned
pan_pack_add_fatan_table_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3219 pan_pack_add_fatan_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3220 {
3221 unsigned src0 = bi_get_src(ins, regs, 0);
3222 assert((1 << src0) & 0xf7);
3223 unsigned src1 = bi_get_src(ins, regs, 1);
3224 assert((1 << src1) & 0xf7);
3225
3226 return 0x67a40 | (src0 << 0) | (src1 << 3);
3227 }
3228
3229 static inline unsigned
pan_pack_add_isub_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3230 pan_pack_add_isub_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3231 {
3232 unsigned src0 = bi_get_src(ins, regs, 0);
3233 unsigned src1 = bi_get_src(ins, regs, 1);
3234
3235 unsigned saturate = 0;
3236
3237 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3238 unsigned lanes0_temp = 0;
3239 if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
3240 else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
3241 else unreachable("Could not pattern match widen");
3242 unsigned lanes0 = lanes0_temp;
3243 assert(lanes0 < 2);
3244
3245 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3246 unsigned lanes1_temp = 0;
3247 if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
3248 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
3249 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
3250 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
3251 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
3252 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
3253 else unreachable("Could not pattern match widen");
3254 unsigned lanes1 = lanes1_temp;
3255 assert(lanes1 < 8);
3256
3257 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
3258 unsigned derived_9 = 0;
3259 if (lanes1 == 0) derived_9 = 0;
3260 else if (lanes1 == 1) derived_9 = 1;
3261 else unreachable("No pattern match at pos 9");
3262
3263 unsigned derived_10 = 0;
3264 if (lanes0 == 0) derived_10 = 0;
3265 else if (lanes0 == 1) derived_10 = 1;
3266 else unreachable("No pattern match at pos 10");
3267
3268 return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10);
3269 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
3270 unsigned derived_9 = 0;
3271 if (lanes1 == 2) derived_9 = 0;
3272 else if (lanes1 == 3) derived_9 = 1;
3273 else unreachable("No pattern match at pos 9");
3274
3275 return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3276 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
3277 unsigned derived_9 = 0;
3278 if (lanes1 == 4) derived_9 = 0;
3279 else if (lanes1 == 5) derived_9 = 1;
3280 else unreachable("No pattern match at pos 9");
3281
3282 return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3283 } else {
3284 unreachable("No matching state found in add_isub_v2s16");
3285 }
3286 }
3287
3288 static inline unsigned
pan_pack_fma_atom_c1_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3289 pan_pack_fma_atom_c1_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3290 {
3291 unsigned src0 = bi_get_src(ins, regs, 0);
3292 assert((1 << src0) & 0xf3);
3293 unsigned src1 = bi_get_src(ins, regs, 1);
3294 assert((1 << src1) & 0xf3);
3295
3296 unsigned atom_opc = 2;
3297
3298 return 0x2f1e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
3299 }
3300
3301 static inline unsigned
pan_pack_add_isub_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3302 pan_pack_add_isub_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3303 {
3304 unsigned src0 = bi_get_src(ins, regs, 0);
3305 unsigned src1 = bi_get_src(ins, regs, 1);
3306
3307 unsigned saturate = 0;
3308
3309 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3310 unsigned lanes1_temp = 0;
3311 if (lanes1_sz == 32) lanes1_temp = 0;
3312 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
3313 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
3314 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
3315 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
3316 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
3317 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
3318 else unreachable("Could not pattern match widen");
3319 unsigned lanes1 = lanes1_temp;
3320 assert(lanes1 < 8);
3321
3322 if (lanes1 == 0) {
3323 return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8);
3324 } else if ((lanes1 == 1) || (lanes1 == 2)) {
3325 unsigned derived_9 = 0;
3326 if (lanes1 == 1) derived_9 = 0;
3327 else if (lanes1 == 2) derived_9 = 1;
3328 else unreachable("No pattern match at pos 9");
3329
3330 return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3331 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
3332 unsigned derived_9 = 0;
3333 if (lanes1 == 3) derived_9 = 0;
3334 else if (lanes1 == 4) derived_9 = 1;
3335 else if (lanes1 == 5) derived_9 = 2;
3336 else if (lanes1 == 6) derived_9 = 3;
3337 else unreachable("No pattern match at pos 9");
3338
3339 return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
3340 } else {
3341 unreachable("No matching state found in add_isub_s32");
3342 }
3343 }
3344
3345 static inline unsigned
pan_pack_add_ld_attr_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3346 pan_pack_add_ld_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3347 {
3348 unsigned src0 = bi_get_src(ins, regs, 1);
3349 unsigned src1 = bi_get_src(ins, regs, 2);
3350
3351 unsigned register_format_temp = 0;
3352 if (ins->format == nir_type_float16) register_format_temp = 0;
3353 else if (ins->format == nir_type_float32) register_format_temp = 1;
3354 else if (ins->format == nir_type_int32) register_format_temp = 2;
3355 else if (ins->format == nir_type_uint32) register_format_temp = 3;
3356 else if (ins->format == nir_type_int16) register_format_temp = 4;
3357 else if (ins->format == nir_type_uint16) register_format_temp = 5;
3358 else if (ins->format == nir_type_float64) register_format_temp = 6;
3359 else if (ins->format == nir_type_int64) register_format_temp = 7;
3360 else unreachable("Could not pattern match register format");
3361 unsigned register_format = register_format_temp;
3362 assert(register_format < 16);
3363
3364 unsigned vecsize = ins->vector_channels - 1;
3365 assert(vecsize < 4);
3366
3367 unsigned attribute_index = bi_get_immediate(ins, 0);
3368 bi_write_staging_register(clause, ins);
3369 if (register_format != 8) {
3370 unsigned derived_13 = 0;
3371 if (register_format == 0) derived_13 = 0;
3372 else if (register_format == 1) derived_13 = 1;
3373 else if (register_format == 2) derived_13 = 2;
3374 else if (register_format == 3) derived_13 = 3;
3375 else if (register_format == 4) derived_13 = 4;
3376 else if (register_format == 5) derived_13 = 5;
3377 else if (register_format == 6) derived_13 = 6;
3378 else if (register_format == 7) derived_13 = 7;
3379 else unreachable("No pattern match at pos 13");
3380
3381 return 0x40000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6) | (derived_13 << 13);
3382 } else if (register_format == 8) {
3383 return 0xc4000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6);
3384 } else {
3385 unreachable("No matching state found in add_ld_attr_imm");
3386 }
3387 }
3388
3389 static inline unsigned
pan_pack_fma_rshift_xor_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3390 pan_pack_fma_rshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3391 {
3392 unsigned src0 = bi_get_src(ins, regs, 0);
3393 assert((1 << src0) & 0xfb);
3394 unsigned src1 = bi_get_src(ins, regs, 1);
3395 assert((1 << src1) & 0xfb);
3396 unsigned src2 = bi_get_src(ins, regs, 2);
3397
3398 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
3399 unsigned lanes2_temp = 0;
3400 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
3401 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
3402 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
3403 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
3404 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
3405 else unreachable("Could not pattern match widen");
3406 unsigned lanes2 = lanes2_temp;
3407 assert(lanes2 < 8);
3408
3409 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
3410 assert(not_result < 2);
3411
3412 if (lanes2 != 0) {
3413 unsigned derived_9 = 0;
3414 if (lanes2 == 1) derived_9 = 0;
3415 else if (lanes2 == 2) derived_9 = 1;
3416 else if (lanes2 == 3) derived_9 = 2;
3417 else if (lanes2 == 4) derived_9 = 3;
3418 else unreachable("No pattern match at pos 9");
3419
3420 return 0x320000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
3421 } else if (lanes2 == 0) {
3422 return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13);
3423 } else {
3424 unreachable("No matching state found in fma_rshift_xor_v4i8");
3425 }
3426 }
3427
3428 static inline unsigned
pan_pack_add_icmpm_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3429 pan_pack_add_icmpm_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3430 {
3431 unsigned src0 = bi_get_src(ins, regs, 0);
3432 unsigned src1 = bi_get_src(ins, regs, 1);
3433 unsigned src2 = bi_get_src(ins, regs, 2);
3434
3435 return 0x7ba00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
3436 }
3437
3438 static inline unsigned
pan_pack_add_icmp_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3439 pan_pack_add_icmp_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3440 {
3441 unsigned src0 = bi_get_src(ins, regs, 0);
3442 unsigned src1 = bi_get_src(ins, regs, 1);
3443
3444 unsigned result_type = 1;
3445
3446 unsigned cmpf_table[] = {
3447 ~0, ~0, ~0, ~0, ~0, 0, 1
3448 };
3449 unsigned cmpf = cmpf_table[ins->cond];
3450 assert(cmpf < 2);
3451
3452 return 0x7b300 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
3453 }
3454
3455 static inline unsigned
pan_pack_add_branch_s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3456 pan_pack_add_branch_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3457 {
3458 unsigned src0 = bi_get_src(ins, regs, 0);
3459 unsigned src1 = bi_get_src(ins, regs, 1);
3460 unsigned src2 = bi_get_src(ins, regs, 2);
3461 assert((1 << src2) & 0xf7);
3462
3463 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3464 unsigned widen0_temp = 0;
3465 if (widen0_sz == 32) widen0_temp = 0;
3466 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
3467 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
3468 else unreachable("Could not pattern match widen");
3469 unsigned widen0 = widen0_temp;
3470 assert(widen0 < 4);
3471
3472 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3473 unsigned widen1_temp = 0;
3474 if (widen1_sz == 32) widen1_temp = 0;
3475 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
3476 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
3477 else unreachable("Could not pattern match widen");
3478 unsigned widen1 = widen1_temp;
3479 assert(widen1 < 4);
3480
3481 unsigned cmpf_table[] = {
3482 ~0, 2, 3, 1, 0, ~0, ~0
3483 };
3484 unsigned cmpf = cmpf_table[ins->cond];
3485 assert(cmpf < 4);
3486
3487 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1))) {
3488 { unsigned temp = src0; src0 = src1; src1 = temp; }
3489 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
3490 if (cmpf == 0) cmpf = 2;
3491 else if (cmpf == 3) cmpf = 1;
3492 else if (cmpf == 2) cmpf = 0;
3493 else if (cmpf == 1) cmpf = 3;
3494 }
3495
3496 unsigned derived_12 = 0;
3497 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
3498 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
3499 else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 4;
3500 else unreachable("No pattern match at pos 12");
3501
3502 unsigned derived_9 = 0;
3503 if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 2))) derived_9 = 0;
3504 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1;
3505 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) derived_9 = 2;
3506 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0))) derived_9 = 3;
3507 else if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4;
3508 else unreachable("No pattern match at pos 9");
3509
3510 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
3511 }
3512
3513 static inline unsigned
pan_pack_add_load_i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3514 pan_pack_add_load_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3515 {
3516 unsigned src0 = bi_get_src(ins, regs, 0);
3517 unsigned src1 = bi_get_src(ins, regs, 1);
3518
3519 assert(ins->segment);
3520 unsigned seg = ins->segment;
3521 assert(seg < 8);
3522
3523 unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3524 unsigned lane_temp = 0;
3525 if (lane_sz == 8 && ins->swizzle[0][0] == 0) lane_temp = 0;
3526 else if (lane_sz == 8 && ins->swizzle[0][0] == 1) lane_temp = 1;
3527 else if (lane_sz == 8 && ins->swizzle[0][0] == 2) lane_temp = 2;
3528 else if (lane_sz == 8 && ins->swizzle[0][0] == 3) lane_temp = 3;
3529 else if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 4;
3530 else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 5;
3531 else if (lane_sz == 32) lane_temp = 6;
3532 else if (lane_sz == 64) lane_temp = 7;
3533 else unreachable("Could not pattern match widen");
3534 unsigned lane = lane_temp;
3535 assert(lane < 8);
3536
3537 ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
3538 bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
3539 unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0;
3540 assert(extend < 4);
3541
3542 bi_write_staging_register(clause, ins);
3543 if ((extend == 0) && ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))) {
3544 unsigned derived_9 = 0;
3545 if (lane == 0) derived_9 = 0;
3546 else if (lane == 1) derived_9 = 1;
3547 else if (lane == 2) derived_9 = 2;
3548 else if (lane == 3) derived_9 = 3;
3549 else unreachable("No pattern match at pos 9");
3550
3551 return 0x60000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
3552 } else if ((extend != 0) && ((lane == 4) || (lane == 5))) {
3553 unsigned derived_9 = 0;
3554 if (extend == 1) derived_9 = 0;
3555 else if (extend == 2) derived_9 = 1;
3556 else unreachable("No pattern match at pos 9");
3557
3558 unsigned derived_10 = 0;
3559 if (lane == 4) derived_10 = 0;
3560 else if (lane == 5) derived_10 = 1;
3561 else unreachable("No pattern match at pos 10");
3562
3563 return 0x63800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9) | (derived_10 << 10);
3564 } else if ((extend != 0) && (lane == 6)) {
3565 unsigned derived_9 = 0;
3566 if (extend == 1) derived_9 = 0;
3567 else if (extend == 2) derived_9 = 1;
3568 else unreachable("No pattern match at pos 9");
3569
3570 return 0x63400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
3571 } else if ((extend != 0) && (lane == 7)) {
3572 unsigned derived_9 = 0;
3573 if (extend == 1) derived_9 = 0;
3574 else if (extend == 2) derived_9 = 1;
3575 else unreachable("No pattern match at pos 9");
3576
3577 return 0x61400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
3578 } else {
3579 unreachable("No matching state found in add_load_i8");
3580 }
3581 }
3582
3583 static inline unsigned
pan_pack_fma_csel_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3584 pan_pack_fma_csel_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3585 {
3586 unsigned src0 = bi_get_src(ins, regs, 0);
3587 assert((1 << src0) & 0xfb);
3588 unsigned src1 = bi_get_src(ins, regs, 1);
3589 assert((1 << src1) & 0xfb);
3590 unsigned src2 = bi_get_src(ins, regs, 2);
3591 unsigned src3 = bi_get_src(ins, regs, 3);
3592
3593 unsigned cmpf_table[] = {
3594 ~0, 4, 5, 2, 1, 0, 3
3595 };
3596 unsigned cmpf = cmpf_table[ins->cond];
3597 assert(cmpf < 8);
3598
3599 if ((cmpf == 4) || (cmpf == 5)) {
3600 { unsigned temp = src0; src0 = src1; src1 = temp; }
3601 if (cmpf == 5) cmpf = 2;
3602 else if (cmpf == 4) cmpf = 1;
3603 }
3604
3605 if (cmpf == 3) {
3606 { unsigned temp = src2; src2 = src3; src3 = temp; }
3607 if (cmpf == 3) cmpf = 0;
3608 }
3609
3610 unsigned derived_12 = 0;
3611 if (cmpf == 0) derived_12 = 0;
3612 else if (cmpf == 1) derived_12 = 1;
3613 else if (cmpf == 2) derived_12 = 2;
3614 else unreachable("No pattern match at pos 12");
3615
3616 return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
3617 }
3618
3619 static inline unsigned
pan_pack_add_frsq_approx_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3620 pan_pack_add_frsq_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3621 {
3622 unsigned src0 = bi_get_src(ins, regs, 0);
3623 assert((1 << src0) & 0xf7);
3624
3625 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3626 unsigned widen0_temp = 0;
3627 if (widen0_sz == 32) widen0_temp = 0;
3628 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
3629 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
3630 else unreachable("Could not pattern match widen");
3631 unsigned widen0 = widen0_temp;
3632 assert(widen0 < 4);
3633
3634 unsigned neg = ins->src_neg[0];
3635 assert(neg < 2);
3636
3637 unsigned abs0 = ins->src_abs[0];
3638 assert(abs0 < 2);
3639
3640 unsigned divzero = 0;
3641
3642 if (widen0 == 0) {
3643 return 0x67100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
3644 } else if (widen0 != 0) {
3645 unsigned derived_7 = 0;
3646 if (widen0 == 1) derived_7 = 0;
3647 else if (widen0 == 2) derived_7 = 1;
3648 else unreachable("No pattern match at pos 7");
3649
3650 return 0x67140 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
3651 } else {
3652 unreachable("No matching state found in add_frsq_approx_f32");
3653 }
3654 }
3655
3656 static inline unsigned
pan_pack_add_iabs_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3657 pan_pack_add_iabs_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3658 {
3659 unsigned src0 = bi_get_src(ins, regs, 0);
3660
3661 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3662 unsigned swz0_temp = 0;
3663 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
3664 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
3665 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
3666 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
3667 else unreachable("Could not pattern match widen");
3668 unsigned swz0 = swz0_temp;
3669 assert(swz0 < 4);
3670
3671 return 0x3de88 | (src0 << 0) | (swz0 << 4);
3672 }
3673
3674 static inline unsigned
pan_pack_add_ld_attr_tex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3675 pan_pack_add_ld_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3676 {
3677 unsigned src0 = bi_get_src(ins, regs, 0);
3678 unsigned src1 = bi_get_src(ins, regs, 1);
3679 unsigned src2 = bi_get_src(ins, regs, 2);
3680
3681 unsigned register_format_temp = 0;
3682 if (ins->format == nir_type_float16) register_format_temp = 0;
3683 else if (ins->format == nir_type_float32) register_format_temp = 1;
3684 else if (ins->format == nir_type_int32) register_format_temp = 2;
3685 else if (ins->format == nir_type_uint32) register_format_temp = 3;
3686 else if (ins->format == nir_type_int16) register_format_temp = 4;
3687 else if (ins->format == nir_type_uint16) register_format_temp = 5;
3688 else if (ins->format == nir_type_float64) register_format_temp = 6;
3689 else if (ins->format == nir_type_int64) register_format_temp = 7;
3690 else unreachable("Could not pattern match register format");
3691 unsigned register_format = register_format_temp;
3692 assert(register_format < 16);
3693
3694 unsigned vecsize = ins->vector_channels - 1;
3695 assert(vecsize < 4);
3696
3697 bi_write_staging_register(clause, ins);
3698 if (register_format != 8) {
3699 unsigned derived_13 = 0;
3700 if (register_format == 0) derived_13 = 0;
3701 else if (register_format == 1) derived_13 = 1;
3702 else if (register_format == 2) derived_13 = 2;
3703 else if (register_format == 3) derived_13 = 3;
3704 else if (register_format == 4) derived_13 = 4;
3705 else if (register_format == 5) derived_13 = 5;
3706 else if (register_format == 6) derived_13 = 6;
3707 else if (register_format == 7) derived_13 = 7;
3708 else unreachable("No pattern match at pos 13");
3709
3710 return 0x40600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13);
3711 } else if (register_format == 8) {
3712 return 0xc4600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11);
3713 } else {
3714 unreachable("No matching state found in add_ld_attr_tex");
3715 }
3716 }
3717
3718 static inline unsigned
pan_pack_fma_imuld(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3719 pan_pack_fma_imuld(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3720 {
3721 unsigned src0 = bi_get_src(ins, regs, 0);
3722 assert((1 << src0) & 0x33);
3723 unsigned src1 = bi_get_src(ins, regs, 1);
3724 assert((1 << src1) & 0x33);
3725
3726 unsigned threads = 0;
3727
3728 return 0x70f100 | (src0 << 0) | (src1 << 3) | (threads << 6);
3729 }
3730
3731 static inline unsigned
pan_pack_add_ld_var_special(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3732 pan_pack_add_ld_var_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3733 {
3734 unsigned src0 = bi_get_src(ins, regs, 1);
3735
3736 unsigned varying_name = ins->constant.u64 & 0x3;
3737 assert(varying_name < 32);
3738
3739 unsigned vecsize = ins->vector_channels - 1;
3740 assert(vecsize < 4);
3741
3742 unsigned update = (ins->constant.u64 >= 20) ? 3 : 0;
3743 assert(update < 4);
3744
3745 unsigned register_format_temp = 0;
3746 if (ins->format == nir_type_float32) register_format_temp = 0;
3747 else if (ins->format == nir_type_float16) register_format_temp = 1;
3748 else unreachable("Could not pattern match register format");
3749 unsigned register_format = register_format_temp;
3750 assert(register_format < 4);
3751
3752 unsigned sample = ins->load_vary.interp_mode;
3753 assert(sample < 8);
3754
3755 bi_write_staging_register(clause, ins);
3756 if (register_format != 2) {
3757 unsigned derived_3 = 0;
3758 if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0;
3759 else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2;
3760 else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3;
3761 else unreachable("No pattern match at pos 3");
3762
3763 unsigned derived_19 = 0;
3764 if (register_format == 0) derived_19 = 0;
3765 else if (register_format == 1) derived_19 = 1;
3766 else unreachable("No pattern match at pos 19");
3767
3768 unsigned derived_10 = 0;
3769 if ((sample == 0) && (update == 0)) derived_10 = 0;
3770 else if ((sample == 1) && (update == 0)) derived_10 = 1;
3771 else if ((sample == 2) && (update == 0)) derived_10 = 2;
3772 else if ((sample == 3) && (update == 0)) derived_10 = 3;
3773 else if ((sample == 4) && (update == 1)) derived_10 = 4;
3774 else if ((sample == 0) && (update == 2)) derived_10 = 8;
3775 else if ((sample == 1) && (update == 2)) derived_10 = 9;
3776 else if ((sample == 0) && (update == 3)) derived_10 = 10;
3777 else if ((sample == 1) && (update == 3)) derived_10 = 11;
3778 else if ((sample == 2) && (update == 3)) derived_10 = 12;
3779 else if ((sample == 3) && (update == 3)) derived_10 = 13;
3780 else unreachable("No pattern match at pos 10");
3781
3782 return 0x500a0 | (src0 << 0) | (derived_3 << 3) | (derived_19 << 19) | (derived_10 << 10);
3783 } else if (register_format == 2) {
3784 unsigned derived_3 = 0;
3785 if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0;
3786 else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2;
3787 else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3;
3788 else unreachable("No pattern match at pos 3");
3789
3790 unsigned derived_10 = 0;
3791 if ((sample == 0) && (update == 0)) derived_10 = 0;
3792 else if ((sample == 1) && (update == 0)) derived_10 = 1;
3793 else if ((sample == 2) && (update == 0)) derived_10 = 2;
3794 else if ((sample == 3) && (update == 0)) derived_10 = 3;
3795 else if ((sample == 4) && (update == 1)) derived_10 = 4;
3796 else if ((sample == 0) && (update == 2)) derived_10 = 8;
3797 else if ((sample == 1) && (update == 2)) derived_10 = 9;
3798 else if ((sample == 0) && (update == 3)) derived_10 = 10;
3799 else if ((sample == 1) && (update == 3)) derived_10 = 11;
3800 else if ((sample == 2) && (update == 3)) derived_10 = 12;
3801 else if ((sample == 3) && (update == 3)) derived_10 = 13;
3802 else unreachable("No pattern match at pos 10");
3803
3804 return 0xcc0a0 | (src0 << 0) | (derived_3 << 3) | (derived_10 << 10);
3805 } else {
3806 unreachable("No matching state found in add_ld_var_special");
3807 }
3808 }
3809
3810 static inline unsigned
pan_pack_add_fcos_table_u6(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3811 pan_pack_add_fcos_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3812 {
3813 unsigned src0 = bi_get_src(ins, regs, 0);
3814 assert((1 << src0) & 0xf7);
3815
3816 unsigned offset = 0;
3817
3818 return 0x67a88 | (src0 << 0) | (offset << 4);
3819 }
3820
3821 static inline unsigned
pan_pack_add_ld_cvt(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3822 pan_pack_add_ld_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3823 {
3824 unsigned src0 = bi_get_src(ins, regs, 0);
3825 unsigned src1 = bi_get_src(ins, regs, 1);
3826 unsigned src2 = bi_get_src(ins, regs, 2);
3827 assert((1 << src2) & 0xf7);
3828
3829 unsigned vecsize = ins->vector_channels - 1;
3830 assert(vecsize < 4);
3831
3832 bi_write_staging_register(clause, ins);
3833 return 0xc9000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
3834 }
3835
3836 static inline unsigned
pan_pack_fma_arshift_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3837 pan_pack_fma_arshift_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3838 {
3839 unsigned src0 = bi_get_src(ins, regs, 0);
3840 assert((1 << src0) & 0xfb);
3841 unsigned src1 = bi_get_src(ins, regs, 1);
3842 assert((1 << src1) & 0x8);
3843 unsigned src2 = bi_get_src(ins, regs, 2);
3844
3845 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
3846 unsigned lanes2_temp = 0;
3847 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
3848 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
3849 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
3850 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
3851 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
3852 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
3853 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
3854 else unreachable("Could not pattern match widen");
3855 unsigned lanes2 = lanes2_temp;
3856 assert(lanes2 < 8);
3857
3858 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
3859 unsigned derived_9 = 0;
3860 if (lanes2 == 0) derived_9 = 0;
3861 else if (lanes2 == 1) derived_9 = 1;
3862 else if (lanes2 == 2) derived_9 = 2;
3863 else if (lanes2 == 3) derived_9 = 3;
3864 else unreachable("No pattern match at pos 9");
3865
3866 return 0x334818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3867 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
3868 unsigned derived_9 = 0;
3869 if (lanes2 == 4) derived_9 = 1;
3870 else if (lanes2 == 5) derived_9 = 2;
3871 else if (lanes2 == 6) derived_9 = 3;
3872 else unreachable("No pattern match at pos 9");
3873
3874 return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
3875 } else {
3876 unreachable("No matching state found in fma_arshift_v2i16");
3877 }
3878 }
3879
3880 static inline unsigned
pan_pack_fma_csel_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3881 pan_pack_fma_csel_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3882 {
3883 unsigned src0 = bi_get_src(ins, regs, 0);
3884 assert((1 << src0) & 0xfb);
3885 unsigned src1 = bi_get_src(ins, regs, 1);
3886 assert((1 << src1) & 0xfb);
3887 unsigned src2 = bi_get_src(ins, regs, 2);
3888 unsigned src3 = bi_get_src(ins, regs, 3);
3889
3890 unsigned cmpf_table[] = {
3891 ~0, ~0, ~0, ~0, ~0, 0, 1
3892 };
3893 unsigned cmpf = cmpf_table[ins->cond];
3894 assert(cmpf < 2);
3895
3896 if (cmpf == 1) {
3897 { unsigned temp = src2; src2 = src3; src3 = temp; }
3898 if (cmpf == 1) cmpf = 0;
3899 }
3900
3901 unsigned derived_12 = 0;
3902 if (cmpf == 0) derived_12 = 3;
3903 else unreachable("No pattern match at pos 12");
3904
3905 return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
3906 }
3907
3908 static inline unsigned
pan_pack_add_ld_tile(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3909 pan_pack_add_ld_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3910 {
3911 unsigned src0 = bi_get_src(ins, regs, 0);
3912 unsigned src1 = bi_get_src(ins, regs, 1);
3913 unsigned src2 = bi_get_src(ins, regs, 2);
3914 assert((1 << src2) & 0xf7);
3915
3916 unsigned vecsize = ins->vector_channels - 1;
3917 assert(vecsize < 4);
3918
3919 bi_write_staging_register(clause, ins);
3920 return 0xcb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9);
3921 }
3922
3923 static inline unsigned
pan_pack_add_icmp_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3924 pan_pack_add_icmp_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3925 {
3926 unsigned src0 = bi_get_src(ins, regs, 0);
3927 unsigned src1 = bi_get_src(ins, regs, 1);
3928
3929 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3930 unsigned swz0_temp = 0;
3931 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
3932 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
3933 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
3934 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
3935 else unreachable("Could not pattern match widen");
3936 unsigned swz0 = swz0_temp;
3937 assert(swz0 < 4);
3938
3939 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
3940 unsigned swz1_temp = 0;
3941 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
3942 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
3943 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
3944 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
3945 else unreachable("Could not pattern match widen");
3946 unsigned swz1 = swz1_temp;
3947 assert(swz1 < 4);
3948
3949 unsigned result_type = 1;
3950
3951 unsigned cmpf_table[] = {
3952 ~0, 2, 3, 1, 0, ~0, ~0
3953 };
3954 unsigned cmpf = cmpf_table[ins->cond];
3955 assert(cmpf < 4);
3956
3957 if ((cmpf == 2) || (cmpf == 3)) {
3958 { unsigned temp = src0; src0 = src1; src1 = temp; }
3959 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
3960 if (cmpf == 3) cmpf = 1;
3961 else if (cmpf == 2) cmpf = 0;
3962 }
3963
3964 unsigned derived_12 = 0;
3965 if (cmpf == 0) derived_12 = 0;
3966 else if (cmpf == 1) derived_12 = 1;
3967 else unreachable("No pattern match at pos 12");
3968
3969 return 0x78000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12);
3970 }
3971
3972 static inline unsigned
pan_pack_add_load_i128(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3973 pan_pack_add_load_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3974 {
3975 unsigned src0 = bi_get_src(ins, regs, 0);
3976 unsigned src1 = bi_get_src(ins, regs, 1);
3977
3978 assert(ins->segment);
3979 unsigned seg = ins->segment;
3980 assert(seg < 8);
3981
3982 bi_write_staging_register(clause, ins);
3983 return 0x61000 | (src0 << 0) | (src1 << 3) | (seg << 6);
3984 }
3985
3986 static inline unsigned
pan_pack_add_ilogb_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)3987 pan_pack_add_ilogb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
3988 {
3989 unsigned src0 = bi_get_src(ins, regs, 0);
3990
3991 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
3992 unsigned widen0_temp = 0;
3993 if (widen0_sz == 32) widen0_temp = 1;
3994 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
3995 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
3996 else unreachable("Could not pattern match widen");
3997 unsigned widen0 = widen0_temp;
3998 assert(widen0 < 4);
3999
4000 return 0x3d9e0 | (src0 << 0) | (widen0 << 3);
4001 }
4002
4003 static inline unsigned
pan_pack_add_frcp_approx_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4004 pan_pack_add_frcp_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4005 {
4006 unsigned src0 = bi_get_src(ins, regs, 0);
4007 assert((1 << src0) & 0xf7);
4008
4009 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4010 unsigned widen0_temp = 0;
4011 if (widen0_sz == 32) widen0_temp = 0;
4012 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4013 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4014 else unreachable("Could not pattern match widen");
4015 unsigned widen0 = widen0_temp;
4016 assert(widen0 < 4);
4017
4018 unsigned neg = ins->src_neg[0];
4019 assert(neg < 2);
4020
4021 unsigned abs0 = ins->src_abs[0];
4022 assert(abs0 < 2);
4023
4024 unsigned divzero = 0;
4025
4026 if (widen0 == 0) {
4027 return 0x67000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
4028 } else if (widen0 != 0) {
4029 unsigned derived_7 = 0;
4030 if (widen0 == 1) derived_7 = 0;
4031 else if (widen0 == 2) derived_7 = 1;
4032 else unreachable("No pattern match at pos 7");
4033
4034 return 0x67040 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
4035 } else {
4036 unreachable("No matching state found in add_frcp_approx_f32");
4037 }
4038 }
4039
4040 static inline unsigned
pan_pack_add_frcp_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4041 pan_pack_add_frcp_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4042 {
4043 unsigned src0 = bi_get_src(ins, regs, 0);
4044 assert((1 << src0) & 0xf7);
4045
4046 unsigned neg = ins->src_neg[0];
4047 assert(neg < 2);
4048
4049 unsigned abs0 = ins->src_abs[0];
4050 assert(abs0 < 2);
4051
4052 unsigned divzero = 0;
4053
4054 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4055 unsigned lane0_temp = 0;
4056 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
4057 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
4058 else unreachable("Could not pattern match widen");
4059 unsigned lane0 = lane0_temp;
4060 assert(lane0 < 2);
4061
4062 return 0x67080 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8);
4063 }
4064
4065 static inline unsigned
pan_pack_add_discard_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4066 pan_pack_add_discard_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4067 {
4068 unsigned src0 = bi_get_src(ins, regs, 0);
4069 unsigned src1 = bi_get_src(ins, regs, 1);
4070
4071 unsigned cmpf_table[] = {
4072 ~0, 4, 5, 2, 1, 0, 3
4073 };
4074 unsigned cmpf = cmpf_table[ins->cond];
4075 assert(cmpf < 8);
4076
4077 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4078 unsigned widen0_temp = 0;
4079 if (widen0_sz == 32) widen0_temp = 0;
4080 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4081 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4082 else unreachable("Could not pattern match widen");
4083 unsigned widen0 = widen0_temp;
4084 assert(widen0 < 4);
4085
4086 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4087 unsigned widen1_temp = 0;
4088 if (widen1_sz == 32) widen1_temp = 0;
4089 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4090 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4091 else unreachable("Could not pattern match widen");
4092 unsigned widen1 = widen1_temp;
4093 assert(widen1 < 4);
4094
4095 if ((cmpf == 1) || (cmpf == 2)) {
4096 { unsigned temp = src0; src0 = src1; src1 = temp; }
4097 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
4098 if (cmpf == 1) cmpf = 4;
4099 else if (cmpf == 2) cmpf = 5;
4100 }
4101
4102 unsigned derived_6 = 0;
4103 if (cmpf == 0) derived_6 = 0;
4104 else if (cmpf == 3) derived_6 = 1;
4105 else if (cmpf == 4) derived_6 = 2;
4106 else if (cmpf == 5) derived_6 = 3;
4107 else unreachable("No pattern match at pos 6");
4108
4109 unsigned derived_8 = 0;
4110 if ((widen0 == 1) && (widen1 == 1)) derived_8 = 0;
4111 else if ((widen0 == 2) && (widen1 == 1)) derived_8 = 1;
4112 else if ((widen0 == 1) && (widen1 == 2)) derived_8 = 2;
4113 else if ((widen0 == 2) && (widen1 == 2)) derived_8 = 3;
4114 else if ((widen0 == 0) && (widen1 == 0)) derived_8 = 4;
4115 else unreachable("No pattern match at pos 8");
4116
4117 return 0xc8800 | (src0 << 0) | (src1 << 3) | (derived_6 << 6) | (derived_8 << 8);
4118 }
4119
4120 static inline unsigned
pan_pack_fma_iaddc_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4121 pan_pack_fma_iaddc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4122 {
4123 unsigned src0 = bi_get_src(ins, regs, 0);
4124 assert((1 << src0) & 0xfb);
4125 unsigned src1 = bi_get_src(ins, regs, 1);
4126 assert((1 << src1) & 0xfb);
4127 unsigned src2 = bi_get_src(ins, regs, 2);
4128
4129 return 0x27fc00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
4130 }
4131
4132 static inline unsigned
pan_pack_add_f16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4133 pan_pack_add_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4134 {
4135 unsigned src0 = bi_get_src(ins, regs, 0);
4136
4137 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4138 unsigned lane0_temp = 0;
4139 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
4140 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
4141 else unreachable("Could not pattern match widen");
4142 unsigned lane0 = lane0_temp;
4143 assert(lane0 < 2);
4144
4145 return 0x3cd10 | (src0 << 0) | (lane0 << 3);
4146 }
4147
4148 static inline unsigned
pan_pack_add_fexp_table_u4(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4149 pan_pack_add_fexp_table_u4(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4150 {
4151 unsigned src0 = bi_get_src(ins, regs, 0);
4152 assert((1 << src0) & 0xf7);
4153
4154 unsigned adj = 0;
4155
4156 return 0x67ac0 | (src0 << 0) | (adj << 3);
4157 }
4158
4159 static inline unsigned
pan_pack_add_branch_no_diverg(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4160 pan_pack_add_branch_no_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4161 {
4162 unsigned src0 = bi_get_src(ins, regs, 0);
4163 assert((1 << src0) & 0xf7);
4164
4165 return 0x6fa34 | (src0 << 6);
4166 }
4167
4168 static inline unsigned
pan_pack_add_acmpxchg_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4169 pan_pack_add_acmpxchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4170 {
4171 unsigned src0 = bi_get_src(ins, regs, 1);
4172 unsigned src1 = bi_get_src(ins, regs, 2);
4173
4174 assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
4175 unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
4176 assert(seg < 2);
4177
4178 bi_read_staging_register(clause, ins);
4179 assert(ins->src[0] == ins->dest);
4180 return 0x644c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
4181 }
4182
4183 static inline unsigned
pan_pack_add_icmp_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4184 pan_pack_add_icmp_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4185 {
4186 unsigned src0 = bi_get_src(ins, regs, 0);
4187 unsigned src1 = bi_get_src(ins, regs, 1);
4188
4189 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4190 unsigned swz0_temp = 0;
4191 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4192 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4193 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
4194 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4195 else unreachable("Could not pattern match widen");
4196 unsigned swz0 = swz0_temp;
4197 assert(swz0 < 4);
4198
4199 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4200 unsigned swz1_temp = 0;
4201 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
4202 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
4203 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
4204 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
4205 else unreachable("Could not pattern match widen");
4206 unsigned swz1 = swz1_temp;
4207 assert(swz1 < 4);
4208
4209 unsigned result_type = 1;
4210
4211 unsigned cmpf_table[] = {
4212 ~0, ~0, ~0, ~0, ~0, 0, 1
4213 };
4214 unsigned cmpf = cmpf_table[ins->cond];
4215 assert(cmpf < 2);
4216
4217 return 0x7a000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (cmpf << 11);
4218 }
4219
4220 static inline unsigned
pan_pack_fma_fadd_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4221 pan_pack_fma_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4222 {
4223 unsigned src0 = bi_get_src(ins, regs, 0);
4224 assert((1 << src0) & 0xfb);
4225 unsigned src1 = bi_get_src(ins, regs, 1);
4226 assert((1 << src1) & 0xfb);
4227
4228 unsigned abs1 = ins->src_abs[1];
4229 assert(abs1 < 2);
4230
4231 unsigned neg0 = ins->src_neg[0];
4232 assert(neg0 < 2);
4233
4234 unsigned neg1 = ins->src_neg[1];
4235 assert(neg1 < 2);
4236
4237 unsigned abs0 = ins->src_abs[0];
4238 assert(abs0 < 2);
4239
4240 unsigned round = ins->roundmode;
4241 assert(round < 4);
4242
4243 unsigned clamp = ins->outmod;
4244 assert(clamp < 4);
4245
4246 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4247 unsigned widen0_temp = 0;
4248 if (widen0_sz == 32) widen0_temp = 0;
4249 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4250 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4251 else unreachable("Could not pattern match widen");
4252 unsigned widen0 = widen0_temp;
4253 assert(widen0 < 4);
4254
4255 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4256 unsigned widen1_temp = 0;
4257 if (widen1_sz == 32) widen1_temp = 0;
4258 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4259 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4260 else unreachable("Could not pattern match widen");
4261 unsigned widen1 = widen1_temp;
4262 assert(widen1 < 4);
4263
4264 if ((widen0 == 2) && (widen1 == 1)) {
4265 { unsigned temp = src0; src0 = src1; src1 = temp; }
4266 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
4267 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
4268 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
4269 }
4270
4271 unsigned derived_9 = 0;
4272 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
4273 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
4274 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
4275 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
4276 else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4;
4277 else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5;
4278 else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6;
4279 else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7;
4280 else unreachable("No pattern match at pos 9");
4281
4282 return 0x2c0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (round << 13) | (clamp << 15) | (derived_9 << 9);
4283 }
4284
4285 static inline unsigned
pan_pack_add_icmp_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4286 pan_pack_add_icmp_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4287 {
4288 unsigned src0 = bi_get_src(ins, regs, 0);
4289 unsigned src1 = bi_get_src(ins, regs, 1);
4290
4291 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4292 unsigned swz0_temp = 0;
4293 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4294 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4295 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
4296 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4297 else unreachable("Could not pattern match widen");
4298 unsigned swz0 = swz0_temp;
4299 assert(swz0 < 4);
4300
4301 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4302 unsigned swz1_temp = 0;
4303 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
4304 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
4305 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
4306 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
4307 else unreachable("Could not pattern match widen");
4308 unsigned swz1 = swz1_temp;
4309 assert(swz1 < 4);
4310
4311 unsigned result_type = 1;
4312
4313 unsigned cmpf_table[] = {
4314 ~0, 2, 3, 1, 0, ~0, ~0
4315 };
4316 unsigned cmpf = cmpf_table[ins->cond];
4317 assert(cmpf < 4);
4318
4319 if ((cmpf == 2) || (cmpf == 3)) {
4320 { unsigned temp = src0; src0 = src1; src1 = temp; }
4321 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
4322 if (cmpf == 3) cmpf = 1;
4323 else if (cmpf == 2) cmpf = 0;
4324 }
4325
4326 unsigned derived_12 = 0;
4327 if (cmpf == 0) derived_12 = 0;
4328 else if (cmpf == 1) derived_12 = 1;
4329 else unreachable("No pattern match at pos 12");
4330
4331 return 0x78800 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12);
4332 }
4333
4334 static inline unsigned
pan_pack_fma_csel_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4335 pan_pack_fma_csel_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4336 {
4337 unsigned src0 = bi_get_src(ins, regs, 0);
4338 assert((1 << src0) & 0xfb);
4339 unsigned src1 = bi_get_src(ins, regs, 1);
4340 assert((1 << src1) & 0xfb);
4341 unsigned src2 = bi_get_src(ins, regs, 2);
4342 unsigned src3 = bi_get_src(ins, regs, 3);
4343
4344 unsigned cmpf_table[] = {
4345 ~0, 2, 3, 1, 0, ~0, ~0
4346 };
4347 unsigned cmpf = cmpf_table[ins->cond];
4348 assert(cmpf < 4);
4349
4350 if ((cmpf == 2) || (cmpf == 3)) {
4351 { unsigned temp = src0; src0 = src1; src1 = temp; }
4352 if (cmpf == 3) cmpf = 1;
4353 else if (cmpf == 2) cmpf = 0;
4354 }
4355
4356 unsigned derived_12 = 0;
4357 if (cmpf == 0) derived_12 = 0;
4358 else if (cmpf == 1) derived_12 = 1;
4359 else unreachable("No pattern match at pos 12");
4360
4361 return 0x6e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
4362 }
4363
4364 static inline unsigned
pan_pack_add_branch_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4365 pan_pack_add_branch_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4366 {
4367 unsigned src0 = bi_get_src(ins, regs, 0);
4368 unsigned src1 = bi_get_src(ins, regs, 1);
4369 unsigned src2 = bi_get_src(ins, regs, 2);
4370 assert((1 << src2) & 0xf7);
4371
4372 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4373 unsigned widen0_temp = 0;
4374 if (widen0_sz == 32) widen0_temp = 0;
4375 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4376 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4377 else unreachable("Could not pattern match widen");
4378 unsigned widen0 = widen0_temp;
4379 assert(widen0 < 4);
4380
4381 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4382 unsigned widen1_temp = 0;
4383 if (widen1_sz == 32) widen1_temp = 0;
4384 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4385 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4386 else unreachable("Could not pattern match widen");
4387 unsigned widen1 = widen1_temp;
4388 assert(widen1 < 4);
4389
4390 unsigned cmpf_table[] = {
4391 ~0, 4, 5, 2, 1, 0, 3
4392 };
4393 unsigned cmpf = cmpf_table[ins->cond];
4394 assert(cmpf < 8);
4395
4396 if (((widen0 != 0) && (widen1 == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) {
4397 { unsigned temp = src0; src0 = src1; src1 = temp; }
4398 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
4399 if (cmpf == 1) cmpf = 4;
4400 else if (cmpf == 5) cmpf = 2;
4401 else if (cmpf == 4) cmpf = 1;
4402 else if (cmpf == 2) cmpf = 5;
4403 }
4404
4405 unsigned derived_12 = 0;
4406 if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
4407 else if ((widen0 == 0) && (widen1 == 1)) derived_12 = 5;
4408 else if ((widen0 == 0) && (widen1 == 2)) derived_12 = 6;
4409 else unreachable("No pattern match at pos 12");
4410
4411 unsigned derived_9 = 0;
4412 if ((widen0 == 0) && (widen1 != 0) && (cmpf == 3)) derived_9 = 1;
4413 else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 2)) derived_9 = 2;
4414 else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 5)) derived_9 = 3;
4415 else if ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4;
4416 else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5;
4417 else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6;
4418 else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && (cmpf == 0))) derived_9 = 7;
4419 else unreachable("No pattern match at pos 9");
4420
4421 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
4422 }
4423
4424 static inline unsigned
pan_pack_add_icmp_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4425 pan_pack_add_icmp_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4426 {
4427 unsigned src0 = bi_get_src(ins, regs, 0);
4428 unsigned src1 = bi_get_src(ins, regs, 1);
4429
4430 unsigned result_type = 1;
4431
4432 unsigned cmpf_table[] = {
4433 ~0, 2, 3, 1, 0, ~0, ~0
4434 };
4435 unsigned cmpf = cmpf_table[ins->cond];
4436 assert(cmpf < 4);
4437
4438 if ((cmpf == 2) || (cmpf == 3)) {
4439 { unsigned temp = src0; src0 = src1; src1 = temp; }
4440 if (cmpf == 3) cmpf = 1;
4441 else if (cmpf == 2) cmpf = 0;
4442 }
4443
4444 unsigned derived_6 = 0;
4445 if (cmpf == 0) derived_6 = 0;
4446 else if (cmpf == 1) derived_6 = 1;
4447 else unreachable("No pattern match at pos 6");
4448
4449 return 0x7b280 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
4450 }
4451
4452 static inline unsigned
pan_pack_add_texs_cube_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4453 pan_pack_add_texs_cube_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4454 {
4455 unsigned src0 = bi_get_src(ins, regs, 0);
4456 unsigned src1 = bi_get_src(ins, regs, 1);
4457 unsigned src2 = bi_get_src(ins, regs, 2);
4458
4459 unsigned skip = ins->skip;
4460 assert(skip < 2);
4461
4462 unsigned sampler_index = ins->texture.sampler_index;
4463 unsigned texture_index = ins->texture.texture_index;
4464 bi_write_staging_register(clause, ins);
4465 return 0xdc000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12);
4466 }
4467
4468 static inline unsigned
pan_pack_fma_rshift_or_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4469 pan_pack_fma_rshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4470 {
4471 unsigned src0 = bi_get_src(ins, regs, 0);
4472 assert((1 << src0) & 0xfb);
4473 unsigned src1 = bi_get_src(ins, regs, 1);
4474 assert((1 << src1) & 0xfb);
4475 unsigned src2 = bi_get_src(ins, regs, 2);
4476
4477 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
4478 unsigned lane2_temp = 0;
4479 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
4480 else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
4481 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
4482 else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
4483 else unreachable("Could not pattern match widen");
4484 unsigned lane2 = lane2_temp;
4485 assert(lane2 < 4);
4486
4487 unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
4488 assert(not1 < 2);
4489
4490 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
4491 assert(not_result < 2);
4492
4493 return 0x303000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
4494 }
4495
4496 static inline unsigned
pan_pack_add_ld_var(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4497 pan_pack_add_ld_var(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4498 {
4499 unsigned src0 = bi_get_src(ins, regs, 0);
4500 unsigned src1 = bi_get_src(ins, regs, 1);
4501
4502 unsigned vecsize = ins->vector_channels - 1;
4503 assert(vecsize < 4);
4504
4505 unsigned update = (ins->constant.u64 >= 20) ? 3 : 0;
4506 assert(update < 4);
4507
4508 unsigned register_format_temp = 0;
4509 if (ins->format == nir_type_float32) register_format_temp = 0;
4510 else if (ins->format == nir_type_float16) register_format_temp = 1;
4511 else unreachable("Could not pattern match register format");
4512 unsigned register_format = register_format_temp;
4513 assert(register_format < 4);
4514
4515 unsigned sample = ins->load_vary.interp_mode;
4516 assert(sample < 8);
4517
4518 bi_write_staging_register(clause, ins);
4519 if (register_format != 2) {
4520 unsigned derived_19 = 0;
4521 if (register_format == 0) derived_19 = 0;
4522 else if (register_format == 1) derived_19 = 1;
4523 else unreachable("No pattern match at pos 19");
4524
4525 unsigned derived_10 = 0;
4526 if ((sample == 0) && (update == 0)) derived_10 = 0;
4527 else if ((sample == 1) && (update == 0)) derived_10 = 1;
4528 else if ((sample == 2) && (update == 0)) derived_10 = 2;
4529 else if ((sample == 3) && (update == 0)) derived_10 = 3;
4530 else if ((sample == 4) && (update == 1)) derived_10 = 4;
4531 else if ((sample == 0) && (update == 2)) derived_10 = 8;
4532 else if ((sample == 1) && (update == 2)) derived_10 = 9;
4533 else if ((sample == 0) && (update == 3)) derived_10 = 10;
4534 else if ((sample == 1) && (update == 3)) derived_10 = 11;
4535 else if ((sample == 2) && (update == 3)) derived_10 = 12;
4536 else if ((sample == 3) && (update == 3)) derived_10 = 13;
4537 else unreachable("No pattern match at pos 10");
4538
4539 return 0x500c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_19 << 19) | (derived_10 << 10);
4540 } else if (register_format == 2) {
4541 unsigned derived_10 = 0;
4542 if ((sample == 0) && (update == 0)) derived_10 = 0;
4543 else if ((sample == 1) && (update == 0)) derived_10 = 1;
4544 else if ((sample == 2) && (update == 0)) derived_10 = 2;
4545 else if ((sample == 3) && (update == 0)) derived_10 = 3;
4546 else if ((sample == 4) && (update == 1)) derived_10 = 4;
4547 else if ((sample == 0) && (update == 2)) derived_10 = 8;
4548 else if ((sample == 1) && (update == 2)) derived_10 = 9;
4549 else if ((sample == 0) && (update == 3)) derived_10 = 10;
4550 else if ((sample == 1) && (update == 3)) derived_10 = 11;
4551 else if ((sample == 2) && (update == 3)) derived_10 = 12;
4552 else if ((sample == 3) && (update == 3)) derived_10 = 13;
4553 else unreachable("No pattern match at pos 10");
4554
4555 return 0xcc0c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_10 << 10);
4556 } else {
4557 unreachable("No matching state found in add_ld_var");
4558 }
4559 }
4560
4561 static inline unsigned
pan_pack_add_hadd_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4562 pan_pack_add_hadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4563 {
4564 unsigned src0 = bi_get_src(ins, regs, 0);
4565 unsigned src1 = bi_get_src(ins, regs, 1);
4566
4567 assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
4568 unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
4569 assert(round < 2);
4570
4571 unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4572 unsigned swap1_temp = 0;
4573 if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0;
4574 else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1;
4575 else unreachable("Could not pattern match widen");
4576 unsigned swap1 = swap1_temp;
4577 assert(swap1 < 2);
4578
4579 unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4580 unsigned swap0_temp = 0;
4581 if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0;
4582 else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1;
4583 else unreachable("Could not pattern match widen");
4584 unsigned swap0 = swap0_temp;
4585 assert(swap0 < 2);
4586
4587 return 0xbc840 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10);
4588 }
4589
4590 static inline unsigned
pan_pack_add_swz_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4591 pan_pack_add_swz_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4592 {
4593 unsigned src0 = bi_get_src(ins, regs, 0);
4594
4595 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4596 unsigned swz0_temp = 0;
4597 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4598 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4599 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4600 else unreachable("Could not pattern match widen");
4601 unsigned swz0 = swz0_temp;
4602 assert(swz0 < 4);
4603
4604 return 0x3d948 | (src0 << 0) | (swz0 << 4);
4605 }
4606
4607 static inline unsigned
pan_pack_add_atest(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4608 pan_pack_add_atest(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4609 {
4610 unsigned src0 = bi_get_src(ins, regs, 0);
4611 assert((1 << src0) & 0xf7);
4612 unsigned src1 = bi_get_src(ins, regs, 1);
4613 assert((1 << src1) & 0xf7);
4614
4615 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4616 unsigned widen1_temp = 0;
4617 if (widen1_sz == 32) widen1_temp = 1;
4618 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 2;
4619 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 3;
4620 else unreachable("Could not pattern match widen");
4621 unsigned widen1 = widen1_temp;
4622 assert(widen1 < 4);
4623
4624 bi_write_staging_register(clause, ins);
4625 return 0xc8f00 | (src0 << 0) | (src1 << 3) | (widen1 << 6);
4626 }
4627
4628 static inline unsigned
pan_pack_add_ldexp_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4629 pan_pack_add_ldexp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4630 {
4631 unsigned src0 = bi_get_src(ins, regs, 0);
4632 unsigned src1 = bi_get_src(ins, regs, 1);
4633
4634 unsigned round = ins->roundmode;
4635 assert(round < 8);
4636
4637 return 0x74e00 | (src0 << 0) | (src1 << 3) | (round << 6);
4638 }
4639
4640 static inline unsigned
pan_pack_fma_bitrev_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4641 pan_pack_fma_bitrev_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4642 {
4643 unsigned src0 = bi_get_src(ins, regs, 0);
4644 assert((1 << src0) & 0xfb);
4645
4646 return 0x701fc0 | (src0 << 0);
4647 }
4648
4649 static inline unsigned
pan_pack_add_icmpi_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4650 pan_pack_add_icmpi_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4651 {
4652 unsigned src0 = bi_get_src(ins, regs, 0);
4653 unsigned src1 = bi_get_src(ins, regs, 1);
4654
4655 unsigned result_type = 1;
4656
4657 unsigned cmpf_table[] = {
4658 ~0, ~0, ~0, ~0, ~0, 0, 1
4659 };
4660 unsigned cmpf = cmpf_table[ins->cond];
4661 assert(cmpf < 2);
4662
4663 return 0x7b900 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
4664 }
4665
4666 static inline unsigned
pan_pack_add_mov_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4667 pan_pack_add_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4668 {
4669 unsigned src0 = bi_get_src(ins, regs, 0);
4670
4671 return 0x3d968 | (src0 << 0);
4672 }
4673
4674 static inline unsigned
pan_pack_fma_frexpm_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4675 pan_pack_fma_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4676 {
4677 unsigned src0 = bi_get_src(ins, regs, 0);
4678 assert((1 << src0) & 0xfb);
4679
4680 unsigned abs0 = ins->src_abs[0];
4681 assert(abs0 < 2);
4682
4683 unsigned sqrt = 0;
4684
4685 unsigned log = 1;
4686
4687 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4688 unsigned widen0_temp = 0;
4689 if (widen0_sz == 32) widen0_temp = 1;
4690 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2;
4691 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3;
4692 else unreachable("Could not pattern match widen");
4693 unsigned widen0 = widen0_temp;
4694 assert(widen0 < 4);
4695
4696 unsigned neg0 = ins->src_neg[0];
4697 assert(neg0 < 2);
4698
4699 if ((log == 0) && (neg0 == 0)) {
4700 return 0x701b20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3);
4701 } else if ((log == 1) && (sqrt == 0)) {
4702 return 0x701a20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7);
4703 } else {
4704 unreachable("No matching state found in fma_frexpm_f32");
4705 }
4706 }
4707
4708 static inline unsigned
pan_pack_add_atom_cx(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4709 pan_pack_add_atom_cx(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4710 {
4711 unsigned src0 = bi_get_src(ins, regs, 1);
4712 unsigned src1 = bi_get_src(ins, regs, 2);
4713 unsigned src2 = bi_get_src(ins, regs, 3);
4714
4715 bi_read_staging_register(clause, ins);
4716 assert(ins->src[0] == ins->dest);
4717 return 0xd7400 | (src0 << 0) | (src1 << 3) | (src2 << 6);
4718 }
4719
4720 static inline unsigned
pan_pack_add_fadd_rscale_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4721 pan_pack_add_fadd_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4722 {
4723 unsigned src0 = bi_get_src(ins, regs, 0);
4724 unsigned src1 = bi_get_src(ins, regs, 1);
4725 unsigned src2 = bi_get_src(ins, regs, 2);
4726
4727 assert(ins->outmod == BIFROST_NONE || ins->outmod == BIFROST_SAT);
4728 unsigned clamp = (ins->outmod == BIFROST_SAT) ? 1 : 0;
4729 assert(clamp < 2);
4730
4731 unsigned special = 0;
4732
4733 unsigned round = ins->roundmode;
4734 assert(round < 8);
4735
4736 unsigned abs1 = ins->src_abs[1];
4737 assert(abs1 < 2);
4738
4739 unsigned neg0 = ins->src_neg[0];
4740 assert(neg0 < 2);
4741
4742 unsigned neg1 = ins->src_neg[1];
4743 assert(neg1 < 2);
4744
4745 unsigned abs0 = ins->src_abs[0];
4746 assert(abs0 < 2);
4747
4748 unsigned derived_9 = 0;
4749 if ((clamp == 0) && (special == 0) && (round == 0)) derived_9 = 0;
4750 else if ((clamp == 1) && (special == 0) && (round == 0)) derived_9 = 2;
4751 else if ((clamp == 0) && (special == 1) && (round == 4)) derived_9 = 3;
4752 else if ((clamp == 0) && (special == 1) && (round == 0)) derived_9 = 4;
4753 else if ((clamp == 0) && (special == 1) && (round == 1)) derived_9 = 5;
4754 else if ((clamp == 0) && (special == 1) && (round == 2)) derived_9 = 6;
4755 else if ((clamp == 0) && (special == 1) && (round == 3)) derived_9 = 7;
4756 else unreachable("No pattern match at pos 9");
4757
4758 return 0x88000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs1 << 12) | (neg0 << 13) | (neg1 << 14) | (abs0 << 16) | (derived_9 << 9);
4759 }
4760
4761 static inline unsigned
pan_pack_fma_atom_post_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4762 pan_pack_fma_atom_post_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4763 {
4764 unsigned src0 = bi_get_src(ins, regs, 0);
4765 assert((1 << src0) & 0xfb);
4766 unsigned src1 = bi_get_src(ins, regs, 1);
4767 assert((1 << src1) & 0xfb);
4768
4769 unsigned atom_opc = 2;
4770
4771 return 0x6ee400 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
4772 }
4773
4774 static inline unsigned
pan_pack_fma_imul_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4775 pan_pack_fma_imul_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4776 {
4777 unsigned src0 = bi_get_src(ins, regs, 0);
4778 assert((1 << src0) & 0xfb);
4779 unsigned src1 = bi_get_src(ins, regs, 1);
4780 assert((1 << src1) & 0xfb);
4781
4782 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4783 unsigned widen1_temp = 0;
4784 if (widen1_sz == 32) widen1_temp = 0;
4785 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4786 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4787 else if (widen1_sz == 8 && ins->swizzle[1][0] == 0) widen1_temp = 3;
4788 else if (widen1_sz == 8 && ins->swizzle[1][0] == 1) widen1_temp = 4;
4789 else if (widen1_sz == 8 && ins->swizzle[1][0] == 2) widen1_temp = 5;
4790 else if (widen1_sz == 8 && ins->swizzle[1][0] == 3) widen1_temp = 6;
4791 else unreachable("Could not pattern match widen");
4792 unsigned widen1 = widen1_temp;
4793 assert(widen1 < 8);
4794
4795 ASSERTED bool extend1_small = nir_alu_type_get_type_size(ins->src_types[1]) <= 16;
4796 bool extend1_signed = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int;
4797 unsigned extend1 = extend1_small ? (extend1_signed ? 1 : 2) : 0;
4798 assert(extend1 < 4);
4799
4800 if ((extend1 == 0) && (widen1 == 0)) {
4801 return 0x73c0c0 | (src0 << 0) | (src1 << 3);
4802 } else if ((extend1 != 0) && ((widen1 == 1) || (widen1 == 2))) {
4803 unsigned derived_9 = 0;
4804 if (widen1 == 1) derived_9 = 0;
4805 else if (widen1 == 2) derived_9 = 1;
4806 else unreachable("No pattern match at pos 9");
4807
4808 unsigned derived_10 = 0;
4809 if (extend1 == 2) derived_10 = 0;
4810 else if (extend1 == 1) derived_10 = 1;
4811 else unreachable("No pattern match at pos 10");
4812
4813 return 0x73c8c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_10 << 10);
4814 } else if ((extend1 != 0) && ((widen1 == 3) || (widen1 == 4) || (widen1 == 5) || (widen1 == 6))) {
4815 unsigned derived_9 = 0;
4816 if (widen1 == 3) derived_9 = 0;
4817 else if (widen1 == 4) derived_9 = 1;
4818 else if (widen1 == 5) derived_9 = 2;
4819 else if (widen1 == 6) derived_9 = 3;
4820 else unreachable("No pattern match at pos 9");
4821
4822 unsigned derived_11 = 0;
4823 if (extend1 == 2) derived_11 = 0;
4824 else if (extend1 == 1) derived_11 = 1;
4825 else unreachable("No pattern match at pos 11");
4826
4827 return 0x73b0c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_11 << 11);
4828 } else {
4829 unreachable("No matching state found in fma_imul_i32");
4830 }
4831 }
4832
4833 static inline unsigned
pan_pack_add_flogd_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4834 pan_pack_add_flogd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4835 {
4836 unsigned src0 = bi_get_src(ins, regs, 0);
4837 assert((1 << src0) & 0xf7);
4838
4839 return 0x66340 | (src0 << 0);
4840 }
4841
4842 static inline unsigned
pan_pack_fma_frexpm_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4843 pan_pack_fma_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4844 {
4845 unsigned src0 = bi_get_src(ins, regs, 0);
4846 assert((1 << src0) & 0xfb);
4847
4848 unsigned abs0 = ins->src_abs[0];
4849 assert(abs0 < 2);
4850
4851 unsigned sqrt = 0;
4852
4853 unsigned log = 1;
4854
4855 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4856 unsigned swz0_temp = 0;
4857 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
4858 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
4859 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
4860 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
4861 else unreachable("Could not pattern match widen");
4862 unsigned swz0 = swz0_temp;
4863 assert(swz0 < 4);
4864
4865 unsigned neg0 = ins->src_neg[0];
4866 assert(neg0 < 2);
4867
4868 if ((log == 0) && (neg0 == 0)) {
4869 return 0x701b00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3);
4870 } else if ((log == 1) && (sqrt == 0)) {
4871 return 0x701a00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7);
4872 } else {
4873 unreachable("No matching state found in fma_frexpm_v2f16");
4874 }
4875 }
4876
4877 static inline unsigned
pan_pack_add_s8_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4878 pan_pack_add_s8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4879 {
4880 unsigned src0 = bi_get_src(ins, regs, 0);
4881
4882 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4883 unsigned lane0_temp = 0;
4884 if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
4885 else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
4886 else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
4887 else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
4888 else unreachable("Could not pattern match widen");
4889 unsigned lane0 = lane0_temp;
4890 assert(lane0 < 4);
4891
4892 return 0x3cb80 | (src0 << 0) | (lane0 << 4);
4893 }
4894
4895 static inline unsigned
pan_pack_add_zs_emit(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4896 pan_pack_add_zs_emit(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4897 {
4898 unsigned src0 = bi_get_src(ins, regs, 0);
4899 unsigned src1 = bi_get_src(ins, regs, 1);
4900 unsigned src2 = bi_get_src(ins, regs, 2);
4901
4902 unsigned stencil = (ins->src[1] != 0);
4903 assert(stencil < 2);
4904
4905 unsigned z = (ins->src[0] != 0);
4906 assert(z < 2);
4907
4908 bi_write_staging_register(clause, ins);
4909 unsigned derived_9 = 0;
4910 if ((stencil == 1) && (z == 0)) derived_9 = 1;
4911 else if ((stencil == 0) && (z == 1)) derived_9 = 2;
4912 else if ((stencil == 1) && (z == 1)) derived_9 = 3;
4913 else unreachable("No pattern match at pos 9");
4914
4915 return 0xd7800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
4916 }
4917
4918 static inline unsigned
pan_pack_add_load_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4919 pan_pack_add_load_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4920 {
4921 unsigned src0 = bi_get_src(ins, regs, 0);
4922 unsigned src1 = bi_get_src(ins, regs, 1);
4923
4924 assert(ins->segment);
4925 unsigned seg = ins->segment;
4926 assert(seg < 8);
4927
4928 bi_write_staging_register(clause, ins);
4929 return 0x60e00 | (src0 << 0) | (src1 << 3) | (seg << 6);
4930 }
4931
4932 static inline unsigned
pan_pack_add_branchz_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4933 pan_pack_add_branchz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4934 {
4935 unsigned src0 = bi_get_src(ins, regs, 0);
4936 unsigned src1 = bi_get_src(ins, regs, 1);
4937 assert((1 << src1) & 0xf7);
4938
4939 unsigned cmpf_table[] = {
4940 ~0, 2, 3, 1, 0, ~0, ~0
4941 };
4942 unsigned cmpf = cmpf_table[ins->cond];
4943 assert(cmpf < 4);
4944
4945 unsigned derived_9 = 0;
4946 if (cmpf == 2) derived_9 = 0;
4947 else if (cmpf == 3) derived_9 = 1;
4948 else if (cmpf == 1) derived_9 = 2;
4949 else if (cmpf == 0) derived_9 = 3;
4950 else unreachable("No pattern match at pos 9");
4951
4952 return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
4953 }
4954
4955 static inline unsigned
pan_pack_add_fcmp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)4956 pan_pack_add_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
4957 {
4958 unsigned src0 = bi_get_src(ins, regs, 0);
4959 unsigned src1 = bi_get_src(ins, regs, 1);
4960
4961 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
4962 unsigned widen0_temp = 0;
4963 if (widen0_sz == 32) widen0_temp = 0;
4964 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
4965 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
4966 else unreachable("Could not pattern match widen");
4967 unsigned widen0 = widen0_temp;
4968 assert(widen0 < 4);
4969
4970 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
4971 unsigned widen1_temp = 0;
4972 if (widen1_sz == 32) widen1_temp = 0;
4973 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
4974 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
4975 else unreachable("Could not pattern match widen");
4976 unsigned widen1 = widen1_temp;
4977 assert(widen1 < 4);
4978
4979 unsigned neg0 = ins->src_neg[0];
4980 assert(neg0 < 2);
4981
4982 unsigned neg1 = ins->src_neg[1];
4983 assert(neg1 < 2);
4984
4985 unsigned cmpf_table[] = {
4986 ~0, 4, 5, 2, 1, 0, 3
4987 };
4988 unsigned cmpf = cmpf_table[ins->cond];
4989 assert(cmpf < 8);
4990
4991 unsigned abs0 = ins->src_abs[0];
4992 assert(abs0 < 2);
4993
4994 unsigned abs1 = ins->src_abs[1];
4995 assert(abs1 < 2);
4996
4997 unsigned result_type = 2;
4998
4999 if (((neg0 == 0) && (neg1 == 1)) || ((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) {
5000 { unsigned temp = src0; src0 = src1; src1 = temp; }
5001 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
5002 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
5003 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
5004 if (cmpf == 1) cmpf = 4;
5005 else if (cmpf == 5) cmpf = 2;
5006 else if (cmpf == 4) cmpf = 1;
5007 else if (cmpf == 2) cmpf = 5;
5008 }
5009
5010 unsigned derived_9 = 0;
5011 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
5012 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
5013 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
5014 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
5015 else unreachable("No pattern match at pos 9");
5016
5017 unsigned derived_13 = 0;
5018 if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0;
5019 else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1;
5020 else unreachable("No pattern match at pos 13");
5021
5022 return 0x30000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (abs0 << 11) | (abs1 << 12) | (result_type << 14) | (derived_9 << 9) | (derived_13 << 13);
5023 }
5024
5025 static inline unsigned
pan_pack_fma_atom_c1_return_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5026 pan_pack_fma_atom_c1_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5027 {
5028 unsigned src0 = bi_get_src(ins, regs, 0);
5029 assert((1 << src0) & 0xf3);
5030 unsigned src1 = bi_get_src(ins, regs, 1);
5031 assert((1 << src1) & 0xf3);
5032
5033 unsigned atom_opc = 2;
5034
5035 return 0x2f3e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
5036 }
5037
5038 static inline unsigned
pan_pack_add_hadd_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5039 pan_pack_add_hadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5040 {
5041 unsigned src0 = bi_get_src(ins, regs, 0);
5042 unsigned src1 = bi_get_src(ins, regs, 1);
5043
5044 assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
5045 unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
5046 assert(round < 2);
5047
5048 unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5049 unsigned swap1_temp = 0;
5050 if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0;
5051 else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1;
5052 else unreachable("Could not pattern match widen");
5053 unsigned swap1 = swap1_temp;
5054 assert(swap1 < 2);
5055
5056 unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5057 unsigned swap0_temp = 0;
5058 if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0;
5059 else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1;
5060 else unreachable("Could not pattern match widen");
5061 unsigned swap0 = swap0_temp;
5062 assert(swap0 < 2);
5063
5064 return 0xbc8c0 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10);
5065 }
5066
5067 static inline unsigned
pan_pack_add_acmpstore_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5068 pan_pack_add_acmpstore_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5069 {
5070 unsigned src0 = bi_get_src(ins, regs, 1);
5071 unsigned src1 = bi_get_src(ins, regs, 2);
5072
5073 assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
5074 unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
5075 assert(seg < 2);
5076
5077 bi_read_staging_register(clause, ins);
5078 return 0x648c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
5079 }
5080
5081 static inline unsigned
pan_pack_add_frcp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5082 pan_pack_add_frcp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5083 {
5084 unsigned src0 = bi_get_src(ins, regs, 0);
5085 assert((1 << src0) & 0xf7);
5086
5087 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5088 unsigned widen0_temp = 0;
5089 if (widen0_sz == 32) widen0_temp = 0;
5090 else unreachable("Could not pattern match widen");
5091 unsigned widen0 = widen0_temp;
5092 assert(widen0 < 4);
5093
5094 unsigned neg = ins->src_neg[0];
5095 assert(neg < 2);
5096
5097 unsigned abs0 = ins->src_abs[0];
5098 assert(abs0 < 2);
5099
5100 unsigned derived_6 = 0;
5101 if (widen0 == 0) derived_6 = 0;
5102 else unreachable("No pattern match at pos 6");
5103
5104 return 0x66000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6);
5105 }
5106
5107 static inline unsigned
pan_pack_fma_fadd_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5108 pan_pack_fma_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5109 {
5110 unsigned src0 = bi_get_src(ins, regs, 0);
5111 assert((1 << src0) & 0xfb);
5112 unsigned src1 = bi_get_src(ins, regs, 1);
5113 assert((1 << src1) & 0xfb);
5114
5115 unsigned abs0 = ins->src_abs[0];
5116 assert(abs0 < 2);
5117
5118 unsigned abs1 = ins->src_abs[1];
5119 assert(abs1 < 2);
5120
5121 unsigned neg0 = ins->src_neg[0];
5122 assert(neg0 < 2);
5123
5124 unsigned neg1 = ins->src_neg[1];
5125 assert(neg1 < 2);
5126
5127 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5128 unsigned swz0_temp = 0;
5129 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
5130 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
5131 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
5132 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
5133 else unreachable("Could not pattern match widen");
5134 unsigned swz0 = swz0_temp;
5135 assert(swz0 < 4);
5136
5137 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5138 unsigned swz1_temp = 0;
5139 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
5140 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
5141 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
5142 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
5143 else unreachable("Could not pattern match widen");
5144 unsigned swz1 = swz1_temp;
5145 assert(swz1 < 4);
5146
5147 unsigned round = ins->roundmode;
5148 assert(round < 4);
5149
5150 unsigned clamp = ins->outmod;
5151 assert(clamp < 4);
5152
5153 if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
5154 { unsigned temp = src0; src0 = src1; src1 = temp; }
5155 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
5156 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
5157 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
5158 }
5159
5160 unsigned derived_6 = 0;
5161 if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
5162 else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
5163 else unreachable("No pattern match at pos 6");
5164
5165 return 0x6c0000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (derived_6 << 6);
5166 }
5167
5168 static inline unsigned
pan_pack_add_var_tex_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5169 pan_pack_add_var_tex_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5170 {
5171
5172 unsigned update = 0;
5173
5174 unsigned skip = ins->skip;
5175 assert(skip < 2);
5176
5177 unsigned lod_mode = 1 - ins->texture.compute_lod;
5178 assert(lod_mode < 2);
5179
5180 unsigned sample = ins->load_vary.interp_mode;
5181 assert(sample < 2);
5182
5183 unsigned varying_index = bi_get_immediate(ins, 0);
5184 unsigned texture_index = ins->texture.texture_index;
5185 bi_write_staging_register(clause, ins);
5186 unsigned derived_5 = 0;
5187 if ((sample == 0) && (update == 0)) derived_5 = 0;
5188 else if ((sample == 1) && (update == 1)) derived_5 = 1;
5189 else unreachable("No pattern match at pos 5");
5190
5191 return 0xca100 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5);
5192 }
5193
5194 static inline unsigned
pan_pack_fma_lshift_and_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5195 pan_pack_fma_lshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5196 {
5197 unsigned src0 = bi_get_src(ins, regs, 0);
5198 assert((1 << src0) & 0xfb);
5199 unsigned src1 = bi_get_src(ins, regs, 1);
5200 assert((1 << src1) & 0xfb);
5201 unsigned src2 = bi_get_src(ins, regs, 2);
5202
5203 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5204 unsigned lanes2_temp = 0;
5205 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
5206 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
5207 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
5208 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
5209 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
5210 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
5211 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
5212 else unreachable("Could not pattern match widen");
5213 unsigned lanes2 = lanes2_temp;
5214 assert(lanes2 < 8);
5215
5216 unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
5217 assert(not1 < 2);
5218
5219 unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
5220 assert(not_result < 2);
5221
5222 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
5223 unsigned derived_9 = 0;
5224 if (lanes2 == 0) derived_9 = 0;
5225 else if (lanes2 == 1) derived_9 = 1;
5226 else if (lanes2 == 2) derived_9 = 2;
5227 else if (lanes2 == 3) derived_9 = 3;
5228 else unreachable("No pattern match at pos 9");
5229
5230 return 0x310800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5231 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
5232 unsigned derived_9 = 0;
5233 if (lanes2 == 4) derived_9 = 1;
5234 else if (lanes2 == 5) derived_9 = 2;
5235 else if (lanes2 == 6) derived_9 = 3;
5236 else unreachable("No pattern match at pos 9");
5237
5238 return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5239 } else {
5240 unreachable("No matching state found in fma_lshift_and_v2i16");
5241 }
5242 }
5243
5244 static inline unsigned
pan_pack_add_quiet_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5245 pan_pack_add_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5246 {
5247 unsigned src0 = bi_get_src(ins, regs, 0);
5248
5249 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5250 unsigned swz0_temp = 0;
5251 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
5252 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
5253 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
5254 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
5255 else unreachable("Could not pattern match widen");
5256 unsigned swz0 = swz0_temp;
5257 assert(swz0 < 4);
5258
5259 return 0x3d900 | (src0 << 0) | (swz0 << 4);
5260 }
5261
5262 static inline unsigned
pan_pack_add_iabs_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5263 pan_pack_add_iabs_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5264 {
5265 unsigned src0 = bi_get_src(ins, regs, 0);
5266
5267 return 0x3deb0 | (src0 << 0);
5268 }
5269
5270 static inline unsigned
pan_pack_add_u16_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5271 pan_pack_add_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5272 {
5273 unsigned src0 = bi_get_src(ins, regs, 0);
5274
5275 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5276 unsigned lane0_temp = 0;
5277 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5278 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5279 else unreachable("Could not pattern match widen");
5280 unsigned lane0 = lane0_temp;
5281 assert(lane0 < 2);
5282
5283 return 0x3ccc8 | (src0 << 0) | (lane0 << 4);
5284 }
5285
5286 static inline unsigned
pan_pack_fma_csel_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5287 pan_pack_fma_csel_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5288 {
5289 unsigned src0 = bi_get_src(ins, regs, 0);
5290 assert((1 << src0) & 0xfb);
5291 unsigned src1 = bi_get_src(ins, regs, 1);
5292 assert((1 << src1) & 0xfb);
5293 unsigned src2 = bi_get_src(ins, regs, 2);
5294 unsigned src3 = bi_get_src(ins, regs, 3);
5295
5296 unsigned cmpf_table[] = {
5297 ~0, 2, 3, 1, 0, ~0, ~0
5298 };
5299 unsigned cmpf = cmpf_table[ins->cond];
5300 assert(cmpf < 4);
5301
5302 if ((cmpf == 2) || (cmpf == 3)) {
5303 { unsigned temp = src0; src0 = src1; src1 = temp; }
5304 if (cmpf == 3) cmpf = 1;
5305 else if (cmpf == 2) cmpf = 0;
5306 }
5307
5308 unsigned derived_12 = 0;
5309 if (cmpf == 0) derived_12 = 0;
5310 else if (cmpf == 1) derived_12 = 1;
5311 else unreachable("No pattern match at pos 12");
5312
5313 return 0x2e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
5314 }
5315
5316 static inline unsigned
pan_pack_fma_shaddxl_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5317 pan_pack_fma_shaddxl_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5318 {
5319 unsigned src0 = bi_get_src(ins, regs, 0);
5320 assert((1 << src0) & 0xfb);
5321 unsigned src1 = bi_get_src(ins, regs, 1);
5322 assert((1 << src1) & 0xfb);
5323
5324 unsigned shift = 0;
5325 return 0x70e600 | (src0 << 0) | (src1 << 3) | (shift << 6);
5326 }
5327
5328 static inline unsigned
pan_pack_add_s32_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5329 pan_pack_add_s32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5330 {
5331 unsigned src0 = bi_get_src(ins, regs, 0);
5332
5333 unsigned round = ins->roundmode;
5334 assert(round < 8);
5335
5336 if (round != 4) {
5337 unsigned derived_4 = 0;
5338 if (round == 0) derived_4 = 0;
5339 else if (round == 1) derived_4 = 1;
5340 else if (round == 2) derived_4 = 2;
5341 else if (round == 3) derived_4 = 3;
5342 else unreachable("No pattern match at pos 4");
5343
5344 return 0x3cbc0 | (src0 << 0) | (derived_4 << 4);
5345 } else if (round == 4) {
5346 return 0x3cd00 | (src0 << 0);
5347 } else {
5348 unreachable("No matching state found in add_s32_to_f32");
5349 }
5350 }
5351
5352 static inline unsigned
pan_pack_add_fmax_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5353 pan_pack_add_fmax_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5354 {
5355 unsigned src0 = bi_get_src(ins, regs, 0);
5356 unsigned src1 = bi_get_src(ins, regs, 1);
5357
5358 unsigned abs1 = ins->src_abs[1];
5359 assert(abs1 < 2);
5360
5361 unsigned neg0 = ins->src_neg[0];
5362 assert(neg0 < 2);
5363
5364 unsigned neg1 = ins->src_neg[1];
5365 assert(neg1 < 2);
5366
5367 unsigned clamp = ins->outmod;
5368 assert(clamp < 4);
5369
5370 unsigned sem = 0;
5371
5372 unsigned abs0 = ins->src_abs[0];
5373 assert(abs0 < 2);
5374
5375 return 0x0 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15);
5376 }
5377
5378 static inline unsigned
pan_pack_fma_lshift_xor_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5379 pan_pack_fma_lshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5380 {
5381 unsigned src0 = bi_get_src(ins, regs, 0);
5382 assert((1 << src0) & 0xfb);
5383 unsigned src1 = bi_get_src(ins, regs, 1);
5384 assert((1 << src1) & 0xfb);
5385 unsigned src2 = bi_get_src(ins, regs, 2);
5386
5387 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5388 unsigned lane2_temp = 0;
5389 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
5390 else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
5391 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
5392 else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
5393 else unreachable("Could not pattern match widen");
5394 unsigned lane2 = lane2_temp;
5395 assert(lane2 < 4);
5396
5397 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
5398 assert(not_result < 2);
5399
5400 return 0x325000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13);
5401 }
5402
5403 static inline unsigned
pan_pack_add_shift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5404 pan_pack_add_shift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5405 {
5406 unsigned src0 = bi_get_src(ins, regs, 0);
5407 unsigned src1 = bi_get_src(ins, regs, 1);
5408 unsigned src2 = bi_get_src(ins, regs, 2);
5409
5410 return 0xefe00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
5411 }
5412
5413 static inline unsigned
pan_pack_add_jump(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5414 pan_pack_add_jump(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5415 {
5416 unsigned src0 = bi_get_src(ins, regs, 0);
5417 assert((1 << src0) & 0xf7);
5418
5419 return 0x6fe34 | (src0 << 6);
5420 }
5421
5422 static inline unsigned
pan_pack_add_branchz_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5423 pan_pack_add_branchz_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5424 {
5425 unsigned src0 = bi_get_src(ins, regs, 0);
5426 unsigned src1 = bi_get_src(ins, regs, 1);
5427 assert((1 << src1) & 0xf7);
5428
5429 unsigned cmpf_table[] = {
5430 ~0, 2, 3, 1, 0, ~0, ~0
5431 };
5432 unsigned cmpf = cmpf_table[ins->cond];
5433 assert(cmpf < 4);
5434
5435 unsigned derived_9 = 0;
5436 if (cmpf == 2) derived_9 = 0;
5437 else if (cmpf == 3) derived_9 = 1;
5438 else if (cmpf == 1) derived_9 = 2;
5439 else if (cmpf == 0) derived_9 = 3;
5440 else unreachable("No pattern match at pos 9");
5441
5442 return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
5443 }
5444
5445 static inline unsigned
pan_pack_add_branch_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5446 pan_pack_add_branch_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5447 {
5448 unsigned src0 = bi_get_src(ins, regs, 0);
5449 unsigned src1 = bi_get_src(ins, regs, 1);
5450 unsigned src2 = bi_get_src(ins, regs, 2);
5451 assert((1 << src2) & 0xf7);
5452
5453 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5454 unsigned widen0_temp = 0;
5455 if (widen0_sz == 32) widen0_temp = 0;
5456 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
5457 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
5458 else unreachable("Could not pattern match widen");
5459 unsigned widen0 = widen0_temp;
5460 assert(widen0 < 4);
5461
5462 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5463 unsigned widen1_temp = 0;
5464 if (widen1_sz == 32) widen1_temp = 0;
5465 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
5466 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
5467 else unreachable("Could not pattern match widen");
5468 unsigned widen1 = widen1_temp;
5469 assert(widen1 < 4);
5470
5471 unsigned cmpf_table[] = {
5472 ~0, 2, 3, 1, 0, ~0, ~0
5473 };
5474 unsigned cmpf = cmpf_table[ins->cond];
5475 assert(cmpf < 4);
5476
5477 if (src0 < src1) {
5478 { unsigned temp = src0; src0 = src1; src1 = temp; }
5479 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
5480 if (cmpf == 0) cmpf = 2;
5481 else if (cmpf == 3) cmpf = 1;
5482 else if (cmpf == 2) cmpf = 0;
5483 else if (cmpf == 1) cmpf = 3;
5484 }
5485
5486 unsigned derived_12 = 0;
5487 if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0;
5488 else unreachable("No pattern match at pos 12");
5489
5490 unsigned derived_9 = 0;
5491 if ((src0 >= src1) && (cmpf == 2)) derived_9 = 0;
5492 else if ((src0 >= src1) && (cmpf == 3)) derived_9 = 1;
5493 else if ((src0 >= src1) && (cmpf == 1)) derived_9 = 2;
5494 else if ((src0 >= src1) && (cmpf == 0)) derived_9 = 3;
5495 else unreachable("No pattern match at pos 9");
5496
5497 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
5498 }
5499
5500 static inline unsigned
pan_pack_add_mux_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5501 pan_pack_add_mux_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5502 {
5503 unsigned src0 = bi_get_src(ins, regs, 0);
5504 unsigned src1 = bi_get_src(ins, regs, 1);
5505 unsigned src2 = bi_get_src(ins, regs, 2);
5506
5507 unsigned mux = 1;
5508
5509 return 0x74000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9);
5510 }
5511
5512 static inline unsigned
pan_pack_add_texs_2d_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5513 pan_pack_add_texs_2d_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5514 {
5515 unsigned src0 = bi_get_src(ins, regs, 0);
5516 unsigned src1 = bi_get_src(ins, regs, 1);
5517
5518 unsigned skip = ins->skip;
5519 assert(skip < 2);
5520
5521 unsigned lod_mode = 1 - ins->texture.compute_lod;
5522 assert(lod_mode < 2);
5523
5524 unsigned texture_index = ins->texture.texture_index;
5525 unsigned sampler_index = ins->texture.sampler_index;
5526 bi_write_staging_register(clause, ins);
5527 return 0xd8000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10);
5528 }
5529
5530 static inline unsigned
pan_pack_add_s8_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5531 pan_pack_add_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5532 {
5533 unsigned src0 = bi_get_src(ins, regs, 0);
5534
5535 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5536 unsigned lane0_temp = 0;
5537 if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5538 else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5539 else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
5540 else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
5541 else unreachable("Could not pattern match widen");
5542 unsigned lane0 = lane0_temp;
5543 assert(lane0 < 4);
5544
5545 return 0x3cb40 | (src0 << 0) | (lane0 << 4);
5546 }
5547
5548 static inline unsigned
pan_pack_fma_u8_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5549 pan_pack_fma_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5550 {
5551 unsigned src0 = bi_get_src(ins, regs, 0);
5552 assert((1 << src0) & 0xfb);
5553
5554 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5555 unsigned lane0_temp = 0;
5556 if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5557 else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5558 else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
5559 else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
5560 else unreachable("Could not pattern match widen");
5561 unsigned lane0 = lane0_temp;
5562 assert(lane0 < 4);
5563
5564 return 0x700b48 | (src0 << 0) | (lane0 << 4);
5565 }
5566
5567 static inline unsigned
pan_pack_add_cube_tsel(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5568 pan_pack_add_cube_tsel(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5569 {
5570 unsigned src0 = bi_get_src(ins, regs, 0);
5571 unsigned src1 = bi_get_src(ins, regs, 1);
5572 unsigned src2 = bi_get_src(ins, regs, 2);
5573
5574 unsigned neg0 = ins->src_neg[0];
5575 assert(neg0 < 2);
5576
5577 unsigned neg1 = ins->src_neg[1];
5578 assert(neg1 < 2);
5579
5580 unsigned derived_9 = 0;
5581 if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0;
5582 else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1;
5583 else unreachable("No pattern match at pos 9");
5584
5585 return 0x3e400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
5586 }
5587
5588 static inline unsigned
pan_pack_add_fpow_sc_det_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5589 pan_pack_add_fpow_sc_det_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5590 {
5591 unsigned src0 = bi_get_src(ins, regs, 0);
5592 assert((1 << src0) & 0xf7);
5593 unsigned src1 = bi_get_src(ins, regs, 1);
5594 assert((1 << src1) & 0xf7);
5595
5596 unsigned func = 0;
5597
5598 return 0x67640 | (src0 << 0) | (src1 << 3) | (func << 7);
5599 }
5600
5601 static inline unsigned
pan_pack_fma_mkvec_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5602 pan_pack_fma_mkvec_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5603 {
5604 unsigned src0 = bi_get_src(ins, regs, 0);
5605 assert((1 << src0) & 0xfb);
5606 unsigned src1 = bi_get_src(ins, regs, 1);
5607 assert((1 << src1) & 0xfb);
5608 unsigned src2 = bi_get_src(ins, regs, 2);
5609 unsigned src3 = bi_get_src(ins, regs, 3);
5610
5611 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5612 unsigned lane0_temp = 0;
5613 if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5614 else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 1;
5615 else unreachable("Could not pattern match widen");
5616 unsigned lane0 = lane0_temp;
5617 assert(lane0 < 2);
5618
5619 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5620 unsigned lane1_temp = 0;
5621 if (lane1_sz == 8 && ins->swizzle[1][0] == 0) lane1_temp = 0;
5622 else if (lane1_sz == 8 && ins->swizzle[1][0] == 2) lane1_temp = 1;
5623 else unreachable("Could not pattern match widen");
5624 unsigned lane1 = lane1_temp;
5625 assert(lane1 < 2);
5626
5627 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5628 unsigned lane2_temp = 0;
5629 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
5630 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
5631 else unreachable("Could not pattern match widen");
5632 unsigned lane2 = lane2_temp;
5633 assert(lane2 < 2);
5634
5635 unsigned lane3_sz = nir_alu_type_get_type_size(ins->src_types[3]);
5636 unsigned lane3_temp = 0;
5637 if (lane3_sz == 8 && ins->swizzle[3][0] == 0) lane3_temp = 0;
5638 else if (lane3_sz == 8 && ins->swizzle[3][0] == 2) lane3_temp = 1;
5639 else unreachable("Could not pattern match widen");
5640 unsigned lane3 = lane3_temp;
5641 assert(lane3 < 2);
5642
5643 return 0x710000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (lane0 << 12) | (lane1 << 13) | (lane2 << 14) | (lane3 << 15);
5644 }
5645
5646 static inline unsigned
pan_pack_add_fmin_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5647 pan_pack_add_fmin_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5648 {
5649 unsigned src0 = bi_get_src(ins, regs, 0);
5650 unsigned src1 = bi_get_src(ins, regs, 1);
5651
5652 unsigned abs1 = ins->src_abs[1];
5653 assert(abs1 < 2);
5654
5655 unsigned neg0 = ins->src_neg[0];
5656 assert(neg0 < 2);
5657
5658 unsigned neg1 = ins->src_neg[1];
5659 assert(neg1 < 2);
5660
5661 unsigned clamp = ins->outmod;
5662 assert(clamp < 4);
5663
5664 unsigned sem = 0;
5665
5666 unsigned abs0 = ins->src_abs[0];
5667 assert(abs0 < 2);
5668
5669 return 0x10000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15);
5670 }
5671
5672 static inline unsigned
pan_pack_fma_fcmp_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5673 pan_pack_fma_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5674 {
5675 unsigned src0 = bi_get_src(ins, regs, 0);
5676 assert((1 << src0) & 0xfb);
5677 unsigned src1 = bi_get_src(ins, regs, 1);
5678 assert((1 << src1) & 0xfb);
5679
5680 unsigned abs0 = ins->src_abs[0];
5681 assert(abs0 < 2);
5682
5683 unsigned abs1 = ins->src_abs[1];
5684 assert(abs1 < 2);
5685
5686 unsigned cmpf_table[] = {
5687 ~0, 4, 5, 2, 1, 0, 3
5688 };
5689 unsigned cmpf = cmpf_table[ins->cond];
5690 assert(cmpf < 8);
5691
5692 unsigned neg0 = ins->src_neg[0];
5693 assert(neg0 < 2);
5694
5695 unsigned neg1 = ins->src_neg[1];
5696 assert(neg1 < 2);
5697
5698 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5699 unsigned swz0_temp = 0;
5700 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
5701 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
5702 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
5703 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
5704 else unreachable("Could not pattern match widen");
5705 unsigned swz0 = swz0_temp;
5706 assert(swz0 < 4);
5707
5708 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5709 unsigned swz1_temp = 0;
5710 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
5711 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
5712 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
5713 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
5714 else unreachable("Could not pattern match widen");
5715 unsigned swz1 = swz1_temp;
5716 assert(swz1 < 4);
5717
5718 unsigned result_type = 2;
5719
5720 if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
5721 { unsigned temp = src0; src0 = src1; src1 = temp; }
5722 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
5723 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
5724 { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
5725 if (cmpf == 1) cmpf = 4;
5726 else if (cmpf == 5) cmpf = 2;
5727 else if (cmpf == 4) cmpf = 1;
5728 else if (cmpf == 2) cmpf = 5;
5729 }
5730
5731 unsigned derived_6 = 0;
5732 if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0;
5733 else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1;
5734 else unreachable("No pattern match at pos 6");
5735
5736 unsigned derived_13 = 0;
5737 if (cmpf == 0) derived_13 = 0;
5738 else if (cmpf == 1) derived_13 = 1;
5739 else if (cmpf == 2) derived_13 = 2;
5740 else if (cmpf == 3) derived_13 = 3;
5741 else if (cmpf == 4) derived_13 = 4;
5742 else if (cmpf == 5) derived_13 = 5;
5743 else if (cmpf == 6) derived_13 = 6;
5744 else if ((cmpf == 7) && (abs0 == 0) && (abs1 == 0)) derived_13 = 7;
5745 else unreachable("No pattern match at pos 13");
5746
5747 return 0x640000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (result_type << 16) | (derived_6 << 6) | (derived_13 << 13);
5748 }
5749
5750 static inline unsigned
pan_pack_add_acmpxchg_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5751 pan_pack_add_acmpxchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5752 {
5753 unsigned src0 = bi_get_src(ins, regs, 1);
5754 unsigned src1 = bi_get_src(ins, regs, 2);
5755
5756 assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
5757 unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
5758 assert(seg < 2);
5759
5760 bi_read_staging_register(clause, ins);
5761 assert(ins->src[0] == ins->dest);
5762 return 0x64500 | (src0 << 0) | (src1 << 3) | (seg << 9);
5763 }
5764
5765 static inline unsigned
pan_pack_fma_rshift_and_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5766 pan_pack_fma_rshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5767 {
5768 unsigned src0 = bi_get_src(ins, regs, 0);
5769 assert((1 << src0) & 0xfb);
5770 unsigned src1 = bi_get_src(ins, regs, 1);
5771 assert((1 << src1) & 0xfb);
5772 unsigned src2 = bi_get_src(ins, regs, 2);
5773
5774 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5775 unsigned lanes2_temp = 0;
5776 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
5777 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
5778 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
5779 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
5780 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
5781 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
5782 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
5783 else unreachable("Could not pattern match widen");
5784 unsigned lanes2 = lanes2_temp;
5785 assert(lanes2 < 8);
5786
5787 unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
5788 assert(not1 < 2);
5789
5790 unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
5791 assert(not_result < 2);
5792
5793 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
5794 unsigned derived_9 = 0;
5795 if (lanes2 == 0) derived_9 = 0;
5796 else if (lanes2 == 1) derived_9 = 1;
5797 else if (lanes2 == 2) derived_9 = 2;
5798 else if (lanes2 == 3) derived_9 = 3;
5799 else unreachable("No pattern match at pos 9");
5800
5801 return 0x300800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5802 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
5803 unsigned derived_9 = 0;
5804 if (lanes2 == 4) derived_9 = 1;
5805 else if (lanes2 == 5) derived_9 = 2;
5806 else if (lanes2 == 6) derived_9 = 3;
5807 else unreachable("No pattern match at pos 9");
5808
5809 return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
5810 } else {
5811 unreachable("No matching state found in fma_rshift_and_v2i16");
5812 }
5813 }
5814
5815 static inline unsigned
pan_pack_add_fpow_sc_det_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5816 pan_pack_add_fpow_sc_det_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5817 {
5818 unsigned src0 = bi_get_src(ins, regs, 0);
5819 assert((1 << src0) & 0xf7);
5820 unsigned src1 = bi_get_src(ins, regs, 1);
5821 assert((1 << src1) & 0xf7);
5822
5823 unsigned func = 0;
5824
5825 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5826 unsigned lane1_temp = 0;
5827 if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
5828 else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
5829 else if (lane1_sz == 32) lane1_temp = 2;
5830 else unreachable("Could not pattern match widen");
5831 unsigned lane1 = lane1_temp;
5832 assert(lane1 < 4);
5833
5834 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5835 unsigned lane0_temp = 0;
5836 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5837 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5838 else unreachable("Could not pattern match widen");
5839 unsigned lane0 = lane0_temp;
5840 assert(lane0 < 2);
5841
5842 if ((func == 0) || (func == 1)) {
5843 unsigned derived_6 = 0;
5844 if ((lane1 == 2) || (lane1 == 0)) derived_6 = 0;
5845 else if (lane1 == 1) derived_6 = 1;
5846 else unreachable("No pattern match at pos 6");
5847
5848 unsigned derived_8 = 0;
5849 if (func == 0) derived_8 = 0;
5850 else if (func == 1) derived_8 = 1;
5851 else unreachable("No pattern match at pos 8");
5852
5853 return 0x67400 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_6 << 6) | (derived_8 << 8);
5854 } else if (((func == 2) || (func == 3)) && (lane1 == 2)) {
5855 unsigned derived_8 = 0;
5856 if (func == 2) derived_8 = 0;
5857 else if (func == 3) derived_8 = 1;
5858 else unreachable("No pattern match at pos 8");
5859
5860 return 0x67600 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_8 << 8);
5861 } else {
5862 unreachable("No matching state found in add_fpow_sc_det_f16");
5863 }
5864 }
5865
5866 static inline unsigned
pan_pack_add_iadd_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5867 pan_pack_add_iadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5868 {
5869 unsigned src0 = bi_get_src(ins, regs, 0);
5870 unsigned src1 = bi_get_src(ins, regs, 1);
5871
5872 unsigned saturate = 0;
5873
5874 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5875 unsigned lanes0_temp = 0;
5876 if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0;
5877 else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1;
5878 else unreachable("Could not pattern match widen");
5879 unsigned lanes0 = lanes0_temp;
5880 assert(lanes0 < 2);
5881
5882 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
5883 unsigned lanes1_temp = 0;
5884 if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0;
5885 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1;
5886 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2;
5887 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3;
5888 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4;
5889 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5;
5890 else unreachable("Could not pattern match widen");
5891 unsigned lanes1 = lanes1_temp;
5892 assert(lanes1 < 8);
5893
5894 if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) {
5895 unsigned derived_9 = 0;
5896 if (lanes1 == 0) derived_9 = 0;
5897 else if (lanes1 == 1) derived_9 = 1;
5898 else unreachable("No pattern match at pos 9");
5899
5900 unsigned derived_10 = 0;
5901 if (lanes0 == 0) derived_10 = 0;
5902 else if (lanes0 == 1) derived_10 = 1;
5903 else unreachable("No pattern match at pos 10");
5904
5905 return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10);
5906 } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) {
5907 unsigned derived_9 = 0;
5908 if (lanes1 == 2) derived_9 = 0;
5909 else if (lanes1 == 3) derived_9 = 1;
5910 else unreachable("No pattern match at pos 9");
5911
5912 return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
5913 } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) {
5914 unsigned derived_9 = 0;
5915 if (lanes1 == 4) derived_9 = 0;
5916 else if (lanes1 == 5) derived_9 = 1;
5917 else unreachable("No pattern match at pos 9");
5918
5919 return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
5920 } else {
5921 unreachable("No matching state found in add_iadd_v2s16");
5922 }
5923 }
5924
5925 static inline unsigned
pan_pack_fma_arshift_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5926 pan_pack_fma_arshift_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5927 {
5928 unsigned src0 = bi_get_src(ins, regs, 0);
5929 assert((1 << src0) & 0xfb);
5930 unsigned src1 = bi_get_src(ins, regs, 1);
5931 assert((1 << src1) & 0x8);
5932 unsigned src2 = bi_get_src(ins, regs, 2);
5933
5934 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
5935 unsigned lane2_temp = 0;
5936 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
5937 else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
5938 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
5939 else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
5940 else unreachable("Could not pattern match widen");
5941 unsigned lane2 = lane2_temp;
5942 assert(lane2 < 4);
5943
5944 return 0x335018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9);
5945 }
5946
5947 static inline unsigned
pan_pack_add_store_i128(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5948 pan_pack_add_store_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5949 {
5950 unsigned src0 = bi_get_src(ins, regs, 1);
5951 unsigned src1 = bi_get_src(ins, regs, 2);
5952
5953 assert(ins->segment);
5954 unsigned seg = ins->segment;
5955 assert(seg < 8);
5956
5957 bi_read_staging_register(clause, ins);
5958 return 0x61200 | (src0 << 0) | (src1 << 3) | (seg << 6);
5959 }
5960
5961 static inline unsigned
pan_pack_add_fpclass_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5962 pan_pack_add_fpclass_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5963 {
5964 unsigned src0 = bi_get_src(ins, regs, 0);
5965 assert((1 << src0) & 0xf7);
5966
5967 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5968 unsigned lane0_temp = 0;
5969 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5970 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5971 else unreachable("Could not pattern match widen");
5972 unsigned lane0 = lane0_temp;
5973 assert(lane0 < 2);
5974
5975 return 0x67c40 | (src0 << 0) | (lane0 << 3);
5976 }
5977
5978 static inline unsigned
pan_pack_add_u8_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5979 pan_pack_add_u8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5980 {
5981 unsigned src0 = bi_get_src(ins, regs, 0);
5982
5983 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
5984 unsigned lane0_temp = 0;
5985 if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
5986 else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
5987 else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
5988 else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
5989 else unreachable("Could not pattern match widen");
5990 unsigned lane0 = lane0_temp;
5991 assert(lane0 < 4);
5992
5993 return 0x3cb88 | (src0 << 0) | (lane0 << 4);
5994 }
5995
5996 static inline unsigned
pan_pack_fma_nop_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)5997 pan_pack_fma_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
5998 {
5999
6000 return 0x701963;
6001 }
6002
6003 static inline unsigned
pan_pack_add_lea_attr_tex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6004 pan_pack_add_lea_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6005 {
6006 unsigned src0 = bi_get_src(ins, regs, 0);
6007 unsigned src1 = bi_get_src(ins, regs, 1);
6008 unsigned src2 = bi_get_src(ins, regs, 2);
6009
6010 unsigned register_format_temp = 0;
6011 if (ins->format == nir_type_float16) register_format_temp = 0;
6012 else if (ins->format == nir_type_float32) register_format_temp = 1;
6013 else if (ins->format == nir_type_int32) register_format_temp = 2;
6014 else if (ins->format == nir_type_uint32) register_format_temp = 3;
6015 else if (ins->format == nir_type_int16) register_format_temp = 4;
6016 else if (ins->format == nir_type_uint16) register_format_temp = 5;
6017 else if (ins->format == nir_type_float64) register_format_temp = 6;
6018 else if (ins->format == nir_type_int64) register_format_temp = 7;
6019 else unreachable("Could not pattern match register format");
6020 unsigned register_format = register_format_temp;
6021 assert(register_format < 16);
6022
6023 bi_write_staging_register(clause, ins);
6024 if (register_format != 8) {
6025 unsigned derived_11 = 0;
6026 if (register_format == 0) derived_11 = 0;
6027 else if (register_format == 1) derived_11 = 1;
6028 else if (register_format == 2) derived_11 = 2;
6029 else if (register_format == 3) derived_11 = 3;
6030 else if (register_format == 4) derived_11 = 4;
6031 else if (register_format == 5) derived_11 = 5;
6032 else if (register_format == 6) derived_11 = 6;
6033 else if (register_format == 7) derived_11 = 7;
6034 else unreachable("No pattern match at pos 11");
6035
6036 return 0xc0600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11);
6037 } else if (register_format == 8) {
6038 return 0xc8600 | (src0 << 0) | (src1 << 3) | (src2 << 6);
6039 } else {
6040 unreachable("No matching state found in add_lea_attr_tex");
6041 }
6042 }
6043
6044 static inline unsigned
pan_pack_fma_mkvec_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6045 pan_pack_fma_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6046 {
6047 unsigned src0 = bi_get_src(ins, regs, 0);
6048 assert((1 << src0) & 0xfb);
6049 unsigned src1 = bi_get_src(ins, regs, 1);
6050 assert((1 << src1) & 0xfb);
6051
6052 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6053 unsigned lane0_temp = 0;
6054 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6055 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6056 else unreachable("Could not pattern match widen");
6057 unsigned lane0 = lane0_temp;
6058 assert(lane0 < 2);
6059
6060 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6061 unsigned lane1_temp = 0;
6062 if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
6063 else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
6064 else unreachable("Could not pattern match widen");
6065 unsigned lane1 = lane1_temp;
6066 assert(lane1 < 2);
6067
6068 return 0x70f000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7);
6069 }
6070
6071 static inline unsigned
pan_pack_fma_fadd_lscale_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6072 pan_pack_fma_fadd_lscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6073 {
6074 unsigned src0 = bi_get_src(ins, regs, 0);
6075 assert((1 << src0) & 0xfb);
6076 unsigned src1 = bi_get_src(ins, regs, 1);
6077 assert((1 << src1) & 0xfb);
6078
6079 unsigned abs0 = ins->src_abs[0];
6080 assert(abs0 < 2);
6081
6082 unsigned neg0 = ins->src_neg[0];
6083 assert(neg0 < 2);
6084
6085 unsigned abs1 = ins->src_abs[1];
6086 assert(abs1 < 2);
6087
6088 unsigned neg1 = ins->src_neg[1];
6089 assert(neg1 < 2);
6090
6091 return 0x70f400 | (src0 << 0) | (src1 << 3) | (abs0 << 6) | (neg0 << 7) | (abs1 << 8) | (neg1 << 9);
6092 }
6093
6094 static inline unsigned
pan_pack_add_v2f16_to_v2u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6095 pan_pack_add_v2f16_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6096 {
6097 unsigned src0 = bi_get_src(ins, regs, 0);
6098
6099 unsigned round = ins->roundmode;
6100 assert(round < 8);
6101
6102 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6103 unsigned swz0_temp = 0;
6104 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6105 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6106 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
6107 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
6108 else unreachable("Could not pattern match widen");
6109 unsigned swz0 = swz0_temp;
6110 assert(swz0 < 4);
6111
6112 if (round != 4) {
6113 unsigned derived_4 = 0;
6114 if (round == 0) derived_4 = 0;
6115 else if (round == 1) derived_4 = 1;
6116 else if (round == 2) derived_4 = 2;
6117 else if (round == 3) derived_4 = 3;
6118 else unreachable("No pattern match at pos 4");
6119
6120 return 0x3c208 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
6121 } else if (round == 4) {
6122 return 0x3ca88 | (src0 << 0) | (swz0 << 4);
6123 } else {
6124 unreachable("No matching state found in add_v2f16_to_v2u16");
6125 }
6126 }
6127
6128 static inline unsigned
pan_pack_fma_fcmp_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6129 pan_pack_fma_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6130 {
6131 unsigned src0 = bi_get_src(ins, regs, 0);
6132 assert((1 << src0) & 0xfb);
6133 unsigned src1 = bi_get_src(ins, regs, 1);
6134 assert((1 << src1) & 0xfb);
6135
6136 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6137 unsigned widen0_temp = 0;
6138 if (widen0_sz == 32) widen0_temp = 0;
6139 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6140 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6141 else unreachable("Could not pattern match widen");
6142 unsigned widen0 = widen0_temp;
6143 assert(widen0 < 4);
6144
6145 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6146 unsigned widen1_temp = 0;
6147 if (widen1_sz == 32) widen1_temp = 0;
6148 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
6149 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
6150 else unreachable("Could not pattern match widen");
6151 unsigned widen1 = widen1_temp;
6152 assert(widen1 < 4);
6153
6154 unsigned abs1 = ins->src_abs[1];
6155 assert(abs1 < 2);
6156
6157 unsigned neg0 = ins->src_neg[0];
6158 assert(neg0 < 2);
6159
6160 unsigned neg1 = ins->src_neg[1];
6161 assert(neg1 < 2);
6162
6163 unsigned abs0 = ins->src_abs[0];
6164 assert(abs0 < 2);
6165
6166 unsigned cmpf_table[] = {
6167 ~0, 4, 5, 2, 1, 0, 3
6168 };
6169 unsigned cmpf = cmpf_table[ins->cond];
6170 assert(cmpf < 8);
6171
6172 unsigned result_type = 2;
6173
6174 if ((widen0 == 2) && (widen1 == 1)) {
6175 { unsigned temp = src0; src0 = src1; src1 = temp; }
6176 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
6177 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
6178 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
6179 if (cmpf == 1) cmpf = 4;
6180 else if (cmpf == 5) cmpf = 2;
6181 else if (cmpf == 4) cmpf = 1;
6182 else if (cmpf == 2) cmpf = 5;
6183 }
6184
6185 unsigned derived_9 = 0;
6186 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
6187 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
6188 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
6189 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
6190 else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4;
6191 else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5;
6192 else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6;
6193 else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7;
6194 else unreachable("No pattern match at pos 9");
6195
6196 return 0x240000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (cmpf << 13) | (result_type << 16) | (derived_9 << 9);
6197 }
6198
6199 static inline unsigned
pan_pack_add_fpclass_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6200 pan_pack_add_fpclass_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6201 {
6202 unsigned src0 = bi_get_src(ins, regs, 0);
6203 assert((1 << src0) & 0xf7);
6204
6205 return 0x67c50 | (src0 << 0);
6206 }
6207
6208 static inline unsigned
pan_pack_add_ld_attr(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6209 pan_pack_add_ld_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6210 {
6211 unsigned src0 = bi_get_src(ins, regs, 0);
6212 unsigned src1 = bi_get_src(ins, regs, 1);
6213 unsigned src2 = bi_get_src(ins, regs, 2);
6214
6215 unsigned register_format_temp = 0;
6216 if (ins->format == nir_type_float16) register_format_temp = 0;
6217 else if (ins->format == nir_type_float32) register_format_temp = 1;
6218 else if (ins->format == nir_type_int32) register_format_temp = 2;
6219 else if (ins->format == nir_type_uint32) register_format_temp = 3;
6220 else if (ins->format == nir_type_int16) register_format_temp = 4;
6221 else if (ins->format == nir_type_uint16) register_format_temp = 5;
6222 else if (ins->format == nir_type_float64) register_format_temp = 6;
6223 else if (ins->format == nir_type_int64) register_format_temp = 7;
6224 else unreachable("Could not pattern match register format");
6225 unsigned register_format = register_format_temp;
6226 assert(register_format < 16);
6227
6228 unsigned vecsize = ins->vector_channels - 1;
6229 assert(vecsize < 4);
6230
6231 bi_write_staging_register(clause, ins);
6232 if (register_format != 8) {
6233 unsigned derived_13 = 0;
6234 if (register_format == 0) derived_13 = 0;
6235 else if (register_format == 1) derived_13 = 1;
6236 else if (register_format == 2) derived_13 = 2;
6237 else if (register_format == 3) derived_13 = 3;
6238 else if (register_format == 4) derived_13 = 4;
6239 else if (register_format == 5) derived_13 = 5;
6240 else if (register_format == 6) derived_13 = 6;
6241 else if (register_format == 7) derived_13 = 7;
6242 else unreachable("No pattern match at pos 13");
6243
6244 return 0x40400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13);
6245 } else if (register_format == 8) {
6246 return 0xc4400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11);
6247 } else {
6248 unreachable("No matching state found in add_ld_attr");
6249 }
6250 }
6251
6252 static inline unsigned
pan_pack_fma_rshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6253 pan_pack_fma_rshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6254 {
6255 unsigned src0 = bi_get_src(ins, regs, 0);
6256 assert((1 << src0) & 0xfb);
6257 unsigned src1 = bi_get_src(ins, regs, 1);
6258 assert((1 << src1) & 0xfb);
6259 unsigned src2 = bi_get_src(ins, regs, 2);
6260
6261 unsigned bytes2 = 0;
6262
6263 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
6264 unsigned lane2_temp = 0;
6265 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
6266 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
6267 else unreachable("Could not pattern match widen");
6268 unsigned lane2 = lane2_temp;
6269 assert(lane2 < 2);
6270
6271 unsigned result_word = 0;
6272
6273 return 0x33d000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
6274 }
6275
6276 static inline unsigned
pan_pack_add_branchz_u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6277 pan_pack_add_branchz_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6278 {
6279 unsigned src0 = bi_get_src(ins, regs, 0);
6280 unsigned src1 = bi_get_src(ins, regs, 1);
6281 assert((1 << src1) & 0xf7);
6282
6283 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6284 unsigned widen0_temp = 0;
6285 if (widen0_sz == 32) widen0_temp = 0;
6286 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6287 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6288 else unreachable("Could not pattern match widen");
6289 unsigned widen0 = widen0_temp;
6290 assert(widen0 < 4);
6291
6292 unsigned cmpf_table[] = {
6293 ~0, 2, 3, 1, 0, ~0, ~0
6294 };
6295 unsigned cmpf = cmpf_table[ins->cond];
6296 assert(cmpf < 4);
6297
6298 unsigned derived_4 = 0;
6299 if (widen0 == 2) derived_4 = 1;
6300 else if (widen0 == 1) derived_4 = 2;
6301 else unreachable("No pattern match at pos 4");
6302
6303 unsigned derived_9 = 0;
6304 if (cmpf == 2) derived_9 = 0;
6305 else if (cmpf == 3) derived_9 = 1;
6306 else if (cmpf == 1) derived_9 = 2;
6307 else if (cmpf == 0) derived_9 = 3;
6308 else unreachable("No pattern match at pos 9");
6309
6310 return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9);
6311 }
6312
6313 static inline unsigned
pan_pack_fma_atom_c1_return_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6314 pan_pack_fma_atom_c1_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6315 {
6316 unsigned src0 = bi_get_src(ins, regs, 0);
6317 assert((1 << src0) & 0xf3);
6318 unsigned src1 = bi_get_src(ins, regs, 1);
6319 assert((1 << src1) & 0xf3);
6320
6321 unsigned atom_opc = 2;
6322
6323 return 0x2f7e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
6324 }
6325
6326 static inline unsigned
pan_pack_add_store_i48(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6327 pan_pack_add_store_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6328 {
6329 unsigned src0 = bi_get_src(ins, regs, 1);
6330 unsigned src1 = bi_get_src(ins, regs, 2);
6331
6332 assert(ins->segment);
6333 unsigned seg = ins->segment;
6334 assert(seg < 8);
6335
6336 bi_read_staging_register(clause, ins);
6337 return 0x65a00 | (src0 << 0) | (src1 << 3) | (seg << 6);
6338 }
6339
6340 static inline unsigned
pan_pack_fma_rshift_and_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6341 pan_pack_fma_rshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6342 {
6343 unsigned src0 = bi_get_src(ins, regs, 0);
6344 assert((1 << src0) & 0xfb);
6345 unsigned src1 = bi_get_src(ins, regs, 1);
6346 assert((1 << src1) & 0xfb);
6347 unsigned src2 = bi_get_src(ins, regs, 2);
6348
6349 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
6350 unsigned lanes2_temp = 0;
6351 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0;
6352 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1;
6353 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2;
6354 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3;
6355 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4;
6356 else unreachable("Could not pattern match widen");
6357 unsigned lanes2 = lanes2_temp;
6358 assert(lanes2 < 8);
6359
6360 unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
6361 assert(not1 < 2);
6362
6363 unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
6364 assert(not_result < 2);
6365
6366 if (lanes2 != 0) {
6367 unsigned derived_9 = 0;
6368 if (lanes2 == 1) derived_9 = 0;
6369 else if (lanes2 == 2) derived_9 = 1;
6370 else if (lanes2 == 3) derived_9 = 2;
6371 else if (lanes2 == 4) derived_9 = 3;
6372 else unreachable("No pattern match at pos 9");
6373
6374 return 0x300000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
6375 } else if (lanes2 == 0) {
6376 return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);
6377 } else {
6378 unreachable("No matching state found in fma_rshift_and_v4i8");
6379 }
6380 }
6381
6382 static inline unsigned
pan_pack_add_frsq_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6383 pan_pack_add_frsq_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6384 {
6385 unsigned src0 = bi_get_src(ins, regs, 0);
6386 assert((1 << src0) & 0xf7);
6387
6388 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6389 unsigned widen0_temp = 0;
6390 if (widen0_sz == 32) widen0_temp = 0;
6391 else unreachable("Could not pattern match widen");
6392 unsigned widen0 = widen0_temp;
6393 assert(widen0 < 4);
6394
6395 unsigned neg = ins->src_neg[0];
6396 assert(neg < 2);
6397
6398 unsigned abs0 = ins->src_abs[0];
6399 assert(abs0 < 2);
6400
6401 unsigned derived_6 = 0;
6402 if (widen0 == 0) derived_6 = 0;
6403 else unreachable("No pattern match at pos 6");
6404
6405 return 0x66100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6);
6406 }
6407
6408 static inline unsigned
pan_pack_add_icmpf_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6409 pan_pack_add_icmpf_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6410 {
6411 unsigned src0 = bi_get_src(ins, regs, 0);
6412 unsigned src1 = bi_get_src(ins, regs, 1);
6413 unsigned src2 = bi_get_src(ins, regs, 2);
6414
6415 return 0x7be00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
6416 }
6417
6418 static inline unsigned
pan_pack_add_lea_tex_imm(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6419 pan_pack_add_lea_tex_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6420 {
6421 unsigned src0 = bi_get_src(ins, regs, 0);
6422 unsigned src1 = bi_get_src(ins, regs, 1);
6423
6424 unsigned format = 1;
6425
6426 unsigned texture_index = ins->texture.texture_index;
6427 bi_write_staging_register(clause, ins);
6428 return 0xd6000 | (src0 << 0) | (src1 << 3) | (format << 11) | (texture_index << 6);
6429 }
6430
6431 static inline unsigned
pan_pack_add_f16_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6432 pan_pack_add_f16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6433 {
6434 unsigned src0 = bi_get_src(ins, regs, 0);
6435
6436 unsigned round = ins->roundmode;
6437 assert(round < 8);
6438
6439 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6440 unsigned lane0_temp = 0;
6441 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6442 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6443 else unreachable("Could not pattern match widen");
6444 unsigned lane0 = lane0_temp;
6445 assert(lane0 < 2);
6446
6447 if (round != 4) {
6448 unsigned derived_4 = 0;
6449 if (round == 0) derived_4 = 0;
6450 else if (round == 1) derived_4 = 1;
6451 else if (round == 2) derived_4 = 2;
6452 else if (round == 3) derived_4 = 3;
6453 else unreachable("No pattern match at pos 4");
6454
6455 return 0x3c508 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4);
6456 } else if (round == 4) {
6457 return 0x3cc48 | (src0 << 0) | (lane0 << 5);
6458 } else {
6459 unreachable("No matching state found in add_f16_to_u32");
6460 }
6461 }
6462
6463 static inline unsigned
pan_pack_add_isub_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6464 pan_pack_add_isub_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6465 {
6466 unsigned src0 = bi_get_src(ins, regs, 0);
6467 unsigned src1 = bi_get_src(ins, regs, 1);
6468
6469 unsigned saturate = 0;
6470
6471 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6472 unsigned lanes1_temp = 0;
6473 if (lanes1_sz == 32) lanes1_temp = 0;
6474 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1;
6475 else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2;
6476 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3;
6477 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4;
6478 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5;
6479 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6;
6480 else unreachable("Could not pattern match widen");
6481 unsigned lanes1 = lanes1_temp;
6482 assert(lanes1 < 8);
6483
6484 if (lanes1 == 0) {
6485 unsigned derived_7 = 0;
6486 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
6487 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
6488 else unreachable("No pattern match at pos 7");
6489
6490 return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
6491 } else if ((lanes1 == 1) || (lanes1 == 2)) {
6492 unsigned derived_7 = 0;
6493 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
6494 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
6495 else unreachable("No pattern match at pos 7");
6496
6497 unsigned derived_9 = 0;
6498 if (lanes1 == 1) derived_9 = 0;
6499 else if (lanes1 == 2) derived_9 = 1;
6500 else unreachable("No pattern match at pos 9");
6501
6502 return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
6503 } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) {
6504 unsigned derived_7 = 0;
6505 if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0;
6506 else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1;
6507 else unreachable("No pattern match at pos 7");
6508
6509 unsigned derived_9 = 0;
6510 if (lanes1 == 3) derived_9 = 0;
6511 else if (lanes1 == 4) derived_9 = 1;
6512 else if (lanes1 == 5) derived_9 = 2;
6513 else if (lanes1 == 6) derived_9 = 3;
6514 else unreachable("No pattern match at pos 9");
6515
6516 return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
6517 } else {
6518 unreachable("No matching state found in add_isub_u32");
6519 }
6520 }
6521
6522 static inline unsigned
pan_pack_fma_v2f32_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6523 pan_pack_fma_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6524 {
6525 unsigned src0 = bi_get_src(ins, regs, 0);
6526 assert((1 << src0) & 0xfb);
6527 unsigned src1 = bi_get_src(ins, regs, 1);
6528 assert((1 << src1) & 0xfb);
6529
6530 unsigned abs0 = ins->src_abs[0];
6531 assert(abs0 < 2);
6532
6533 unsigned abs1 = ins->src_abs[1];
6534 assert(abs1 < 2);
6535
6536 unsigned neg0 = ins->src_neg[0];
6537 assert(neg0 < 2);
6538
6539 unsigned neg1 = ins->src_neg[1];
6540 assert(neg1 < 2);
6541
6542 unsigned clamp = ins->outmod;
6543 assert(clamp < 4);
6544
6545 unsigned round = ins->roundmode;
6546 assert(round < 8);
6547
6548 unsigned derived_6 = 0;
6549 if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0;
6550 else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1;
6551 else unreachable("No pattern match at pos 6");
6552
6553 unsigned derived_7 = 0;
6554 if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0;
6555 else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1;
6556 else unreachable("No pattern match at pos 7");
6557
6558 return 0x6e8000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7);
6559 }
6560
6561 static inline unsigned
pan_pack_add_u8_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6562 pan_pack_add_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6563 {
6564 unsigned src0 = bi_get_src(ins, regs, 0);
6565
6566 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6567 unsigned lane0_temp = 0;
6568 if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6569 else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6570 else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2;
6571 else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3;
6572 else unreachable("Could not pattern match widen");
6573 unsigned lane0 = lane0_temp;
6574 assert(lane0 < 4);
6575
6576 return 0x3cb48 | (src0 << 0) | (lane0 << 4);
6577 }
6578
6579 static inline unsigned
pan_pack_add_kaboom(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6580 pan_pack_add_kaboom(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6581 {
6582 unsigned src0 = bi_get_src(ins, regs, 0);
6583
6584 return 0xd7858 | (src0 << 0);
6585 }
6586
6587 static inline unsigned
pan_pack_fma_mov_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6588 pan_pack_fma_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6589 {
6590 unsigned src0 = bi_get_src(ins, regs, 0);
6591 assert((1 << src0) & 0xfb);
6592
6593 return 0x701968 | (src0 << 0);
6594 }
6595
6596 static inline unsigned
pan_pack_add_nop_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6597 pan_pack_add_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6598 {
6599
6600 return 0x3d964;
6601 }
6602
6603 static inline unsigned
pan_pack_fma_frexpe_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6604 pan_pack_fma_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6605 {
6606 unsigned src0 = bi_get_src(ins, regs, 0);
6607 assert((1 << src0) & 0xfb);
6608
6609 unsigned neg = ins->src_neg[0];
6610 assert(neg < 2);
6611
6612 unsigned sqrt = 0;
6613
6614 unsigned log = 1;
6615
6616 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6617 unsigned swz0_temp = 0;
6618 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6619 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6620 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
6621 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
6622 else unreachable("Could not pattern match widen");
6623 unsigned swz0 = swz0_temp;
6624 assert(swz0 < 4);
6625
6626 if (log == 0) {
6627 return 0x701c00 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (swz0 << 3);
6628 } else if ((log == 1) && (sqrt == 0) && (neg == 0)) {
6629 return 0x701e00 | (src0 << 0) | (swz0 << 3);
6630 } else {
6631 unreachable("No matching state found in fma_frexpe_v2f16");
6632 }
6633 }
6634
6635 static inline unsigned
pan_pack_add_store_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6636 pan_pack_add_store_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6637 {
6638 unsigned src0 = bi_get_src(ins, regs, 1);
6639 unsigned src1 = bi_get_src(ins, regs, 2);
6640
6641 assert(ins->segment);
6642 unsigned seg = ins->segment;
6643 assert(seg < 8);
6644
6645 bi_read_staging_register(clause, ins);
6646 return 0x62e00 | (src0 << 0) | (src1 << 3) | (seg << 6);
6647 }
6648
6649 static inline unsigned
pan_pack_add_frexpm_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6650 pan_pack_add_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6651 {
6652 unsigned src0 = bi_get_src(ins, regs, 0);
6653
6654 unsigned abs0 = ins->src_abs[0];
6655 assert(abs0 < 2);
6656
6657 unsigned sqrt = 0;
6658
6659 unsigned log = 1;
6660
6661 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6662 unsigned swz0_temp = 0;
6663 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6664 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6665 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
6666 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
6667 else unreachable("Could not pattern match widen");
6668 unsigned swz0 = swz0_temp;
6669 assert(swz0 < 4);
6670
6671 unsigned neg0 = ins->src_neg[0];
6672 assert(neg0 < 2);
6673
6674 if ((log == 0) && (neg0 == 0)) {
6675 return 0x3db00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3);
6676 } else if ((log == 1) && (sqrt == 0)) {
6677 return 0x3da00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7);
6678 } else {
6679 unreachable("No matching state found in add_frexpm_v2f16");
6680 }
6681 }
6682
6683 static inline unsigned
pan_pack_add_branchz_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6684 pan_pack_add_branchz_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6685 {
6686 unsigned src0 = bi_get_src(ins, regs, 0);
6687 unsigned src1 = bi_get_src(ins, regs, 1);
6688 assert((1 << src1) & 0xf7);
6689
6690 unsigned cmpf_table[] = {
6691 ~0, ~0, ~0, ~0, ~0, 0, 1
6692 };
6693 unsigned cmpf = cmpf_table[ins->cond];
6694 assert(cmpf < 2);
6695
6696 unsigned derived_3 = 0;
6697 if (cmpf == 1) derived_3 = 0;
6698 else if (cmpf == 0) derived_3 = 1;
6699 else unreachable("No pattern match at pos 3");
6700
6701 return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_3 << 3);
6702 }
6703
6704 static inline unsigned
pan_pack_add_swz_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6705 pan_pack_add_swz_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6706 {
6707 unsigned src0 = bi_get_src(ins, regs, 0);
6708
6709 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6710 unsigned swz0_temp = 0;
6711 if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 0 && ins->swizzle[0][3] == 0) swz0_temp = 0;
6712 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 1;
6713 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 2) swz0_temp = 2;
6714 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 3;
6715 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 4;
6716 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 5;
6717 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 2) swz0_temp = 6;
6718 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 0) swz0_temp = 7;
6719 else unreachable("Could not pattern match widen");
6720 unsigned swz0 = swz0_temp;
6721 assert(swz0 < 8);
6722
6723 return 0x3df40 | (src0 << 0) | (swz0 << 3);
6724 }
6725
6726 static inline unsigned
pan_pack_add_branchz_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6727 pan_pack_add_branchz_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6728 {
6729 unsigned src0 = bi_get_src(ins, regs, 0);
6730 unsigned src1 = bi_get_src(ins, regs, 1);
6731 assert((1 << src1) & 0xf7);
6732
6733 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6734 unsigned widen0_temp = 0;
6735 if (widen0_sz == 32) widen0_temp = 0;
6736 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6737 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6738 else unreachable("Could not pattern match widen");
6739 unsigned widen0 = widen0_temp;
6740 assert(widen0 < 4);
6741
6742 unsigned cmpf_table[] = {
6743 ~0, 4, 5, 2, 1, 0, 3
6744 };
6745 unsigned cmpf = cmpf_table[ins->cond];
6746 assert(cmpf < 8);
6747
6748 unsigned derived_4 = 0;
6749 if (widen0 == 2) derived_4 = 1;
6750 else if (widen0 == 1) derived_4 = 2;
6751 else unreachable("No pattern match at pos 4");
6752
6753 unsigned derived_3 = 0;
6754 if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0;
6755 else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1;
6756 else unreachable("No pattern match at pos 3");
6757
6758 unsigned derived_9 = 0;
6759 if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5;
6760 else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6;
6761 else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7;
6762 else unreachable("No pattern match at pos 9");
6763
6764 return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3) | (derived_9 << 9);
6765 }
6766
6767 static inline unsigned
pan_pack_add_u16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6768 pan_pack_add_u16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6769 {
6770 unsigned src0 = bi_get_src(ins, regs, 0);
6771
6772 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6773 unsigned lane0_temp = 0;
6774 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6775 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6776 else unreachable("Could not pattern match widen");
6777 unsigned lane0 = lane0_temp;
6778 assert(lane0 < 2);
6779
6780 return 0x3cce8 | (src0 << 0) | (lane0 << 4);
6781 }
6782
6783 static inline unsigned
pan_pack_add_icmp_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6784 pan_pack_add_icmp_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6785 {
6786 unsigned src0 = bi_get_src(ins, regs, 0);
6787 unsigned src1 = bi_get_src(ins, regs, 1);
6788
6789 unsigned result_type = 1;
6790
6791 unsigned cmpf_table[] = {
6792 ~0, 2, 3, 1, 0, ~0, ~0
6793 };
6794 unsigned cmpf = cmpf_table[ins->cond];
6795 assert(cmpf < 4);
6796
6797 if ((cmpf == 2) || (cmpf == 3)) {
6798 { unsigned temp = src0; src0 = src1; src1 = temp; }
6799 if (cmpf == 3) cmpf = 1;
6800 else if (cmpf == 2) cmpf = 0;
6801 }
6802
6803 unsigned derived_6 = 0;
6804 if (cmpf == 0) derived_6 = 0;
6805 else if (cmpf == 1) derived_6 = 1;
6806 else unreachable("No pattern match at pos 6");
6807
6808 return 0x7b000 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6);
6809 }
6810
6811 static inline unsigned
pan_pack_fma_frshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6812 pan_pack_fma_frshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6813 {
6814 unsigned src0 = bi_get_src(ins, regs, 0);
6815 assert((1 << src0) & 0xfb);
6816 unsigned src1 = bi_get_src(ins, regs, 1);
6817 assert((1 << src1) & 0xfb);
6818 unsigned src2 = bi_get_src(ins, regs, 2);
6819
6820 unsigned bytes2 = 0;
6821
6822 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
6823 unsigned lane2_temp = 0;
6824 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
6825 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
6826 else unreachable("Could not pattern match widen");
6827 unsigned lane2 = lane2_temp;
6828 assert(lane2 < 2);
6829
6830 return 0x33f000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10);
6831 }
6832
6833 static inline unsigned
pan_pack_add_frcbrt_approx_c_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6834 pan_pack_add_frcbrt_approx_c_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6835 {
6836 unsigned src0 = bi_get_src(ins, regs, 0);
6837 assert((1 << src0) & 0xf7);
6838
6839 return 0x67ab8 | (src0 << 0);
6840 }
6841
6842 static inline unsigned
pan_pack_add_hadd_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6843 pan_pack_add_hadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6844 {
6845 unsigned src0 = bi_get_src(ins, regs, 0);
6846 unsigned src1 = bi_get_src(ins, regs, 1);
6847
6848 assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
6849 unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
6850 assert(round < 2);
6851
6852 return 0xbc440 | (src0 << 0) | (src1 << 3) | (round << 12);
6853 }
6854
6855 static inline unsigned
pan_pack_add_s16_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6856 pan_pack_add_s16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6857 {
6858 unsigned src0 = bi_get_src(ins, regs, 0);
6859
6860 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6861 unsigned lane0_temp = 0;
6862 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6863 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6864 else unreachable("Could not pattern match widen");
6865 unsigned lane0 = lane0_temp;
6866 assert(lane0 < 2);
6867
6868 return 0x3cce0 | (src0 << 0) | (lane0 << 4);
6869 }
6870
6871 static inline unsigned
pan_pack_add_v2u8_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6872 pan_pack_add_v2u8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6873 {
6874 unsigned src0 = bi_get_src(ins, regs, 0);
6875
6876 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6877 unsigned swz0_temp = 0;
6878 if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
6879 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
6880 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
6881 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
6882 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
6883 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
6884 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
6885 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
6886 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
6887 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
6888 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
6889 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
6890 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
6891 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
6892 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
6893 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
6894 else unreachable("Could not pattern match widen");
6895 unsigned swz0 = swz0_temp;
6896 assert(swz0 < 16);
6897
6898 return 0x3c808 | (src0 << 0) | (swz0 << 4);
6899 }
6900
6901 static inline unsigned
pan_pack_add_branchz_s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6902 pan_pack_add_branchz_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6903 {
6904 unsigned src0 = bi_get_src(ins, regs, 0);
6905 unsigned src1 = bi_get_src(ins, regs, 1);
6906 assert((1 << src1) & 0xf7);
6907
6908 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6909 unsigned widen0_temp = 0;
6910 if (widen0_sz == 32) widen0_temp = 0;
6911 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
6912 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
6913 else unreachable("Could not pattern match widen");
6914 unsigned widen0 = widen0_temp;
6915 assert(widen0 < 4);
6916
6917 unsigned cmpf_table[] = {
6918 ~0, 2, 3, 1, 0, ~0, ~0
6919 };
6920 unsigned cmpf = cmpf_table[ins->cond];
6921 assert(cmpf < 4);
6922
6923 unsigned derived_4 = 0;
6924 if (widen0 == 2) derived_4 = 1;
6925 else if (widen0 == 1) derived_4 = 2;
6926 else unreachable("No pattern match at pos 4");
6927
6928 unsigned derived_9 = 0;
6929 if (cmpf == 2) derived_9 = 0;
6930 else if (cmpf == 3) derived_9 = 1;
6931 else if (cmpf == 1) derived_9 = 2;
6932 else if (cmpf == 0) derived_9 = 3;
6933 else unreachable("No pattern match at pos 9");
6934
6935 return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9);
6936 }
6937
6938 static inline unsigned
pan_pack_fma_imul_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6939 pan_pack_fma_imul_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6940 {
6941 unsigned src0 = bi_get_src(ins, regs, 0);
6942 assert((1 << src0) & 0xfb);
6943 unsigned src1 = bi_get_src(ins, regs, 1);
6944 assert((1 << src1) & 0xfb);
6945
6946 unsigned replicate0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6947 unsigned replicate0_temp = 0;
6948 if (replicate0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) replicate0_temp = 0;
6949 else unreachable("Could not pattern match widen");
6950 unsigned replicate0 = replicate0_temp;
6951 assert(replicate0 < 8);
6952
6953 unsigned replicate1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
6954 unsigned replicate1_temp = 0;
6955 if (replicate1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) replicate1_temp = 0;
6956 else if (replicate1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) replicate1_temp = 1;
6957 else if (replicate1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) replicate1_temp = 2;
6958 else if (replicate1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) replicate1_temp = 3;
6959 else if (replicate1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) replicate1_temp = 4;
6960 else unreachable("Could not pattern match widen");
6961 unsigned replicate1 = replicate1_temp;
6962 assert(replicate1 < 8);
6963
6964 if ((replicate0 == 0) && (replicate1 == 0)) {
6965 return 0x73e0c0 | (src0 << 0) | (src1 << 3);
6966 } else if ((replicate0 == 0) && (replicate1 != 0)) {
6967 unsigned derived_9 = 0;
6968 if (replicate1 == 1) derived_9 = 0;
6969 else if (replicate1 == 2) derived_9 = 1;
6970 else if (replicate1 == 3) derived_9 = 2;
6971 else if (replicate1 == 4) derived_9 = 3;
6972 else unreachable("No pattern match at pos 9");
6973
6974 return 0x7380c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9);
6975 } else {
6976 unreachable("No matching state found in fma_imul_v4i8");
6977 }
6978 }
6979
6980 static inline unsigned
pan_pack_add_s16_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6981 pan_pack_add_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6982 {
6983 unsigned src0 = bi_get_src(ins, regs, 0);
6984
6985 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
6986 unsigned lane0_temp = 0;
6987 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
6988 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
6989 else unreachable("Could not pattern match widen");
6990 unsigned lane0 = lane0_temp;
6991 assert(lane0 < 2);
6992
6993 return 0x3ccc0 | (src0 << 0) | (lane0 << 4);
6994 }
6995
6996 static inline unsigned
pan_pack_add_f32_to_s32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)6997 pan_pack_add_f32_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
6998 {
6999 unsigned src0 = bi_get_src(ins, regs, 0);
7000
7001 unsigned round = ins->roundmode;
7002 assert(round < 8);
7003
7004 if (round != 4) {
7005 unsigned derived_4 = 0;
7006 if (round == 0) derived_4 = 0;
7007 else if (round == 1) derived_4 = 1;
7008 else if (round == 2) derived_4 = 2;
7009 else if (round == 3) derived_4 = 3;
7010 else unreachable("No pattern match at pos 4");
7011
7012 return 0x3c980 | (src0 << 0) | (derived_4 << 4);
7013 } else if (round == 4) {
7014 return 0x3cca0 | (src0 << 0);
7015 } else {
7016 unreachable("No matching state found in add_f32_to_s32");
7017 }
7018 }
7019
7020 static inline unsigned
pan_pack_fma_rshift_xor_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7021 pan_pack_fma_rshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7022 {
7023 unsigned src0 = bi_get_src(ins, regs, 0);
7024 assert((1 << src0) & 0xfb);
7025 unsigned src1 = bi_get_src(ins, regs, 1);
7026 assert((1 << src1) & 0xfb);
7027 unsigned src2 = bi_get_src(ins, regs, 2);
7028
7029 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7030 unsigned lane2_temp = 0;
7031 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
7032 else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
7033 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
7034 else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
7035 else unreachable("Could not pattern match widen");
7036 unsigned lane2 = lane2_temp;
7037 assert(lane2 < 4);
7038
7039 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7040 assert(not_result < 2);
7041
7042 return 0x321000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13);
7043 }
7044
7045 static inline unsigned
pan_pack_add_fatan_assist_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7046 pan_pack_add_fatan_assist_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7047 {
7048 unsigned src0 = bi_get_src(ins, regs, 0);
7049 assert((1 << src0) & 0xf7);
7050 unsigned src1 = bi_get_src(ins, regs, 1);
7051 assert((1 << src1) & 0xf7);
7052
7053 return 0x67a00 | (src0 << 0) | (src1 << 3);
7054 }
7055
7056 static inline unsigned
pan_pack_add_mux_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7057 pan_pack_add_mux_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7058 {
7059 unsigned src0 = bi_get_src(ins, regs, 0);
7060 unsigned src1 = bi_get_src(ins, regs, 1);
7061 unsigned src2 = bi_get_src(ins, regs, 2);
7062
7063 unsigned mux = 1;
7064
7065 return 0x74800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9);
7066 }
7067
7068 static inline unsigned
pan_pack_fma_lshift_xor_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7069 pan_pack_fma_lshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7070 {
7071 unsigned src0 = bi_get_src(ins, regs, 0);
7072 assert((1 << src0) & 0xfb);
7073 unsigned src1 = bi_get_src(ins, regs, 1);
7074 assert((1 << src1) & 0xfb);
7075 unsigned src2 = bi_get_src(ins, regs, 2);
7076
7077 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7078 unsigned lanes2_temp = 0;
7079 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
7080 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
7081 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
7082 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
7083 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
7084 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
7085 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
7086 else unreachable("Could not pattern match widen");
7087 unsigned lanes2 = lanes2_temp;
7088 assert(lanes2 < 8);
7089
7090 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7091 assert(not_result < 2);
7092
7093 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
7094 unsigned derived_9 = 0;
7095 if (lanes2 == 0) derived_9 = 0;
7096 else if (lanes2 == 1) derived_9 = 1;
7097 else if (lanes2 == 2) derived_9 = 2;
7098 else if (lanes2 == 3) derived_9 = 3;
7099 else unreachable("No pattern match at pos 9");
7100
7101 return 0x324800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
7102 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
7103 unsigned derived_9 = 0;
7104 if (lanes2 == 4) derived_9 = 1;
7105 else if (lanes2 == 5) derived_9 = 2;
7106 else if (lanes2 == 6) derived_9 = 3;
7107 else unreachable("No pattern match at pos 9");
7108
7109 return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9);
7110 } else {
7111 unreachable("No matching state found in fma_lshift_xor_v2i16");
7112 }
7113 }
7114
7115 static inline unsigned
pan_pack_add_load_i96(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7116 pan_pack_add_load_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7117 {
7118 unsigned src0 = bi_get_src(ins, regs, 0);
7119 unsigned src1 = bi_get_src(ins, regs, 1);
7120
7121 assert(ins->segment);
7122 unsigned seg = ins->segment;
7123 assert(seg < 8);
7124
7125 bi_write_staging_register(clause, ins);
7126 return 0x65400 | (src0 << 0) | (src1 << 3) | (seg << 6);
7127 }
7128
7129 static inline unsigned
pan_pack_fma_lshift_or_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7130 pan_pack_fma_lshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7131 {
7132 unsigned src0 = bi_get_src(ins, regs, 0);
7133 assert((1 << src0) & 0xfb);
7134 unsigned src1 = bi_get_src(ins, regs, 1);
7135 assert((1 << src1) & 0xfb);
7136 unsigned src2 = bi_get_src(ins, regs, 2);
7137
7138 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7139 unsigned lane2_temp = 0;
7140 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
7141 else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
7142 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
7143 else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
7144 else unreachable("Could not pattern match widen");
7145 unsigned lane2 = lane2_temp;
7146 assert(lane2 < 4);
7147
7148 unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
7149 assert(not1 < 2);
7150
7151 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7152 assert(not_result < 2);
7153
7154 return 0x313000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
7155 }
7156
7157 static inline unsigned
pan_pack_fma_lshift_or_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7158 pan_pack_fma_lshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7159 {
7160 unsigned src0 = bi_get_src(ins, regs, 0);
7161 assert((1 << src0) & 0xfb);
7162 unsigned src1 = bi_get_src(ins, regs, 1);
7163 assert((1 << src1) & 0xfb);
7164 unsigned src2 = bi_get_src(ins, regs, 2);
7165
7166 unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7167 unsigned lanes2_temp = 0;
7168 if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0;
7169 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1;
7170 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2;
7171 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3;
7172 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4;
7173 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5;
7174 else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6;
7175 else unreachable("Could not pattern match widen");
7176 unsigned lanes2 = lanes2_temp;
7177 assert(lanes2 < 8);
7178
7179 unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
7180 assert(not1 < 2);
7181
7182 unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
7183 assert(not_result < 2);
7184
7185 if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
7186 unsigned derived_9 = 0;
7187 if (lanes2 == 0) derived_9 = 0;
7188 else if (lanes2 == 1) derived_9 = 1;
7189 else if (lanes2 == 2) derived_9 = 2;
7190 else if (lanes2 == 3) derived_9 = 3;
7191 else unreachable("No pattern match at pos 9");
7192
7193 return 0x312800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
7194 } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
7195 unsigned derived_9 = 0;
7196 if (lanes2 == 4) derived_9 = 1;
7197 else if (lanes2 == 5) derived_9 = 2;
7198 else if (lanes2 == 6) derived_9 = 3;
7199 else unreachable("No pattern match at pos 9");
7200
7201 return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
7202 } else {
7203 unreachable("No matching state found in fma_lshift_or_v2i16");
7204 }
7205 }
7206
7207 static inline unsigned
pan_pack_add_ld_gclk_u64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7208 pan_pack_add_ld_gclk_u64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7209 {
7210
7211 unsigned source = 7;
7212
7213 bi_write_staging_register(clause, ins);
7214 return 0xd7800 | (source << 0);
7215 }
7216
7217 static inline unsigned
pan_pack_add_seg_add(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7218 pan_pack_add_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7219 {
7220 unsigned src0 = bi_get_src(ins, regs, 0);
7221
7222 assert(ins->segment);
7223 unsigned seg = ins->segment;
7224 assert(seg < 8);
7225
7226 unsigned preserve_null = 0;
7227
7228 return 0x3d500 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
7229 }
7230
7231 static inline unsigned
pan_pack_add_axchg_i64(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7232 pan_pack_add_axchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7233 {
7234 unsigned src0 = bi_get_src(ins, regs, 1);
7235 unsigned src1 = bi_get_src(ins, regs, 2);
7236
7237 assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
7238 unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
7239 assert(seg < 2);
7240
7241 bi_read_staging_register(clause, ins);
7242 assert(ins->src[0] == ins->dest);
7243 return 0x64100 | (src0 << 0) | (src1 << 3) | (seg << 9);
7244 }
7245
7246 static inline unsigned
pan_pack_add_isub_v4s8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7247 pan_pack_add_isub_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7248 {
7249 unsigned src0 = bi_get_src(ins, regs, 0);
7250 unsigned src1 = bi_get_src(ins, regs, 1);
7251
7252 unsigned saturate = 0;
7253
7254 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7255 unsigned lanes0_temp = 0;
7256 if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
7257 else unreachable("Could not pattern match widen");
7258 unsigned lanes0 = lanes0_temp;
7259 assert(lanes0 < 8);
7260
7261 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7262 unsigned lanes1_temp = 0;
7263 if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
7264 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
7265 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
7266 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
7267 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
7268 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
7269 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
7270 else unreachable("Could not pattern match widen");
7271 unsigned lanes1 = lanes1_temp;
7272 assert(lanes1 < 8);
7273
7274 if ((lanes0 == 0) && (lanes1 == 0)) {
7275 return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8);
7276 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
7277 unsigned derived_9 = 0;
7278 if (lanes1 == 1) derived_9 = 0;
7279 else if (lanes1 == 2) derived_9 = 1;
7280 else if (lanes1 == 3) derived_9 = 2;
7281 else if (lanes1 == 4) derived_9 = 3;
7282 else unreachable("No pattern match at pos 9");
7283
7284 return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
7285 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
7286 unsigned derived_9 = 0;
7287 if (lanes1 == 5) derived_9 = 0;
7288 else if (lanes1 == 6) derived_9 = 1;
7289 else unreachable("No pattern match at pos 9");
7290
7291 return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9);
7292 } else {
7293 unreachable("No matching state found in add_isub_v4s8");
7294 }
7295 }
7296
7297 static inline unsigned
pan_pack_fma_fma_rscale_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7298 pan_pack_fma_fma_rscale_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7299 {
7300 unsigned src0 = bi_get_src(ins, regs, 0);
7301 assert((1 << src0) & 0xfb);
7302 unsigned src1 = bi_get_src(ins, regs, 1);
7303 assert((1 << src1) & 0xfb);
7304 unsigned src2 = bi_get_src(ins, regs, 2);
7305 unsigned src3 = bi_get_src(ins, regs, 3);
7306
7307 assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ);
7308 unsigned round = (ins->roundmode == BIFROST_RTZ) ? 1 : 0;
7309 assert(round < 2);
7310
7311 unsigned clamp = ins->outmod;
7312 assert(clamp < 4);
7313
7314 unsigned neg0 = ins->src_neg[0];
7315 assert(neg0 < 2);
7316
7317 unsigned neg1 = ins->src_neg[1];
7318 assert(neg1 < 2);
7319
7320 unsigned abs0 = ins->src_abs[0];
7321 assert(abs0 < 2);
7322
7323 unsigned neg2 = ins->src_neg[2];
7324 assert(neg2 < 2);
7325
7326 unsigned special = 0;
7327
7328 unsigned derived_16 = 0;
7329 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0;
7330 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1;
7331 else unreachable("No pattern match at pos 16");
7332
7333 unsigned derived_12 = 0;
7334 if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0;
7335 else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1;
7336 else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2;
7337 else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3;
7338 else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4;
7339 else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5;
7340 else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7;
7341 else unreachable("No pattern match at pos 12");
7342
7343 return 0x680000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12);
7344 }
7345
7346 static inline unsigned
pan_pack_add_fpow_sc_apply(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7347 pan_pack_add_fpow_sc_apply(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7348 {
7349 unsigned src0 = bi_get_src(ins, regs, 0);
7350 unsigned src1 = bi_get_src(ins, regs, 1);
7351
7352 return 0x75080 | (src0 << 0) | (src1 << 3);
7353 }
7354
7355 static inline unsigned
pan_pack_add_v2f16_to_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7356 pan_pack_add_v2f16_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7357 {
7358 unsigned src0 = bi_get_src(ins, regs, 0);
7359
7360 unsigned round = ins->roundmode;
7361 assert(round < 8);
7362
7363 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7364 unsigned swz0_temp = 0;
7365 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7366 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7367 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7368 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7369 else unreachable("Could not pattern match widen");
7370 unsigned swz0 = swz0_temp;
7371 assert(swz0 < 4);
7372
7373 if (round != 4) {
7374 unsigned derived_4 = 0;
7375 if (round == 0) derived_4 = 0;
7376 else if (round == 1) derived_4 = 1;
7377 else if (round == 2) derived_4 = 2;
7378 else if (round == 3) derived_4 = 3;
7379 else unreachable("No pattern match at pos 4");
7380
7381 return 0x3c200 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
7382 } else if (round == 4) {
7383 return 0x3ca80 | (src0 << 0) | (swz0 << 4);
7384 } else {
7385 unreachable("No matching state found in add_v2f16_to_v2s16");
7386 }
7387 }
7388
7389 static inline unsigned
pan_pack_add_icmp_v4i8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7390 pan_pack_add_icmp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7391 {
7392 unsigned src0 = bi_get_src(ins, regs, 0);
7393 unsigned src1 = bi_get_src(ins, regs, 1);
7394
7395 unsigned result_type = 1;
7396
7397 unsigned cmpf_table[] = {
7398 ~0, ~0, ~0, ~0, ~0, 0, 1
7399 };
7400 unsigned cmpf = cmpf_table[ins->cond];
7401 assert(cmpf < 2);
7402
7403 return 0x7b100 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
7404 }
7405
7406 static inline unsigned
pan_pack_add_eureka(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7407 pan_pack_add_eureka(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7408 {
7409 unsigned src0 = bi_get_src(ins, regs, 0);
7410
7411 return 0xd7850 | (src0 << 0);
7412 }
7413
7414 static inline unsigned
pan_pack_add_branch_u16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7415 pan_pack_add_branch_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7416 {
7417 unsigned src0 = bi_get_src(ins, regs, 0);
7418 unsigned src1 = bi_get_src(ins, regs, 1);
7419 unsigned src2 = bi_get_src(ins, regs, 2);
7420 assert((1 << src2) & 0xf7);
7421
7422 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7423 unsigned widen0_temp = 0;
7424 if (widen0_sz == 32) widen0_temp = 0;
7425 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7426 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7427 else unreachable("Could not pattern match widen");
7428 unsigned widen0 = widen0_temp;
7429 assert(widen0 < 4);
7430
7431 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7432 unsigned widen1_temp = 0;
7433 if (widen1_sz == 32) widen1_temp = 0;
7434 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
7435 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
7436 else unreachable("Could not pattern match widen");
7437 unsigned widen1 = widen1_temp;
7438 assert(widen1 < 4);
7439
7440 unsigned cmpf_table[] = {
7441 ~0, 2, 3, 1, 0, ~0, ~0
7442 };
7443 unsigned cmpf = cmpf_table[ins->cond];
7444 assert(cmpf < 4);
7445
7446 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 < src1))) {
7447 { unsigned temp = src0; src0 = src1; src1 = temp; }
7448 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
7449 if (cmpf == 0) cmpf = 2;
7450 else if (cmpf == 3) cmpf = 1;
7451 else if (cmpf == 2) cmpf = 0;
7452 else if (cmpf == 1) cmpf = 3;
7453 }
7454
7455 unsigned derived_12 = 0;
7456 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
7457 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
7458 else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 3;
7459 else unreachable("No pattern match at pos 12");
7460
7461 unsigned derived_9 = 0;
7462 if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 0;
7463 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 1;
7464 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 2;
7465 else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 0))) derived_9 = 3;
7466 else unreachable("No pattern match at pos 9");
7467
7468 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
7469 }
7470
7471 static inline unsigned
pan_pack_add_v2f32_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7472 pan_pack_add_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7473 {
7474 unsigned src0 = bi_get_src(ins, regs, 0);
7475 unsigned src1 = bi_get_src(ins, regs, 1);
7476
7477 unsigned abs0 = ins->src_abs[0];
7478 assert(abs0 < 2);
7479
7480 unsigned abs1 = ins->src_abs[1];
7481 assert(abs1 < 2);
7482
7483 unsigned neg0 = ins->src_neg[0];
7484 assert(neg0 < 2);
7485
7486 unsigned neg1 = ins->src_neg[1];
7487 assert(neg1 < 2);
7488
7489 unsigned clamp = ins->outmod;
7490 assert(clamp < 4);
7491
7492 unsigned round = ins->roundmode;
7493 assert(round < 8);
7494
7495 unsigned derived_6 = 0;
7496 if ((abs0 == 0) && (abs1 == 0)) derived_6 = 0;
7497 else if ((abs0 == 1) && (abs1 == 1)) derived_6 = 1;
7498 else unreachable("No pattern match at pos 6");
7499
7500 unsigned derived_7 = 0;
7501 if ((neg0 == 0) && (neg1 == 0)) derived_7 = 0;
7502 else if ((neg0 == 1) && (neg1 == 1)) derived_7 = 1;
7503 else unreachable("No pattern match at pos 7");
7504
7505 return 0x76000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (derived_6 << 6) | (derived_7 << 7);
7506 }
7507
7508 static inline unsigned
pan_pack_add_frcbrt_approx_a_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7509 pan_pack_add_frcbrt_approx_a_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7510 {
7511 unsigned src0 = bi_get_src(ins, regs, 0);
7512 assert((1 << src0) & 0xf7);
7513
7514 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7515 unsigned widen0_temp = 0;
7516 if (widen0_sz == 32) widen0_temp = 0;
7517 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7518 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7519 else unreachable("Could not pattern match widen");
7520 unsigned widen0 = widen0_temp;
7521 assert(widen0 < 4);
7522
7523 unsigned neg = ins->src_neg[0];
7524 assert(neg < 2);
7525
7526 unsigned abs0 = ins->src_abs[0];
7527 assert(abs0 < 2);
7528
7529 unsigned divzero = 0;
7530
7531 if (widen0 == 0) {
7532 return 0x67200 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
7533 } else if (widen0 != 0) {
7534 unsigned derived_7 = 0;
7535 if (widen0 == 1) derived_7 = 0;
7536 else if (widen0 == 2) derived_7 = 1;
7537 else unreachable("No pattern match at pos 7");
7538
7539 return 0x67240 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
7540 } else {
7541 unreachable("No matching state found in add_frcbrt_approx_a_f32");
7542 }
7543 }
7544
7545 static inline unsigned
pan_pack_fma_atom_c_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7546 pan_pack_fma_atom_c_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7547 {
7548 unsigned src0 = bi_get_src(ins, regs, 0);
7549 assert((1 << src0) & 0xf3);
7550 unsigned src1 = bi_get_src(ins, regs, 1);
7551 assert((1 << src1) & 0xf3);
7552 unsigned src2 = bi_get_src(ins, regs, 2);
7553 assert((1 << src2) & 0xf7);
7554
7555 unsigned atom_opc = 2;
7556
7557 return 0x2f4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
7558 }
7559
7560 static inline unsigned
pan_pack_fma_seg_add(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7561 pan_pack_fma_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7562 {
7563 unsigned src0 = bi_get_src(ins, regs, 0);
7564 assert((1 << src0) & 0xfb);
7565
7566 assert(ins->segment);
7567 unsigned seg = ins->segment;
7568 assert(seg < 8);
7569
7570 unsigned preserve_null = 0;
7571
7572 return 0x701500 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
7573 }
7574
7575 static inline unsigned
pan_pack_add_store_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7576 pan_pack_add_store_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7577 {
7578 unsigned src0 = bi_get_src(ins, regs, 1);
7579 unsigned src1 = bi_get_src(ins, regs, 2);
7580
7581 assert(ins->segment);
7582 unsigned seg = ins->segment;
7583 assert(seg < 8);
7584
7585 bi_read_staging_register(clause, ins);
7586 return 0x62c00 | (src0 << 0) | (src1 << 3) | (seg << 6);
7587 }
7588
7589 static inline unsigned
pan_pack_add_fatan_assist_f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7590 pan_pack_add_fatan_assist_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7591 {
7592 unsigned src0 = bi_get_src(ins, regs, 0);
7593 assert((1 << src0) & 0xf7);
7594 unsigned src1 = bi_get_src(ins, regs, 1);
7595 assert((1 << src1) & 0xf7);
7596
7597 unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7598 unsigned lane1_temp = 0;
7599 if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0;
7600 else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1;
7601 else unreachable("Could not pattern match widen");
7602 unsigned lane1 = lane1_temp;
7603 assert(lane1 < 2);
7604
7605 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7606 unsigned lane0_temp = 0;
7607 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
7608 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
7609 else unreachable("Could not pattern match widen");
7610 unsigned lane0 = lane0_temp;
7611 assert(lane0 < 2);
7612
7613 return 0x67800 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7);
7614 }
7615
7616 static inline unsigned
pan_pack_add_v2u16_to_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7617 pan_pack_add_v2u16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7618 {
7619 unsigned src0 = bi_get_src(ins, regs, 0);
7620
7621 unsigned round = ins->roundmode;
7622 assert(round < 8);
7623
7624 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7625 unsigned swz0_temp = 0;
7626 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7627 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7628 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7629 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7630 else unreachable("Could not pattern match widen");
7631 unsigned swz0 = swz0_temp;
7632 assert(swz0 < 4);
7633
7634 if (round != 4) {
7635 unsigned derived_4 = 0;
7636 if (round == 0) derived_4 = 0;
7637 else if (round == 1) derived_4 = 1;
7638 else if (round == 2) derived_4 = 2;
7639 else if (round == 3) derived_4 = 3;
7640 else unreachable("No pattern match at pos 4");
7641
7642 return 0x3c608 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4);
7643 } else if (round == 4) {
7644 return 0x3cb08 | (src0 << 0) | (swz0 << 4);
7645 } else {
7646 unreachable("No matching state found in add_v2u16_to_v2f16");
7647 }
7648 }
7649
7650 static inline unsigned
pan_pack_add_iadd_v4u8(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7651 pan_pack_add_iadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7652 {
7653 unsigned src0 = bi_get_src(ins, regs, 0);
7654 unsigned src1 = bi_get_src(ins, regs, 1);
7655
7656 unsigned saturate = 0;
7657
7658 unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7659 unsigned lanes0_temp = 0;
7660 if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0;
7661 else unreachable("Could not pattern match widen");
7662 unsigned lanes0 = lanes0_temp;
7663 assert(lanes0 < 8);
7664
7665 unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7666 unsigned lanes1_temp = 0;
7667 if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0;
7668 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1;
7669 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2;
7670 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3;
7671 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4;
7672 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5;
7673 else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6;
7674 else unreachable("Could not pattern match widen");
7675 unsigned lanes1 = lanes1_temp;
7676 assert(lanes1 < 8);
7677
7678 if ((lanes0 == 0) && (lanes1 == 0)) {
7679 unsigned derived_7 = 0;
7680 if (saturate == 0) derived_7 = 0;
7681 else if (saturate == 1) derived_7 = 1;
7682 else unreachable("No pattern match at pos 7");
7683
7684 return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7);
7685 } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) {
7686 unsigned derived_7 = 0;
7687 if (saturate == 0) derived_7 = 0;
7688 else if (saturate == 1) derived_7 = 1;
7689 else unreachable("No pattern match at pos 7");
7690
7691 unsigned derived_9 = 0;
7692 if (lanes1 == 1) derived_9 = 0;
7693 else if (lanes1 == 2) derived_9 = 1;
7694 else if (lanes1 == 3) derived_9 = 2;
7695 else if (lanes1 == 4) derived_9 = 3;
7696 else unreachable("No pattern match at pos 9");
7697
7698 return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
7699 } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) {
7700 unsigned derived_7 = 0;
7701 if (saturate == 0) derived_7 = 0;
7702 else if (saturate == 1) derived_7 = 1;
7703 else unreachable("No pattern match at pos 7");
7704
7705 unsigned derived_9 = 0;
7706 if (lanes1 == 5) derived_9 = 0;
7707 else if (lanes1 == 6) derived_9 = 1;
7708 else unreachable("No pattern match at pos 9");
7709
7710 return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9);
7711 } else {
7712 unreachable("No matching state found in add_iadd_v4u8");
7713 }
7714 }
7715
7716 static inline unsigned
pan_pack_add_store_i96(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7717 pan_pack_add_store_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7718 {
7719 unsigned src0 = bi_get_src(ins, regs, 1);
7720 unsigned src1 = bi_get_src(ins, regs, 2);
7721
7722 assert(ins->segment);
7723 unsigned seg = ins->segment;
7724 assert(seg < 8);
7725
7726 bi_read_staging_register(clause, ins);
7727 return 0x65c00 | (src0 << 0) | (src1 << 3) | (seg << 6);
7728 }
7729
7730 static inline unsigned
pan_pack_fma_lshift_and_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7731 pan_pack_fma_lshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7732 {
7733 unsigned src0 = bi_get_src(ins, regs, 0);
7734 assert((1 << src0) & 0xfb);
7735 unsigned src1 = bi_get_src(ins, regs, 1);
7736 assert((1 << src1) & 0xfb);
7737 unsigned src2 = bi_get_src(ins, regs, 2);
7738
7739 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
7740 unsigned lane2_temp = 0;
7741 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
7742 else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1;
7743 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2;
7744 else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3;
7745 else unreachable("Could not pattern match widen");
7746 unsigned lane2 = lane2_temp;
7747 assert(lane2 < 4);
7748
7749 unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
7750 assert(not1 < 2);
7751
7752 unsigned not_result = ins->bitwise.dest_invert ? 0 : 1;
7753 assert(not_result < 2);
7754
7755 return 0x311000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
7756 }
7757
7758 static inline unsigned
pan_pack_fma_u16_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7759 pan_pack_fma_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7760 {
7761 unsigned src0 = bi_get_src(ins, regs, 0);
7762 assert((1 << src0) & 0xfb);
7763
7764 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7765 unsigned lane0_temp = 0;
7766 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
7767 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
7768 else unreachable("Could not pattern match widen");
7769 unsigned lane0 = lane0_temp;
7770 assert(lane0 < 2);
7771
7772 return 0x700cc8 | (src0 << 0) | (lane0 << 4);
7773 }
7774
7775 static inline unsigned
pan_pack_add_wmask(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7776 pan_pack_add_wmask(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7777 {
7778 unsigned src0 = bi_get_src(ins, regs, 0);
7779
7780 unsigned subgroup = 1;
7781
7782 unsigned fill = 0;
7783 return 0x3d700 | (src0 << 0) | (subgroup << 4) | (fill << 3);
7784 }
7785
7786 static inline unsigned
pan_pack_add_fadd_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7787 pan_pack_add_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7788 {
7789 unsigned src0 = bi_get_src(ins, regs, 0);
7790 unsigned src1 = bi_get_src(ins, regs, 1);
7791
7792 unsigned abs1 = ins->src_abs[1];
7793 assert(abs1 < 2);
7794
7795 unsigned neg0 = ins->src_neg[0];
7796 assert(neg0 < 2);
7797
7798 unsigned neg1 = ins->src_neg[1];
7799 assert(neg1 < 2);
7800
7801 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7802 unsigned swz0_temp = 0;
7803 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7804 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7805 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7806 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7807 else unreachable("Could not pattern match widen");
7808 unsigned swz0 = swz0_temp;
7809 assert(swz0 < 4);
7810
7811 unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
7812 unsigned swz1_temp = 0;
7813 if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0;
7814 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1;
7815 else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2;
7816 else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3;
7817 else unreachable("Could not pattern match widen");
7818 unsigned swz1 = swz1_temp;
7819 assert(swz1 < 4);
7820
7821 unsigned round = ins->roundmode;
7822 assert(round < 4);
7823
7824 unsigned abs0 = ins->src_abs[0];
7825 assert(abs0 < 2);
7826
7827 return 0xa0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (abs0 << 15);
7828 }
7829
7830 static inline unsigned
pan_pack_add_flog_table_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7831 pan_pack_add_flog_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7832 {
7833 unsigned src0 = bi_get_src(ins, regs, 0);
7834 assert((1 << src0) & 0xf7);
7835
7836 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7837 unsigned widen0_temp = 0;
7838 if (widen0_sz == 32) widen0_temp = 0;
7839 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7840 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7841 else unreachable("Could not pattern match widen");
7842 unsigned widen0 = widen0_temp;
7843 assert(widen0 < 4);
7844
7845 unsigned mode = 0;
7846
7847 unsigned precision = 0;
7848
7849 unsigned neg = ins->src_neg[0];
7850 assert(neg < 2);
7851
7852 unsigned abs0 = ins->src_abs[0];
7853 assert(abs0 < 2);
7854
7855 unsigned divzero = 0;
7856
7857 if ((mode == 0) && (widen0 == 0) && (precision == 0)) {
7858 return 0x67300 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);
7859 } else if ((mode == 0) && (widen0 != 0) && (precision == 0)) {
7860 unsigned derived_7 = 0;
7861 if (widen0 == 1) derived_7 = 0;
7862 else if (widen0 == 2) derived_7 = 1;
7863 else unreachable("No pattern match at pos 7");
7864
7865 return 0x67340 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7);
7866 } else if ((mode != 0) && (widen0 == 0) && (precision == 0) && (divzero == 0)) {
7867 unsigned derived_5 = 0;
7868 if (mode == 1) derived_5 = 0;
7869 else if (mode == 2) derived_5 = 1;
7870 else unreachable("No pattern match at pos 5");
7871
7872 return 0x67b00 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5);
7873 } else if ((mode != 0) && (widen0 != 0) && (precision == 0) && (divzero == 0)) {
7874 unsigned derived_5 = 0;
7875 if (mode == 1) derived_5 = 0;
7876 else if (mode == 2) derived_5 = 1;
7877 else unreachable("No pattern match at pos 5");
7878
7879 unsigned derived_7 = 0;
7880 if (widen0 == 1) derived_7 = 0;
7881 else if (widen0 == 2) derived_7 = 1;
7882 else unreachable("No pattern match at pos 7");
7883
7884 return 0x67b40 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5) | (derived_7 << 7);
7885 } else if ((mode != 0) && (widen0 == 0) && (precision != 0) && (divzero == 0) && (abs0 == 0) && (neg == 0)) {
7886 unsigned derived_3 = 0;
7887 if (mode == 2) derived_3 = 0;
7888 else if (mode == 1) derived_3 = 1;
7889 else unreachable("No pattern match at pos 3");
7890
7891 unsigned derived_4 = 0;
7892 if (precision == 1) derived_4 = 0;
7893 else if (precision == 2) derived_4 = 1;
7894 else unreachable("No pattern match at pos 4");
7895
7896 return 0x67ae0 | (src0 << 0) | (derived_3 << 3) | (derived_4 << 4);
7897 } else {
7898 unreachable("No matching state found in add_flog_table_f32");
7899 }
7900 }
7901
7902 static inline unsigned
pan_pack_add_branchz_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7903 pan_pack_add_branchz_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7904 {
7905 unsigned src0 = bi_get_src(ins, regs, 0);
7906 unsigned src1 = bi_get_src(ins, regs, 1);
7907 assert((1 << src1) & 0xf7);
7908
7909 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7910 unsigned widen0_temp = 0;
7911 if (widen0_sz == 32) widen0_temp = 0;
7912 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
7913 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
7914 else unreachable("Could not pattern match widen");
7915 unsigned widen0 = widen0_temp;
7916 assert(widen0 < 4);
7917
7918 unsigned cmpf_table[] = {
7919 ~0, ~0, ~0, ~0, ~0, 0, 1
7920 };
7921 unsigned cmpf = cmpf_table[ins->cond];
7922 assert(cmpf < 2);
7923
7924 unsigned derived_4 = 0;
7925 if (widen0 == 2) derived_4 = 1;
7926 else if (widen0 == 1) derived_4 = 2;
7927 else unreachable("No pattern match at pos 4");
7928
7929 unsigned derived_3 = 0;
7930 if (cmpf == 1) derived_3 = 0;
7931 else if (cmpf == 0) derived_3 = 1;
7932 else unreachable("No pattern match at pos 3");
7933
7934 return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3);
7935 }
7936
7937 static inline unsigned
pan_pack_add_ilogb_v2f16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7938 pan_pack_add_ilogb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7939 {
7940 unsigned src0 = bi_get_src(ins, regs, 0);
7941
7942 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7943 unsigned swz0_temp = 0;
7944 if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7945 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7946 else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2;
7947 else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3;
7948 else unreachable("Could not pattern match widen");
7949 unsigned swz0 = swz0_temp;
7950 assert(swz0 < 4);
7951
7952 return 0x3d9c0 | (src0 << 0) | (swz0 << 3);
7953 }
7954
7955 static inline unsigned
pan_pack_add_v2s8_to_v2s16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7956 pan_pack_add_v2s8_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7957 {
7958 unsigned src0 = bi_get_src(ins, regs, 0);
7959
7960 unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
7961 unsigned swz0_temp = 0;
7962 if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0;
7963 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1;
7964 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2;
7965 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3;
7966 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4;
7967 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5;
7968 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6;
7969 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7;
7970 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8;
7971 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9;
7972 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10;
7973 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11;
7974 else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12;
7975 else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13;
7976 else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14;
7977 else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15;
7978 else unreachable("Could not pattern match widen");
7979 unsigned swz0 = swz0_temp;
7980 assert(swz0 < 16);
7981
7982 return 0x3c700 | (src0 << 0) | (swz0 << 4);
7983 }
7984
7985 static inline unsigned
pan_pack_add_u32_to_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)7986 pan_pack_add_u32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
7987 {
7988 unsigned src0 = bi_get_src(ins, regs, 0);
7989
7990 unsigned round = ins->roundmode;
7991 assert(round < 8);
7992
7993 if (round != 4) {
7994 unsigned derived_4 = 0;
7995 if (round == 0) derived_4 = 0;
7996 else if (round == 1) derived_4 = 1;
7997 else if (round == 2) derived_4 = 2;
7998 else if (round == 3) derived_4 = 3;
7999 else unreachable("No pattern match at pos 4");
8000
8001 return 0x3cbc8 | (src0 << 0) | (derived_4 << 4);
8002 } else if (round == 4) {
8003 return 0x3cd08 | (src0 << 0);
8004 } else {
8005 unreachable("No matching state found in add_u32_to_f32");
8006 }
8007 }
8008
8009 static inline unsigned
pan_pack_add_blend(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8010 pan_pack_add_blend(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8011 {
8012 unsigned src0 = bi_get_src(ins, regs, 1);
8013 unsigned src1 = bi_get_src(ins, regs, 2);
8014 assert((1 << src1) & 0xf7);
8015 unsigned src2 = bi_get_src(ins, regs, 3);
8016 assert((1 << src2) & 0xf7);
8017
8018 bi_read_staging_register(clause, ins);
8019 return 0xca800 | (src0 << 0) | (src1 << 3) | (src2 << 6);
8020 }
8021
8022 static inline unsigned
pan_pack_fma_fma_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8023 pan_pack_fma_fma_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8024 {
8025 unsigned src0 = bi_get_src(ins, regs, 0);
8026 assert((1 << src0) & 0xfb);
8027 unsigned src1 = bi_get_src(ins, regs, 1);
8028 assert((1 << src1) & 0xfb);
8029 unsigned src2 = bi_get_src(ins, regs, 2);
8030
8031 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8032 unsigned widen0_temp = 0;
8033 if (widen0_sz == 32) widen0_temp = 0;
8034 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
8035 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
8036 else unreachable("Could not pattern match widen");
8037 unsigned widen0 = widen0_temp;
8038 assert(widen0 < 4);
8039
8040 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
8041 unsigned widen1_temp = 0;
8042 if (widen1_sz == 32) widen1_temp = 0;
8043 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
8044 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
8045 else unreachable("Could not pattern match widen");
8046 unsigned widen1 = widen1_temp;
8047 assert(widen1 < 4);
8048
8049 unsigned neg0 = ins->src_neg[0];
8050 assert(neg0 < 2);
8051
8052 unsigned neg1 = ins->src_neg[1];
8053 assert(neg1 < 2);
8054
8055 unsigned abs0 = ins->src_abs[0];
8056 assert(abs0 < 2);
8057
8058 unsigned round = ins->roundmode;
8059 assert(round < 4);
8060
8061 unsigned clamp = ins->outmod;
8062 assert(clamp < 4);
8063
8064 unsigned abs1 = ins->src_abs[1];
8065 assert(abs1 < 2);
8066
8067 unsigned neg2 = ins->src_neg[2];
8068 assert(neg2 < 2);
8069
8070 unsigned abs2 = ins->src_abs[2];
8071 assert(abs2 < 2);
8072
8073 if ((widen0 == 2) && (widen1 == 1)) {
8074 { unsigned temp = src0; src0 = src1; src1 = temp; }
8075 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
8076 { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
8077 { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
8078 }
8079
8080 unsigned derived_9 = 0;
8081 if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0;
8082 else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1;
8083 else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2;
8084 else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3;
8085 else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4;
8086 else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5;
8087 else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6;
8088 else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7;
8089 else unreachable("No pattern match at pos 9");
8090
8091 unsigned derived_17 = 0;
8092 if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0;
8093 else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1;
8094 else unreachable("No pattern match at pos 17");
8095
8096 return 0x0 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs0 << 12) | (round << 13) | (clamp << 15) | (abs1 << 19) | (neg2 << 18) | (abs2 << 20) | (derived_9 << 9) | (derived_17 << 17);
8097 }
8098
8099 static inline unsigned
pan_pack_add_branchz_f32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8100 pan_pack_add_branchz_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8101 {
8102 unsigned src0 = bi_get_src(ins, regs, 0);
8103 unsigned src1 = bi_get_src(ins, regs, 1);
8104 assert((1 << src1) & 0xf7);
8105
8106 unsigned cmpf_table[] = {
8107 ~0, 4, 5, 2, 1, 0, 3
8108 };
8109 unsigned cmpf = cmpf_table[ins->cond];
8110 assert(cmpf < 8);
8111
8112 unsigned derived_3 = 0;
8113 if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0;
8114 else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1;
8115 else unreachable("No pattern match at pos 3");
8116
8117 unsigned derived_9 = 0;
8118 if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5;
8119 else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6;
8120 else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7;
8121 else unreachable("No pattern match at pos 9");
8122
8123 return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_3 << 3) | (derived_9 << 9);
8124 }
8125
8126 static inline unsigned
pan_pack_add_lea_tex(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8127 pan_pack_add_lea_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8128 {
8129 unsigned src0 = bi_get_src(ins, regs, 0);
8130 unsigned src1 = bi_get_src(ins, regs, 1);
8131 unsigned src2 = bi_get_src(ins, regs, 2);
8132
8133 unsigned format = 1;
8134
8135 bi_write_staging_register(clause, ins);
8136 return 0xd6600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (format << 11);
8137 }
8138
8139 static inline unsigned
pan_pack_add_branch_diverg(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8140 pan_pack_add_branch_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8141 {
8142 unsigned src0 = bi_get_src(ins, regs, 0);
8143 assert((1 << src0) & 0xf7);
8144
8145 return 0x6f83c | (src0 << 6);
8146 }
8147
8148 static inline unsigned
pan_pack_fma_lrot_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8149 pan_pack_fma_lrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8150 {
8151 unsigned src0 = bi_get_src(ins, regs, 0);
8152 assert((1 << src0) & 0xfb);
8153 unsigned src1 = bi_get_src(ins, regs, 1);
8154 assert((1 << src1) & 0xfb);
8155 unsigned src2 = bi_get_src(ins, regs, 2);
8156
8157 unsigned bytes2 = 0;
8158
8159 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
8160 unsigned lane2_temp = 0;
8161 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
8162 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
8163 else unreachable("Could not pattern match widen");
8164 unsigned lane2 = lane2_temp;
8165 assert(lane2 < 2);
8166
8167 unsigned result_word = 0;
8168
8169 return 0x33b000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
8170 }
8171
8172 static inline unsigned
pan_pack_fma_flshift_double_i32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8173 pan_pack_fma_flshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8174 {
8175 unsigned src0 = bi_get_src(ins, regs, 0);
8176 assert((1 << src0) & 0xfb);
8177 unsigned src1 = bi_get_src(ins, regs, 1);
8178 assert((1 << src1) & 0xfb);
8179 unsigned src2 = bi_get_src(ins, regs, 2);
8180
8181 unsigned bytes2 = 0;
8182
8183 unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
8184 unsigned lane2_temp = 0;
8185 if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0;
8186 else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1;
8187 else unreachable("Could not pattern match widen");
8188 unsigned lane2 = lane2_temp;
8189 assert(lane2 < 2);
8190
8191 return 0x33f800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10);
8192 }
8193
8194 static inline unsigned
pan_pack_fma_fmul_cslice(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8195 pan_pack_fma_fmul_cslice(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8196 {
8197 unsigned src0 = bi_get_src(ins, regs, 0);
8198 assert((1 << src0) & 0xfb);
8199 unsigned src1 = bi_get_src(ins, regs, 1);
8200 assert((1 << src1) & 0xfb);
8201
8202 unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8203 unsigned lane0_temp = 0;
8204 if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0;
8205 else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1;
8206 else unreachable("Could not pattern match widen");
8207 unsigned lane0 = lane0_temp;
8208 assert(lane0 < 2);
8209
8210 unsigned abs0 = ins->src_abs[0];
8211 assert(abs0 < 2);
8212
8213 unsigned neg0 = ins->src_neg[0];
8214 assert(neg0 < 2);
8215
8216 return 0x70d000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (abs0 << 7) | (neg0 << 8);
8217 }
8218
8219 static inline unsigned
pan_pack_add_branch_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8220 pan_pack_add_branch_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8221 {
8222 unsigned src0 = bi_get_src(ins, regs, 0);
8223 unsigned src1 = bi_get_src(ins, regs, 1);
8224 unsigned src2 = bi_get_src(ins, regs, 2);
8225 assert((1 << src2) & 0xf7);
8226
8227 unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8228 unsigned widen0_temp = 0;
8229 if (widen0_sz == 32) widen0_temp = 0;
8230 else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1;
8231 else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2;
8232 else unreachable("Could not pattern match widen");
8233 unsigned widen0 = widen0_temp;
8234 assert(widen0 < 4);
8235
8236 unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
8237 unsigned widen1_temp = 0;
8238 if (widen1_sz == 32) widen1_temp = 0;
8239 else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1;
8240 else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2;
8241 else unreachable("Could not pattern match widen");
8242 unsigned widen1 = widen1_temp;
8243 assert(widen1 < 4);
8244
8245 unsigned cmpf_table[] = {
8246 ~0, ~0, ~0, ~0, ~0, 0, 1
8247 };
8248 unsigned cmpf = cmpf_table[ins->cond];
8249 assert(cmpf < 2);
8250
8251 if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) {
8252 { unsigned temp = src0; src0 = src1; src1 = temp; }
8253 { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
8254 }
8255
8256 unsigned derived_12 = 0;
8257 if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1;
8258 else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2;
8259 else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) derived_12 = 3;
8260 else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) derived_12 = 4;
8261 else unreachable("No pattern match at pos 12");
8262
8263 unsigned derived_9 = 0;
8264 if ((widen0 == widen1) && (src0 == src1) && (cmpf == 0)) derived_9 = 1;
8265 else if (((widen0 == 2) && (widen1 == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 4;
8266 else unreachable("No pattern match at pos 9");
8267
8268 return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
8269 }
8270
8271 static inline unsigned
pan_pack_add_f32_to_u32(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8272 pan_pack_add_f32_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8273 {
8274 unsigned src0 = bi_get_src(ins, regs, 0);
8275
8276 unsigned round = ins->roundmode;
8277 assert(round < 8);
8278
8279 if (round != 4) {
8280 unsigned derived_4 = 0;
8281 if (round == 0) derived_4 = 0;
8282 else if (round == 1) derived_4 = 1;
8283 else if (round == 2) derived_4 = 2;
8284 else if (round == 3) derived_4 = 3;
8285 else unreachable("No pattern match at pos 4");
8286
8287 return 0x3c988 | (src0 << 0) | (derived_4 << 4);
8288 } else if (round == 4) {
8289 return 0x3cca8 | (src0 << 0);
8290 } else {
8291 unreachable("No matching state found in add_f32_to_u32");
8292 }
8293 }
8294
8295 static inline unsigned
pan_pack_add_load_i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8296 pan_pack_add_load_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8297 {
8298 unsigned src0 = bi_get_src(ins, regs, 0);
8299 unsigned src1 = bi_get_src(ins, regs, 1);
8300
8301 assert(ins->segment);
8302 unsigned seg = ins->segment;
8303 assert(seg < 8);
8304
8305 unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8306 unsigned lane_temp = 0;
8307 if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 0;
8308 else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 1;
8309 else if (lane_sz == 32) lane_temp = 2;
8310 else if (lane_sz == 64) lane_temp = 3;
8311 else unreachable("Could not pattern match widen");
8312 unsigned lane = lane_temp;
8313 assert(lane < 4);
8314
8315 ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
8316 bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
8317 unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0;
8318 assert(extend < 4);
8319
8320 bi_write_staging_register(clause, ins);
8321 if ((extend == 0) && ((lane == 0) || (lane == 1))) {
8322 unsigned derived_9 = 0;
8323 if (lane == 0) derived_9 = 0;
8324 else if (lane == 1) derived_9 = 1;
8325 else unreachable("No pattern match at pos 9");
8326
8327 return 0x60800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
8328 } else if ((extend != 0) && (lane == 2)) {
8329 unsigned derived_9 = 0;
8330 if (extend == 1) derived_9 = 0;
8331 else if (extend == 2) derived_9 = 1;
8332 else unreachable("No pattern match at pos 9");
8333
8334 return 0x63000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
8335 } else if ((extend != 0) && (lane == 3)) {
8336 unsigned derived_9 = 0;
8337 if (extend == 1) derived_9 = 0;
8338 else if (extend == 2) derived_9 = 1;
8339 else unreachable("No pattern match at pos 9");
8340
8341 return 0x61800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9);
8342 } else {
8343 unreachable("No matching state found in add_load_i16");
8344 }
8345 }
8346
8347 static inline unsigned
pan_pack_add_mux_v2i16(bi_clause * clause,bi_instruction * ins,bi_registers * regs)8348 pan_pack_add_mux_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
8349 {
8350 unsigned src0 = bi_get_src(ins, regs, 0);
8351 unsigned src1 = bi_get_src(ins, regs, 1);
8352 unsigned src2 = bi_get_src(ins, regs, 2);
8353
8354 unsigned mux = 1;
8355
8356 unsigned swap2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
8357 unsigned swap2_temp = 0;
8358 if (swap2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swap2_temp = 0;
8359 else if (swap2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swap2_temp = 1;
8360 else unreachable("Could not pattern match widen");
8361 unsigned swap2 = swap2_temp;
8362 assert(swap2 < 2);
8363
8364 unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
8365 unsigned swap1_temp = 0;
8366 if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0;
8367 else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1;
8368 else unreachable("Could not pattern match widen");
8369 unsigned swap1 = swap1_temp;
8370 assert(swap1 < 2);
8371
8372 unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
8373 unsigned swap0_temp = 0;
8374 if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0;
8375 else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1;
8376 else unreachable("Could not pattern match widen");
8377 unsigned swap0 = swap0_temp;
8378 assert(swap0 < 2);
8379
8380 return 0x70000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9) | (swap2 << 11) | (swap1 << 12) | (swap0 << 13);
8381 }
8382
8383 #endif
8384