/* * Copyright (C) 2020 Collabora, Ltd. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ /* Autogenerated file, do not edit */ #ifndef _BI_GENERATED_PACK_H #define _BI_GENERATED_PACK_H #include "compiler.h" #include "bi_pack_helpers.h" static inline unsigned pan_pack_fma_rshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 4); unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 
0 : 1; assert(not_result < 2); return 0x301000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); } static inline unsigned pan_pack_add_iadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 32) lanes1_temp = 0; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if (lanes1 == 0) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); } else if ((lanes1 == 1) || (lanes1 == 2)) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 == 0)) derived_7 = 0; else if ((saturate == 1) || (lanes1 != 0)) derived_7 = 1; else unreachable("No pattern 
match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 3) derived_9 = 0; else if (lanes1 == 4) derived_9 = 1; else if (lanes1 == 5) derived_9 = 2; else if (lanes1 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_u32"); } } static inline unsigned pan_pack_add_ld_var_flat(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); unsigned register_format_temp = 0; if (ins->format == nir_type_float32) register_format_temp = 0; else if (ins->format == nir_type_float16) register_format_temp = 1; else if (ins->format == nir_type_uint32) register_format_temp = 2; else if (ins->format == nir_type_int32) register_format_temp = 3; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 8); unsigned function = 3; bi_write_staging_register(clause, ins); if (register_format != 4) { unsigned derived_10 = 0; if ((register_format == 0) || (register_format == 1)) derived_10 = 0; else if ((register_format == 2) || (register_format == 3)) derived_10 = 1; else unreachable("No pattern match at pos 10"); unsigned derived_19 = 0; if ((register_format == 0) || (register_format == 2)) derived_19 = 0; else if ((register_format == 1) || (register_format == 3)) derived_19 = 1; else unreachable("No pattern match at pos 19"); return 0x538c0 | (src0 << 3) | (vecsize << 8) | (function << 0) | (derived_10 << 10) | (derived_19 << 19); } else if (register_format == 4) { return 0xcf8c0 | (src0 << 3) | (vecsize << 8) | (function << 0); } else { unreachable("No matching state found in add_ld_var_flat"); } } static inline unsigned pan_pack_add_store_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = 
bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_read_staging_register(clause, ins); return 0x65800 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_fma_clz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned mask = 0; return 0x701fd0 | (src0 << 0) | (mask << 3); } static inline unsigned pan_pack_fma_clz_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned mask = 0; unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); return 0x701ec0 | (src0 << 0) | (mask << 3) | (swz0 << 4); } static inline unsigned pan_pack_fma_popcount_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); return 0x73c6d8 | (src0 << 0); } static inline unsigned pan_pack_add_fatan_table_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lane1_temp = 0; if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; else 
unreachable("Could not pattern match widen"); unsigned lane1 = lane1_temp; assert(lane1 < 2); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); return 0x67900 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7); } static inline unsigned pan_pack_fma_rrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned bytes2 = 0; unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 2); unsigned result_word = 0; return 0x33a000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); } static inline unsigned pan_pack_fma_isubb_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); return 0x27fe00 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned pan_pack_add_frcbrt_approx_b_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); return 0x67ab0 | (src0 << 0); } static inline unsigned pan_pack_fma_lshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); 
assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not_result = ins->bitwise.dest_invert ? 
1 : 0; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x324000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x325800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); } else { unreachable("No matching state found in fma_lshift_xor_v4i8"); } } static inline unsigned pan_pack_add_texs_cube_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned skip = ins->skip; assert(skip < 2); unsigned sampler_index = ins->texture.sampler_index; unsigned texture_index = ins->texture.texture_index; bi_write_staging_register(clause, ins); return 0x5c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); } static inline unsigned pan_pack_add_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned round = ins->roundmode; assert(round < 4); return 0x3e820 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (round << 9); } static inline unsigned pan_pack_add_fexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = 
bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); return 0x66ac0 | (src0 << 0) | (src1 << 3); } static inline unsigned pan_pack_add_doorbell(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); return 0xd7860 | (src0 << 0); } static inline unsigned pan_pack_add_logb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); return 0x3d980 | (src0 << 0) | (swz0 << 3); } static inline unsigned pan_pack_add_store_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_read_staging_register(clause, ins); return 0x62800 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_fma_arshift_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0x8); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; else if 
(lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x334018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6); } else { unreachable("No matching state found in fma_arshift_v4i8"); } } static inline unsigned pan_pack_fma_vn_asst1_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); unsigned neg2 = ins->src_neg[2]; assert(neg2 < 2); return 0x27c000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (neg2 << 12); } static inline unsigned pan_pack_add_ldexp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned round = ins->roundmode; assert(round < 8); return 0x74c00 | (src0 << 0) | (src1 << 3) | (round << 6); } static inline unsigned 
pan_pack_add_isub_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lanes0_temp = 0; if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lanes0 = lanes0_temp; assert(lanes0 < 2); unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 
<< 7) | (derived_9 << 9) | (derived_10 << 10); } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_v2u16"); } } static inline unsigned pan_pack_add_branchc_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned combine = 0; unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); unsigned derived_9 = 0; if (lane0 == 0) derived_9 = 0; else if (lane0 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_3 = 0; if (lane0 == 1) derived_3 = 0; else if (lane0 == 0) derived_3 = 1; else unreachable("No pattern match at pos 3"); 
return 0x6f030 | (src0 << 0) | (src1 << 6) | (combine << 10) | (derived_9 << 9) | (derived_3 << 3); } static inline unsigned pan_pack_fma_fround_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned round = ins->roundmode; assert(round < 8); if (round != 4) { unsigned derived_9 = 0; if (round == 0) derived_9 = 0; else if (round == 1) derived_9 = 1; else if (round == 2) derived_9 = 2; else if (round == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x70c020 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3) | (derived_9 << 9); } else if (round == 4) { return 0x707620 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (widen0 << 3); } else { unreachable("No matching state found in fma_fround_f32"); } } static inline unsigned pan_pack_add_vn_asst2_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); return 0x3dfa0 | (src0 << 0) | (neg0 << 3); } static inline unsigned pan_pack_add_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if 
(swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned round = ins->roundmode; assert(round < 4); return 0x3e800 | (src0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (round << 9); } static inline unsigned pan_pack_fma_atom_c_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf3); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf3); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned atom_opc = 2; return 0x2f2000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); } static inline unsigned pan_pack_add_icmpi_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, ~0, ~0, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); return 0x7b800 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); } static inline unsigned pan_pack_fma_fma_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) 
swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); unsigned round = ins->roundmode; assert(round < 4); unsigned clamp = ins->outmod; assert(clamp < 4); unsigned neg2 = ins->src_neg[2]; assert(neg2 < 2); unsigned swz2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned swz2_temp = 0; if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) swz2_temp = 0; else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swz2_temp = 1; else if (swz2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swz2_temp = 2; else if (swz2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) swz2_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz2 = swz2_temp; assert(swz2 < 4); unsigned derived_17 = 0; if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; else unreachable("No pattern match at pos 17"); return 0x400000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (neg2 << 18) | (swz2 << 19) | (derived_17 << 17); } static inline unsigned 
pan_pack_fma_arshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    /*
     * Build the encoded word for the "fma_arshift_double_i32" packer.
     * Sources 0 and 1 must be permitted by mask 0xfb (0..7 except 2);
     * source 2 must be an 8-bit operand whose first swizzle component is
     * 0 or 2. The bytes2 and result_word fields are always zero.
     */
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);

    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);

    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned bytes2 = 0;

    unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned lane2 = 0;

    if (lane2_sz == 8) {
        switch (ins->swizzle[2][0]) {
        case 0:
            lane2 = 0;
            break;
        case 2:
            lane2 = 1;
            break;
        default:
            unreachable("Could not pattern match widen");
        }
    } else {
        unreachable("Could not pattern match widen");
    }

    assert(lane2 < 2);

    unsigned result_word = 0;
    unsigned word = 0x33e000;

    word |= src0 << 0;
    word |= src1 << 3;
    word |= src2 << 6;
    word |= bytes2 << 9;
    word |= lane2 << 10;
    word |= result_word << 11;
    return word;
}

/*
 * Build the encoded word for the "fma_fmul_slice_f32" packer. Both sources
 * must be permitted by mask 0xfb (0..7 except 2).
 */
static inline unsigned
pan_pack_fma_fmul_slice_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);

    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);

    unsigned word = 0x70cb40;

    word |= src0 << 0;
    word |= src1 << 3;
    return word;
}

/*
 * Build the encoded word for the "add_ld_var_flat_imm" packer.
 *
 * ins->vector_channels (1..4) yields the vecsize field, ins->format picks a
 * register format in 0..3 and the index field comes from
 * bi_get_immediate(ins, 0). bi_write_staging_register() is called before the
 * word is assembled.
 */
static inline unsigned
pan_pack_add_ld_var_flat_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned vecsize = ins->vector_channels - 1;
    assert(vecsize < 4);

    unsigned register_format = 0;

    if (ins->format == nir_type_float32)
        register_format = 0;
    else if (ins->format == nir_type_float16)
        register_format = 1;
    else if (ins->format == nir_type_uint32)
        register_format = 2;
    else if (ins->format == nir_type_int32)
        register_format = 3;
    else
        unreachable("Could not pattern match register format");

    assert(register_format < 8);

    unsigned function = 3;
    unsigned index = bi_get_immediate(ins, 0);

    bi_write_staging_register(clause, ins);

    if (register_format == 4) {
        unsigned word = 0xcf800;

        word |= vecsize << 8;
        word |= function << 0;
        word |= index << 3;
        return word;
    } else if (register_format != 4) {
        unsigned derived_10 = 0;
        unsigned derived_19 = 0;

        /* Bit 10 separates formats {0, 1} from {2, 3} */
        switch (register_format) {
        case 0:
        case 1:
            derived_10 = 0;
            break;
        case 2:
        case 3:
            derived_10 = 1;
            break;
        default:
            unreachable("No pattern match at pos 10");
        }

        /* Bit 19 separates formats {0, 2} from {1, 3} */
        switch (register_format) {
        case 0:
        case 2:
            derived_19 = 0;
            break;
        case 1:
        case 3:
            derived_19 = 1;
            break;
        default:
            unreachable("No pattern match at pos 19");
        }

        unsigned word = 0x53800;

        word |= vecsize << 8;
        word |= function << 0;
        word |= index << 3;
        word |= derived_10 << 10;
        word |= derived_19 << 19;
        return word;
    } else {
        unreachable("No matching state found in add_ld_var_flat_imm");
    }
}

/*
 * Build the encoded word for the "fma_csel_v2f16" packer.
 *
 * ins->cond is translated through a table into a comparison code. Codes 4
 * and 5 are rewritten to 1 and 2 with sources 0/1 exchanged; code 3 is
 * rewritten to 0 with sources 2/3 exchanged. Only codes 0..2 reach the
 * encoding.
 */
static inline unsigned
pan_pack_fma_csel_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);

    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);

    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned src3 = bi_get_src(ins, regs, 3);

    unsigned cmpf_table[] = {
        ~0, 4, 5, 2, 1, 0, 3
    };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 8);

    switch (cmpf) {
    case 4:
    case 5: {
        /* Exchange the first two sources, then use the mirrored code */
        unsigned swapped = src0;

        src0 = src1;
        src1 = swapped;
        cmpf = (cmpf == 5) ? 2 : 1;
        break;
    }
    case 3: {
        /* Exchange the last two sources, then use code 0 */
        unsigned swapped = src2;

        src2 = src3;
        src3 = swapped;
        cmpf = 0;
        break;
    }
    default:
        break;
    }

    unsigned derived_12 = 0;

    if (cmpf <= 2)
        derived_12 = cmpf;
    else
        unreachable("No pattern match at pos 12");

    unsigned word = 0x6e0000;

    word |= src0 << 0;
    word |= src1 << 3;
    word |= src2 << 6;
    word |= src3 << 9;
    word |= derived_12 << 12;
    return word;
}

static inline unsigned pan_pack_fma_csel_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); if (cmpf == 1) { { unsigned temp = src2; src2 = src3; src3 = 
temp; } if (cmpf == 1) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 3; else unreachable("No pattern match at pos 12"); return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); } static inline unsigned pan_pack_add_load_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane_temp = 0; if (lane_sz == 32) lane_temp = 0; else if (lane_sz == 64) lane_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane = lane_temp; assert(lane < 2); ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16; bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int; unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0; assert(extend < 4); bi_write_staging_register(clause, ins); if ((extend == 0) && (lane == 0)) { return 0x60c00 | (src0 << 0) | (src1 << 3) | (seg << 6); } else if ((extend != 0) && (lane == 1)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x61c00 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else { unreachable("No matching state found in add_load_i32"); } } static inline unsigned pan_pack_add_st_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); unsigned src2 = bi_get_src(ins, regs, 3); assert((1 << src2) & 0xf7); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); bi_read_staging_register(clause, ins); return 0xcb800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); } static inline unsigned pan_pack_fma_rshift_or_v4i8(bi_clause *clause, bi_instruction *ins, 
bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 
1 : 0; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x302000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_rshift_or_v4i8"); } } static inline unsigned pan_pack_fma_csel_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf == 1) derived_12 = 1; else unreachable("No pattern match at pos 12"); return 0x2e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); } static inline unsigned pan_pack_add_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lane1_temp = 
0; if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane1 = lane1_temp; assert(lane1 < 2); return 0x75300 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); } /* Encodes fma_atom_pre_i64. Sources 0 and 1 must have their bit set in mask 0xfb (0-7 except 2); source 2 is unchecked. atom_opc is hard-coded to 2 and stored at bit 9. Returns 0x6ec000 | fields. */ static inline unsigned pan_pack_fma_atom_pre_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned atom_opc = 2; return 0x6ec000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); } /* Encodes fma_shaddxl_u32. Both sources must have their bit set in mask 0xfb. lane1 describes source 1: 0 or 1 for a 16-bit type whose first swizzle component is 0 or 1, 2 for a 32-bit type; it is stored at bit 9. shift is hard-coded to 0 at bit 6. Returns 0x70e000 | fields. */ static inline unsigned pan_pack_fma_shaddxl_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lane1_temp = 0; if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; else if (lane1_sz == 32) lane1_temp = 2; else unreachable("Could not pattern match widen"); unsigned lane1 = lane1_temp; assert(lane1 < 4); unsigned shift = 0; return 0x70e000 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); } static inline unsigned pan_pack_add_branch_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); 
unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); if (((src0 > src1) && (cmpf == 0)) || ((src0 < src1) && (cmpf == 1))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } } unsigned derived_12 = 0; if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; else unreachable("No pattern match at pos 12"); unsigned derived_9 = 0; if ((src0 == src1) && (cmpf == 0)) derived_9 = 1; else if (((src0 < src1) && (cmpf == 0)) || ((src0 >= src1) && (cmpf == 1))) derived_9 = 4; else unreachable("No pattern match at pos 9"); return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); } static inline unsigned pan_pack_fma_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); return 0x701900 | (src0 << 0) | (swz0 << 4); } static inline unsigned pan_pack_fma_lshift_or_v4i8(bi_clause 
*clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 
1 : 0; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x312000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x313800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_lshift_or_v4i8"); } } static inline unsigned pan_pack_add_ld_var_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); unsigned update = (ins->constant.u64 >= 20) ? 3 : 0; assert(update < 4); unsigned register_format_temp = 0; if (ins->format == nir_type_float32) register_format_temp = 0; else if (ins->format == nir_type_float16) register_format_temp = 1; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 4); unsigned sample = ins->load_vary.interp_mode; assert(sample < 8); unsigned index = bi_get_immediate(ins, 0); bi_write_staging_register(clause, ins); if (register_format != 2) { unsigned derived_19 = 0; if (register_format == 0) derived_19 = 0; else if (register_format == 1) derived_19 = 1; else unreachable("No pattern match at pos 19"); unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if 
((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0x50000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_19 << 19) | (derived_10 << 10); } else if (register_format == 2) { unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0xcc000 | (src0 << 0) | (vecsize << 8) | (index << 3) | (derived_10 << 10); } else { unreachable("No matching state found in add_ld_var_imm"); } } /* Encodes fma_atom_c_return_i32. Sources 0 and 1 must have their bit set in mask 0xf3 (0, 1 or 4-7); source 2 must have its bit set in mask 0xf7 (0-7 except 3). atom_opc is hard-coded to 2 at bit 9. Returns 0x2f6000 | fields. */ static inline unsigned pan_pack_fma_atom_c_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf3); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf3); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned atom_opc = 2; return 0x2f6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); } /* Encodes add_barrier: no operands are read; always returns the constant word 0xd7874. */ static inline unsigned pan_pack_add_barrier(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { return 0xd7874; } /* Encodes add_quiet_f32: the only field is source 0 at bit 0, OR-ed into 0x3d970. No range check is applied to the source here. */ static inline unsigned pan_pack_add_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); return 0x3d970 | (src0 << 0); } static inline unsigned pan_pack_fma_atom_c_i64(bi_clause 
*clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf3); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf3); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned atom_opc = 2; return 0x2f0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9); } static inline unsigned pan_pack_add_v2s8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3; else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7; else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11; else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14; else if 
(swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 16); return 0x3c800 | (src0 << 0) | (swz0 << 4); } static inline unsigned pan_pack_add_fsincos_offset_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned scale = 0; return 0x67aa0 | (src0 << 0) | (scale << 3); } static inline unsigned pan_pack_add_lea_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned register_format_temp = 0; if (ins->format == nir_type_float16) register_format_temp = 0; else if (ins->format == nir_type_float32) register_format_temp = 1; else if (ins->format == nir_type_int32) register_format_temp = 2; else if (ins->format == nir_type_uint32) register_format_temp = 3; else if (ins->format == nir_type_int16) register_format_temp = 4; else if (ins->format == nir_type_uint16) register_format_temp = 5; else if (ins->format == nir_type_float64) register_format_temp = 6; else if (ins->format == nir_type_int64) register_format_temp = 7; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 16); bi_write_staging_register(clause, ins); if (register_format != 8) { unsigned derived_11 = 0; if (register_format == 0) derived_11 = 0; else if (register_format == 1) derived_11 = 1; else if (register_format == 2) derived_11 = 2; else if (register_format == 3) derived_11 = 3; else if (register_format == 4) derived_11 = 4; else if (register_format == 5) derived_11 = 5; else if (register_format == 6) derived_11 = 6; else if (register_format == 7) derived_11 = 7; else unreachable("No pattern match at pos 11"); return 0xc0400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | 
(derived_11 << 11); } else if (register_format == 8) { return 0xc8400 | (src0 << 0) | (src1 << 3) | (src2 << 6); } else { unreachable("No matching state found in add_lea_attr"); } } static inline unsigned pan_pack_add_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned round = ins->roundmode; assert(round < 8); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned clamp = ins->outmod; assert(clamp < 4); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); if (((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } } if (round != 4) { unsigned derived_13 = 0; if (round == 0) derived_13 = 0; else if (round == 1) derived_13 = 1; else if (round == 2) derived_13 = 2; else if (round == 3) derived_13 = 3; else unreachable("No pattern match at pos 13"); unsigned derived_9 = 0; if ((widen0 == 0) && 
(widen1 == 0)) derived_9 = 0; else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x20000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (abs0 << 15) | (derived_13 << 13) | (derived_9 << 9); } else if ((round == 4) && (widen0 == 0) && (widen1 == 0) && (abs0 == 0) && (abs1 == 0) && (neg0 == 0) && (neg1 == 0) && (clamp == 0)) { return 0x75200 | (src0 << 0) | (src1 << 3); } else { unreachable("No matching state found in add_fadd_f32"); } } /* Encodes fma_atom_post_i64. Both sources must have their bit set in mask 0xfb (0-7 except 2). atom_opc is hard-coded to 2 and, unlike the three-source atom packers, sits at bit 6. Returns 0x6ee000 | fields. */ static inline unsigned pan_pack_fma_atom_post_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned atom_opc = 2; return 0x6ee000 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); } /* Encodes fma_seg_sub. Source 0 must have its bit set in mask 0xfb. The segment must be non-zero and < 8 and is stored at bit 3; preserve_null is hard-coded to 0 at bit 7. Returns 0x701540 | fields. */ static inline unsigned pan_pack_fma_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); unsigned preserve_null = 0; return 0x701540 | (src0 << 0) | (seg << 3) | (preserve_null << 7); } /* Encodes add_seg_sub. Same field layout as fma_seg_sub (source at bit 0, segment at bit 3, preserve_null = 0 at bit 7) but with base word 0x3d540 and no mask check on the source. */ static inline unsigned pan_pack_add_seg_sub(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); unsigned preserve_null = 0; return 0x3d540 | (src0 << 0) | (seg << 3) | (preserve_null << 7); } /* Encodes fma_frexpe_f32. NOTE(review): sqrt and log are hard-coded to 0 and 1, so of the two states tested further down only the (log == 1, sqrt == 0, neg == 0) one can match; an instruction with src_neg[0] set would reach unreachable() -- confirm this is intended. */ static inline unsigned pan_pack_fma_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned sqrt = 0; unsigned log = 1; unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp 
= 0; if (widen0_sz == 32) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); if (log == 0) { return 0x701c20 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (widen0 << 3); } else if ((log == 1) && (sqrt == 0) && (neg == 0)) { return 0x701e20 | (src0 << 0) | (widen0 << 3); } else { unreachable("No matching state found in fma_frexpe_f32"); } } static inline unsigned pan_pack_add_frsq_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned divzero = 0; unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); return 0x67280 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); } static inline unsigned pan_pack_fma_lshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && 
ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x310000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_lshift_and_v4i8"); } } static inline unsigned pan_pack_add_branch_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && 
ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == widen1) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } if (cmpf == 1) cmpf = 4; else if (cmpf == 5) cmpf = 2; else if (cmpf == 4) cmpf = 1; else if (cmpf == 2) cmpf = 5; } unsigned derived_12 = 0; if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5))) derived_12 = 3; else if ((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) derived_12 = 4; else unreachable("No pattern match at pos 12"); unsigned derived_9 = 0; if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4; else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 0) || (cmpf == 3))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 3))) derived_9 = 5; else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 1) || (cmpf == 2))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6; else if (((widen0 == 2) && (widen1 == 1) && ((cmpf == 4) || (cmpf == 5))) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 4)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == widen1) && (src0 == src1) && (cmpf == 0))) derived_9 = 7; else unreachable("No pattern match at pos 9"); return 0x68000 | 
(src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); } /* Encodes add_clper_i32. Source 0 must have its bit set in mask 0x7 (0, 1 or 2); source 1 is unchecked. lane_op (0), subgroup (1) and inactive_result (0) are hard-coded and stored at bits 6, 8 and 10. Returns 0x7c000 | fields. */ static inline unsigned pan_pack_add_clper_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0x7); unsigned src1 = bi_get_src(ins, regs, 1); unsigned lane_op = 0; unsigned subgroup = 1; unsigned inactive_result = 0; return 0x7c000 | (src0 << 0) | (src1 << 3) | (lane_op << 6) | (subgroup << 8) | (inactive_result << 10); } /* Encodes add_v2s16_to_v2f16. swz0 is built from the first two swizzle components of the 16-bit source as swizzle[0][0] + 2 * swizzle[0][1]. For round modes 0..3 the word is 0x3c600 with the round mode at bit 4 and swz0 at bit 6; for round mode 4 the word is 0x3cb00 with swz0 at bit 4. Round modes 5..7 pass the assert but hit unreachable(). */ static inline unsigned pan_pack_add_v2s16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned round = ins->roundmode; assert(round < 8); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c600 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); } else if (round == 4) { /* NOTE(review): swz0 moves from bit 6 to bit 4 in this state -- presumably a per-encoding field position; confirm against the ISA description. */ return 0x3cb00 | (src0 << 0) | (swz0 << 4); } else { unreachable("No matching state found in add_v2s16_to_v2f16"); } } static inline unsigned pan_pack_fma_atom_c1_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf3); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf3); unsigned atom_opc = 2; return 0x2f5e00 | (src0 << 0) | (src1 << 3) | 
(atom_opc << 6); } /* Encodes add_axchg_i32. Reads instruction sources 1 and 2 into the fields at bits 0 and 3. The segment must be BI_SEGMENT_NONE or BI_SEGMENT_WLS and is reduced to one bit (WLS = 1) at bit 9. bi_read_staging_register() is called, and source 0 is asserted to be the same index as the destination. Returns 0x640c0 | fields. */ static inline unsigned pan_pack_add_axchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; assert(seg < 2); bi_read_staging_register(clause, ins); assert(ins->src[0] == ins->dest); return 0x640c0 | (src0 << 0) | (src1 << 3) | (seg << 9); } /* Encodes fma_vn_asst1_f16. Sources 0 and 1 must have their bit set in mask 0xfb (0-7 except 2); source 2 is unchecked. h and l are hard-coded to 0 (bits 9 and 10); neg2 comes from src_neg[2] (bit 11). Returns 0x6eb000 | fields. */ static inline unsigned pan_pack_fma_vn_asst1_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned h = 0; unsigned l = 0; unsigned neg2 = ins->src_neg[2]; assert(neg2 < 2); return 0x6eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (h << 9) | (l << 10) | (neg2 << 11); } static inline unsigned pan_pack_fma_fma_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ); unsigned round = (ins->roundmode == BIFROST_RTZ) ? 
1 : 0; assert(round < 2); unsigned clamp = ins->outmod; assert(clamp < 4); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned neg2 = ins->src_neg[2]; assert(neg2 < 2); unsigned special = 0; unsigned derived_16 = 0; if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_16 = 0; else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_16 = 1; else unreachable("No pattern match at pos 16"); unsigned derived_12 = 0; if ((clamp == 0) && (special == 0) && (round == 0)) derived_12 = 0; else if ((clamp == 1) && (special == 0) && (round == 0)) derived_12 = 1; else if ((clamp == 2) && (special == 0) && (round == 0)) derived_12 = 2; else if ((clamp == 3) && (special == 0) && (round == 0)) derived_12 = 3; else if ((clamp == 0) && (special == 1) && (round == 0)) derived_12 = 4; else if ((clamp == 0) && (special == 1) && (round == 1)) derived_12 = 5; else if ((clamp == 0) && (special == 3) && (round == 0)) derived_12 = 6; else if ((clamp == 0) && (special == 2) && (round == 0)) derived_12 = 7; else unreachable("No pattern match at pos 12"); return 0x280000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (derived_16 << 16) | (derived_12 << 12); } static inline unsigned pan_pack_add_hadd_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); unsigned round = (ins->roundmode == BIFROST_RTP) ? 
1 : 0; assert(round < 2); return 0xbc6c0 | (src0 << 0) | (src1 << 3) | (round << 12); } static inline unsigned pan_pack_fma_imul_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); return 0x7240c0 | (src0 << 0) | (src1 << 3) | (swz0 << 9) | (swz1 << 11); } static inline unsigned pan_pack_add_load_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_write_staging_register(clause, ins); return 0x65200 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_add_hadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = 
bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; assert(round < 2); return 0xbc640 | (src0 << 0) | (src1 << 3) | (round << 12); } static inline unsigned pan_pack_add_imov_fma(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned threads = 0; return 0xd7820 | (threads << 3); } static inline unsigned pan_pack_add_icmpi_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, ~0, ~0, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); return 0x7b880 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); } static inline unsigned pan_pack_add_store_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_read_staging_register(clause, ins); return 0x62000 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_fma_jump_ex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned test_mode = 0; unsigned stack_mode = 2; return 0x2eb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (test_mode << 9) | (stack_mode << 10); } static inline unsigned pan_pack_add_iadd_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 32) lanes1_temp = 0; else 
if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if (lanes1 == 0) { return 0xbc600 | (src0 << 0) | (src1 << 3) | (saturate << 8); } else if ((lanes1 == 1) || (lanes1 == 2)) { unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { unsigned derived_9 = 0; if (lanes1 == 3) derived_9 = 0; else if (lanes1 == 4) derived_9 = 1; else if (lanes1 == 5) derived_9 = 2; else if (lanes1 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbe000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_s32"); } } static inline unsigned pan_pack_fma_rshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; else if (lanes2_sz == 
8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x320800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); } else { unreachable("No matching state found in fma_rshift_xor_v2i16"); } } static inline unsigned pan_pack_fma_csel_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf 
== 1) derived_12 = 1; else unreachable("No pattern match at pos 12"); return 0x6e4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); } static inline unsigned pan_pack_add_shaddxh_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); return 0x3f8c0 | (src0 << 0) | (src1 << 3); } static inline unsigned pan_pack_add_isub_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lanes0_temp = 0; if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0; else unreachable("Could not pattern match widen"); unsigned lanes0 = lanes0_temp; assert(lanes0 < 8); unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 
2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if ((lanes0 == 0) && (lanes1 == 0)) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0xbd400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else if (lanes1 == 3) derived_9 = 2; else if (lanes1 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbf040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 5) derived_9 = 0; else if (lanes1 == 6) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbf840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_v4u8"); } } static inline unsigned pan_pack_add_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned sqrt = 0; unsigned log = 1; unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; else if (widen0_sz 
== 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); if ((log == 0) && (neg0 == 0)) { return 0x3db20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3); } else if ((log == 1) && (sqrt == 0)) { return 0x3da20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7); } else { unreachable("No matching state found in add_frexpm_f32"); } } static inline unsigned pan_pack_add_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned sqrt = 0; unsigned log = 1; unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); if (log == 0) { return 0x3dc00 | (src0 << 0) | (neg << 6) | (sqrt << 8) | (swz0 << 3); } else if ((log == 1) && (sqrt == 0) && (neg == 0)) { return 0x3de00 | (src0 << 0) | (swz0 << 3); } else { unreachable("No matching state found in add_frexpe_v2f16"); } } static inline unsigned pan_pack_add_logb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; else unreachable("Could not 
pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); return 0x3d9a0 | (src0 << 0) | (widen0 << 3); } static inline unsigned pan_pack_fma_rshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 
1 : 0; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x302800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x303800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else { unreachable("No matching state found in fma_rshift_or_v2i16"); } } static inline unsigned pan_pack_fma_shaddxl_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lane1_temp = 0; if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; else if (lane1_sz == 32) lane1_temp = 2; else unreachable("Could not pattern match widen"); unsigned lane1 = lane1_temp; assert(lane1 < 4); unsigned shift = 0; return 0x70e800 | (src0 << 0) | (src1 << 3) | (lane1 << 9) | (shift << 6); } static inline unsigned pan_pack_add_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); unsigned swz0_sz = 
nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); unsigned result_type = 2; if ((neg0 == 0) && (neg1 == 1)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (cmpf == 1) cmpf = 4; else if (cmpf == 5) cmpf = 2; else if (cmpf == 4) cmpf = 1; else if (cmpf == 2) cmpf = 5; } unsigned derived_13 = 0; if ((neg0 == 0) && (neg1 == 0)) derived_13 = 0; else if ((neg0 == 1) && (neg1 == 0)) derived_13 = 1; else unreachable("No pattern match at pos 13"); return 0xb0000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (swz0 << 9) | (swz1 << 11) | (result_type << 14) | (derived_13 << 13); } static inline unsigned pan_pack_fma_clz_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned mask = 0; return 0x701f90 | (src0 << 0) | (mask << 3); } static inline unsigned 
pan_pack_add_frexpe_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    /*
     * One source port and a widen code at bit 3 (32-bit -> 1, 16-bit lane 0
     * -> 2, 16-bit lane 1 -> 3).  With the fixed mode values below (sqrt 0,
     * log 1) only a non-negated source is encodable (0x3de20); a set neg
     * flag reaches unreachable().
     */
    const unsigned s0 = bi_get_src(ins, regs, 0);
    const unsigned neg = ins->src_neg[0];
    assert(neg < 2);

    const unsigned sqrt_mode = 0;
    const unsigned log_mode = 1;

    unsigned widen0 = 0;
    const unsigned bits0 = nir_alu_type_get_type_size(ins->src_types[0]);
    const unsigned lane0 = ins->swizzle[0][0];
    if (bits0 == 32)
        widen0 = 1;
    else if (bits0 == 16 && lane0 < 2)
        widen0 = 2 + lane0;
    else
        unreachable("Could not pattern match widen");
    assert(widen0 < 4);

    if (log_mode == 0)
        return 0x3dc20 | (s0 << 0) | (neg << 6) | (sqrt_mode << 8) | (widen0 << 3);

    if (log_mode == 1 && sqrt_mode == 0 && neg == 0)
        return 0x3de20 | (s0 << 0) | (widen0 << 3);

    unreachable("No matching state found in add_frexpe_f32");
}

/* pan_pack_fma_quiet_f32: one source port (asserted against 0xfb) on top of 0x701970. */
static inline unsigned
pan_pack_fma_quiet_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);

    return 0x701970 | (s0 << 0);
}

/*
 * pan_pack_add_fmin_v2f16: two source ports, per-source neg flags (bits 7
 * and 8), lane-pair swizzle codes (bits 9 and 11), a zero "sem" field at
 * bit 13 and a single abs-related bit at bit 6 on top of 0x90000.
 * The abs bit is derived from the abs flags together with the ordering of
 * the two port numbers; operands are exchanged first when the original
 * combination has no direct encoding.
 */
static inline unsigned
pan_pack_add_fmin_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);
    unsigned s1 = bi_get_src(ins, regs, 1);

    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned abs1 = ins->src_abs[1];
    assert(abs1 < 2);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);

    unsigned swz0 = 0;
    const unsigned bits0 = nir_alu_type_get_type_size(ins->src_types[0]);
    const unsigned lo0 = ins->swizzle[0][0], hi0 = ins->swizzle[0][1];
    if (bits0 == 16 && lo0 < 2 && hi0 < 2)
        swz0 = lo0 + 2 * hi0;
    else
        unreachable("Could not pattern match widen");
    assert(swz0 < 4);

    unsigned swz1 = 0;
    const unsigned bits1 = nir_alu_type_get_type_size(ins->src_types[1]);
    const unsigned lo1 = ins->swizzle[1][0], hi1 = ins->swizzle[1][1];
    if (bits1 == 16 && lo1 < 2 && hi1 < 2)
        swz1 = lo1 + 2 * hi1;
    else
        unreachable("Could not pattern match widen");
    assert(swz1 < 4);

    unsigned sem = 0;

    if ((abs0 == 0 && s0 > s1) || (abs1 == 1 && s0 <= s1)) {
        unsigned held;

        held = s0;   s0 = s1;     s1 = held;
        held = abs0; abs0 = abs1; abs1 = held;
        held = neg0; neg0 = neg1; neg1 = held;
        held = swz0; swz0 = swz1; swz1 = held;

        switch (sem) {
        case 3: sem = 2; break;
        case 2: sem = 3; break;
        default: break;
        }
    }

    /* Ordering of the (possibly exchanged) port numbers. */
    const unsigned ascending = (s0 <= s1);

    unsigned field6 = 0;
    if ((abs0 == 1 && abs1 == 0 && !ascending) ||
        (abs0 == 0 && abs1 == 0 && ascending))
        field6 = 0;
    else if ((abs0 == 1 && abs1 == 1 && !ascending) ||
             (abs0 == 1 && abs1 == 0 && ascending))
        field6 = 1;
    else
        unreachable("No pattern match at pos 6");

    return 0x90000 | (s0 << 0) | (s1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (field6 << 6);
}

/*
 * pan_pack_add_var_tex_f32: packs skip (bit 7), lod_mode (bit 9, computed as
 * 1 - texture.compute_lod), the immediate varying index (bit 0), the
 * texture index (bit 3) and one bit at position 5 on top of 0xca000.
 * "update" is fixed at 0, so only sample == 0 is encodable.  Calls
 * bi_write_staging_register() before the final field is derived.
 */
static inline unsigned
pan_pack_add_var_tex_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned update = 0;

    const unsigned skip = ins->skip;
    assert(skip < 2);
    const unsigned lod_mode = 1 - ins->texture.compute_lod;
    assert(lod_mode < 2);
    const unsigned sample = ins->load_vary.interp_mode;
    assert(sample < 2);

    const unsigned varying_index = bi_get_immediate(ins, 0);
    const unsigned texture_index = ins->texture.texture_index;

    bi_write_staging_register(clause, ins);

    unsigned field5 = 0;
    if (sample == 0 && update == 0)
        field5 = 0;
    else if (sample == 1 && update == 1)
        field5 = 1;
    else
        unreachable("No pattern match at pos 5");

    return 0xca000 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (field5 << 5);
}

/*
 * pan_pack_add_branch_lowbits_f32: source 0 at bit 0 and source 1 (asserted
 * against mask 0xf7) at bit 6 on top of 0x6fa38.
 */
static inline unsigned
pan_pack_add_branch_lowbits_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    const unsigned s1 = bi_get_src(ins, regs, 1);
    assert((1 << s1) & 0xf7);

    return 0x6fa38 | (s0 << 0) | (s1 << 6);
}

/*
 * pan_pack_fma_lshift_double_i32: three source ports, zero bytes2 (bit 9)
 * and result_word (bit 11) fields, and a 1-bit lane selector at bit 10
 * (8-bit lane 0 -> 0, 8-bit lane 2 -> 1) on top of 0x33c000.
 */
static inline unsigned
pan_pack_fma_lshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);
    const unsigned s1 = bi_get_src(ins, regs, 1);
    assert((1 << s1) & 0xfb);
    const unsigned s2 = bi_get_src(ins, regs, 2);

    const unsigned bytes2 = 0;

    unsigned lane2 = 0;
    const unsigned bits2 = nir_alu_type_get_type_size(ins->src_types[2]);
    const unsigned byte_sel = ins->swizzle[2][0];
    if (bits2 == 8 && byte_sel == 0)
        lane2 = 0;
    else if (bits2 == 8 && byte_sel == 2)
        lane2 = 1;
    else
        unreachable("Could not pattern match widen");
    assert(lane2 < 2);

    const unsigned result_word = 0;

    return 0x33c000 | (s0 << 0) | (s1 << 3) | (s2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11);
}

/*
 * pan_pack_fma_idp_v4i8: two source ports (asserted against 0xfb) and one
 * signedness bit per source (bits 9 and 10) on top of 0x73e8c0.  A bit is
 * set when the source's NIR base type is nir_type_int; each source type
 * must be at most 16 bits wide.
 */
static inline unsigned
pan_pack_fma_idp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);
    const unsigned s1 = bi_get_src(ins, regs, 1);
    assert((1 << s1) & 0xfb);

    ASSERTED const bool narrow0 = nir_alu_type_get_type_size(ins->src_types[0]) <= 16;
    const bool is_int0 = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int;
    assert(narrow0);
    unsigned sign0 = 0;
    if (is_int0)
        sign0 = 1;
    assert(sign0 < 2);

    ASSERTED const bool narrow1 = nir_alu_type_get_type_size(ins->src_types[1]) <= 16;
    const bool is_int1 = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int;
    assert(narrow1);
    unsigned sign1 = 0;
    if (is_int1)
        sign1 = 1;
    assert(sign1 < 2);

    return 0x73e8c0 | (s0 << 0) | (s1 << 3) | (sign0 << 9) | (sign1 << 10);
}

/*
 * pan_pack_add_icmp_v4u8: two source ports, result_type 1 at bit 10 and a
 * 1-bit compare function at bit 6 on top of 0x7b080.  Compare codes 2 and 3
 * are folded onto 0 and 1 by exchanging the two source ports.
 */
static inline unsigned
pan_pack_add_icmp_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);
    unsigned s1 = bi_get_src(ins, regs, 1);
    const unsigned result_type = 1;

    const unsigned cond_to_cmpf[] = { ~0u, 2, 3, 1, 0, ~0u, ~0u };
    unsigned cmpf = cond_to_cmpf[ins->cond];
    assert(cmpf < 4);

    if (cmpf == 2 || cmpf == 3) {
        const unsigned held = s0;
        s0 = s1;
        s1 = held;

        /* Only 2 or 3 can reach this point: 3 becomes 1, 2 becomes 0. */
        cmpf = (cmpf == 3) ? 1 : 0;
    }

    unsigned field6 = 0;
    switch (cmpf) {
    case 0: field6 = 0; break;
    case 1: field6 = 1; break;
    default: unreachable("No pattern match at pos 6");
    }

    return 0x7b080 | (s0 << 0) | (s1 << 3) | (result_type << 10) | (field6 << 6);
}

/*
 * pan_pack_add_acmpstore_i64: ports come from instruction sources 1 and 2;
 * the segment bit (bit 9) is 1 for BI_SEGMENT_WLS and 0 for
 * BI_SEGMENT_NONE, other segments fail the assertion.  Calls
 * bi_read_staging_register() before packing onto 0x64900.
 */
static inline unsigned
pan_pack_add_acmpstore_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 1);
    const unsigned s1 = bi_get_src(ins, regs, 2);

    assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
    unsigned seg = 0;
    if (ins->segment == BI_SEGMENT_WLS)
        seg = 1;
    assert(seg < 2);

    bi_read_staging_register(clause, ins);

    return 0x64900 | (s0 << 0) | (s1 << 3) | (seg << 9);
}

/*
 * pan_pack_add_hadd_v4u8: two source ports plus a 1-bit round flag at bit 12
 * on top of 0xbc4c0.  The flag is 1 for BIFROST_RTP and 0 for BIFROST_RTN;
 * any other round mode fails the assertion.
 */
static inline unsigned
pan_pack_add_hadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    const unsigned s1 = bi_get_src(ins, regs, 1);

    assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
    unsigned rtp = 0;
    if (ins->roundmode == BIFROST_RTP)
        rtp = 1;
    assert(rtp < 2);

    return 0xbc4c0 | (s0 << 0) | (s1 << 3) | (rtp << 12);
}

/* pan_pack_add_cubeface2: one source port on top of 0x3de58. */
static inline unsigned
pan_pack_add_cubeface2(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);

    return 0x3de58 | (s0 << 0);
}

/*
 * pan_pack_fma_s16_to_s32: one source port (asserted against 0xfb) and the
 * selected 16-bit lane (0 or 1) at bit 4 on top of 0x700cc0.
 */
static inline unsigned
pan_pack_fma_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);

    unsigned lane0 = 0;
    const unsigned bits0 = nir_alu_type_get_type_size(ins->src_types[0]);
    const unsigned half = ins->swizzle[0][0];
    if (bits0 == 16 && half < 2)
        lane0 = half;
    else
        unreachable("Could not pattern match widen");
    assert(lane0 < 2);

    return 0x700cc0 | (s0 << 0) | (lane0 << 4);
}

/*
 * pan_pack_fma_fround_v2f16: one source port (asserted against 0xfb), abs
 * (bit 7), neg (bit 8) and a lane-pair swizzle code (bit 3).  Round mode 4
 * has its own base word (0x707600); modes 0..3 are placed at bit 9 on top
 * of 0x70c000; modes 5..7 reach unreachable().
 */
static inline unsigned
pan_pack_fma_fround_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);

    const unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    const unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);

    unsigned swz0 = 0;
    const unsigned bits0 = nir_alu_type_get_type_size(ins->src_types[0]);
    const unsigned lo0 = ins->swizzle[0][0], hi0 = ins->swizzle[0][1];
    if (bits0 == 16 && lo0 < 2 && hi0 < 2)
        swz0 = lo0 + 2 * hi0;
    else
        unreachable("Could not pattern match widen");
    assert(swz0 < 4);

    const unsigned mode = ins->roundmode;
    assert(mode < 8);

    if (mode == 4) {
        return 0x707600 | (s0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3);
    } else if (mode != 4) {
        unsigned field9 = 0;
        switch (mode) {
        case 0: field9 = 0; break;
        case 1: field9 = 1; break;
        case 2: field9 = 2; break;
        case 3: field9 = 3; break;
        default: unreachable("No pattern match at pos 9");
        }
        return 0x70c000 | (s0 << 0) | (abs0 << 7) | (neg0 << 8) | (swz0 << 3) | (field9 << 9);
    } else {
        unreachable("No matching state found in fma_fround_v2f16");
    }
}

/*
 * pan_pack_add_v2u8_to_v2u16: one source port and a 4-bit swizzle code at
 * bit 4 on top of 0x3c708.  For an 8-bit source whose two lanes are each
 * 0..3 the code is lane[0] + 4 * lane[1].
 */
static inline unsigned
pan_pack_add_v2u8_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);

    unsigned swz0 = 0;
    const unsigned bits0 = nir_alu_type_get_type_size(ins->src_types[0]);
    const unsigned first = ins->swizzle[0][0], second = ins->swizzle[0][1];
    if (bits0 == 8 && first < 4 && second < 4)
        swz0 = first + 4 * second;
    else
        unreachable("Could not pattern match widen");
    assert(swz0 < 16);

    return 0x3c708 | (s0 << 0) | (swz0 << 4);
}

/*
 * pan_pack_add_fmax_v2f16: identical field layout and operand-exchange rule
 * to the fmin packer, on top of base word 0x80000.
 */
static inline unsigned
pan_pack_add_fmax_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);
    unsigned s1 = bi_get_src(ins, regs, 1);

    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned abs1 = ins->src_abs[1];
    assert(abs1 < 2);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);

    unsigned swz0 = 0;
    const unsigned bits0 = nir_alu_type_get_type_size(ins->src_types[0]);
    const unsigned lo0 = ins->swizzle[0][0], hi0 = ins->swizzle[0][1];
    if (bits0 == 16 && lo0 < 2 && hi0 < 2)
        swz0 = lo0 + 2 * hi0;
    else
        unreachable("Could not pattern match widen");
    assert(swz0 < 4);

    unsigned swz1 = 0;
    const unsigned bits1 = nir_alu_type_get_type_size(ins->src_types[1]);
    const unsigned lo1 = ins->swizzle[1][0], hi1 = ins->swizzle[1][1];
    if (bits1 == 16 && lo1 < 2 && hi1 < 2)
        swz1 = lo1 + 2 * hi1;
    else
        unreachable("Could not pattern match widen");
    assert(swz1 < 4);

    unsigned sem = 0;

    if ((abs0 == 0 && s0 > s1) || (abs1 == 1 && s0 <= s1)) {
        unsigned held;

        held = s0;   s0 = s1;     s1 = held;
        held = abs0; abs0 = abs1; abs1 = held;
        held = neg0; neg0 = neg1; neg1 = held;
        held = swz0; swz0 = swz1; swz1 = held;

        switch (sem) {
        case 3: sem = 2; break;
        case 2: sem = 3; break;
        default: break;
        }
    }

    /* Ordering of the (possibly exchanged) port numbers. */
    const unsigned ascending = (s0 <= s1);

    unsigned field6 = 0;
    if ((abs0 == 1 && abs1 == 0 && !ascending) ||
        (abs0 == 0 && abs1 == 0 && ascending))
        field6 = 0;
    else if ((abs0 == 1 && abs1 == 1 && !ascending) ||
             (abs0 == 1 && abs1 == 0 && ascending))
        field6 = 1;
    else
        unreachable("No pattern match at pos 6");

    return 0x80000 | (s0 << 0) | (s1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (sem << 13) | (field6 << 6);
}

/*
 * pan_pack_fma_f16_to_f32: one source port (asserted against 0xfb) and the
 * selected 16-bit lane (0 or 1) at bit 3 on top of 0x700d10.
 */
static inline unsigned
pan_pack_fma_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);

    unsigned lane0 = 0;
    const unsigned bits0 = nir_alu_type_get_type_size(ins->src_types[0]);
    const unsigned half = ins->swizzle[0][0];
    if (bits0 == 16 && half < 2)
        lane0 = half;
    else
        unreachable("Could not pattern match widen");
    assert(lane0 < 2);

    return 0x700d10 | (s0 << 0) | (lane0 << 3);
}

/*
 * pan_pack_add_branchc_i32: source 0 at bit 0, source 1 (asserted against
 * mask 0xf7) at bit 6 and a zero "combine" field at bit 10 on top of
 * 0x6f238.
 */
static inline unsigned
pan_pack_add_branchc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    const unsigned s1 = bi_get_src(ins, regs, 1);
    assert((1 << s1) & 0xf7);

    const unsigned combine = 0;

    return 0x6f238 | (s0 << 0) | (s1 << 6) | (combine << 10);
}

/* pan_pack_fma_dtsel_imm: one source port (asserted against 0xfb) and a fixed table value of 63 at bit 3 on top of 0x70f200. */
static inline unsigned
pan_pack_fma_dtsel_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);

    const unsigned table = 63;

    return 0x70f200 | (s0 << 0) | (table << 3);
}

static inline unsigned pan_pack_add_iadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lanes0_temp = 0; if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0; else unreachable("Could not pattern match widen"); unsigned lanes0 = 
lanes0_temp; assert(lanes0 < 8); unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 0) lanes1_temp = 1; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if ((lanes0 == 0) && (lanes1 == 0)) { return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8); } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else if (lanes1 == 3) derived_9 = 2; else if (lanes1 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { unsigned derived_9 = 0; if (lanes1 == 5) derived_9 = 0; else if (lanes1 == 6) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbe840 | (src0 << 0) 
| (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v4s8"); } } static inline unsigned pan_pack_add_texs_2d_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned skip = ins->skip; assert(skip < 2); unsigned lod_mode = 1 - ins->texture.compute_lod; assert(lod_mode < 2); unsigned texture_index = ins->texture.texture_index; unsigned sampler_index = ins->texture.sampler_index; bi_write_staging_register(clause, ins); return 0x58000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) | (sampler_index << 10); } static inline unsigned pan_pack_add_vn_asst2_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned scale = 0; unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); if (scale == 0) { return 0x3df80 | (src0 << 0) | (neg0 << 3); } else if (scale == 1) { return 0x3de80 | (src0 << 0) | (neg0 << 4); } else { unreachable("No matching state found in add_vn_asst2_f32"); } } static inline unsigned pan_pack_add_texc(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); unsigned src2 = bi_get_src(ins, regs, 3); assert((1 << src2) & 0xf7); unsigned skip = ins->skip; assert(skip < 2); bi_read_staging_register(clause, ins); assert(ins->src[0] == ins->dest); return 0xd7000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9); } static inline unsigned pan_pack_add_lea_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); unsigned register_format_temp = 0; if (ins->format == nir_type_float16) register_format_temp = 0; else if (ins->format == nir_type_float32) register_format_temp = 1; else if (ins->format == nir_type_int32) 
register_format_temp = 2; else if (ins->format == nir_type_uint32) register_format_temp = 3; else if (ins->format == nir_type_int16) register_format_temp = 4; else if (ins->format == nir_type_uint16) register_format_temp = 5; else if (ins->format == nir_type_float64) register_format_temp = 6; else if (ins->format == nir_type_int64) register_format_temp = 7; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 16); unsigned attribute_index = bi_get_immediate(ins, 0); bi_write_staging_register(clause, ins); if (register_format != 8) { unsigned derived_11 = 0; if (register_format == 0) derived_11 = 0; else if (register_format == 1) derived_11 = 1; else if (register_format == 2) derived_11 = 2; else if (register_format == 3) derived_11 = 3; else if (register_format == 4) derived_11 = 4; else if (register_format == 5) derived_11 = 5; else if (register_format == 6) derived_11 = 6; else if (register_format == 7) derived_11 = 7; else unreachable("No pattern match at pos 11"); return 0xc0000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6) | (derived_11 << 11); } else if (register_format == 8) { return 0xc8000 | (src0 << 0) | (src1 << 3) | (attribute_index << 6); } else { unreachable("No matching state found in add_lea_attr_imm"); } } static inline unsigned pan_pack_add_f16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned round = ins->roundmode; assert(round < 8); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 
2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c500 | (src0 << 0) | (lane0 << 7) | (derived_4 << 4); } else if (round == 4) { return 0x3cc40 | (src0 << 0) | (lane0 << 5); } else { unreachable("No matching state found in add_f16_to_s32"); } } static inline unsigned pan_pack_add_st_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); unsigned src2 = bi_get_src(ins, regs, 3); assert((1 << src2) & 0xf7); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); bi_read_staging_register(clause, ins); return 0xc9800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); } static inline unsigned pan_pack_add_load_i24(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_write_staging_register(clause, ins); return 0x65000 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_fma_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 4); return 0x700b40 | (src0 << 0) | (lane0 << 4); } static inline unsigned pan_pack_fma_cubeface1(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 
<< src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned neg2 = ins->src_neg[2]; assert(neg2 < 2); unsigned derived_9 = 0; if ((neg0 == 0) && (neg1 == 0) && (neg2 == 0)) derived_9 = 0; else if ((neg0 == 1) && (neg1 == 1) && (neg2 == 1)) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x706800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); } static inline unsigned pan_pack_add_branch_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if (src0 > src1) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } if (cmpf == 0) cmpf = 2; else if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; else if (cmpf == 1) cmpf = 3; } unsigned derived_12 = 0; if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; else unreachable("No pattern match at pos 12"); unsigned 
derived_9 = 0; if ((src0 < src1) && (cmpf == 2)) derived_9 = 0; else if (((src0 < src1) && (cmpf == 3)) || ((src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1; else if ((src0 < src1) && (cmpf == 1)) derived_9 = 2; else if ((src0 < src1) && (cmpf == 0)) derived_9 = 3; else if ((src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4; else unreachable("No pattern match at pos 9"); return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); } static inline unsigned pan_pack_add_iabs_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); return 0x3dea0 | (src0 << 0); } static inline unsigned pan_pack_add_iadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lanes0_temp = 0; if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lanes0 = lanes0_temp; assert(lanes0 < 2); unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; else unreachable("Could not pattern match 
widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9) | (derived_10 << 10); } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_7 = 0; if ((saturate == 0) && (lanes1 != 4) && (lanes1 != 5)) derived_7 = 0; else if ((saturate == 1) || (lanes1 == 4) || (lanes1 == 5)) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v2u16"); } } static inline unsigned pan_pack_add_icmp_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = 
bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x7b200 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); } static inline unsigned pan_pack_add_fsin_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned offset = 0; return 0x67a80 | (src0 << 0) | (offset << 4); } static inline unsigned pan_pack_add_cube_ssel(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned derived_9 = 0; if ((neg0 == 0) && (neg1 == 0)) derived_9 = 0; else if ((neg0 == 1) && (neg1 == 1)) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x3e000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); } static inline unsigned pan_pack_add_fatan_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); return 0x67a40 | (src0 << 0) | (src1 << 3); } static inline unsigned pan_pack_add_isub_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned 
lanes0_temp = 0; if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lanes0 = lanes0_temp; assert(lanes0 < 2); unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if (((lanes0 == 0) || (lanes0 == 1)) && ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbd800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbfc40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else 
unreachable("No pattern match at pos 9"); return 0xbf800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_v2s16"); } } static inline unsigned pan_pack_fma_atom_c1_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf3); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf3); unsigned atom_opc = 2; return 0x2f1e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); } static inline unsigned pan_pack_add_isub_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 32) lanes1_temp = 0; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0) lanes1_temp = 1; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1) lanes1_temp = 2; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2) lanes1_temp = 5; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3) lanes1_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if (lanes1 == 0) { return 0xbd600 | (src0 << 0) | (src1 << 3) | (saturate << 8); } else if ((lanes1 == 1) || (lanes1 == 2)) { unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbfc00 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes1 == 3) || (lanes1 == 4) || (lanes1 == 5) || (lanes1 == 6)) { unsigned derived_9 = 0; if (lanes1 == 3) derived_9 = 0; else if (lanes1 == 4) derived_9 = 1; else if (lanes1 == 5) derived_9 = 2; else if (lanes1 == 6) derived_9 = 3; else 
unreachable("No pattern match at pos 9"); return 0xbf000 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_isub_s32"); } } static inline unsigned pan_pack_add_ld_attr_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); unsigned register_format_temp = 0; if (ins->format == nir_type_float16) register_format_temp = 0; else if (ins->format == nir_type_float32) register_format_temp = 1; else if (ins->format == nir_type_int32) register_format_temp = 2; else if (ins->format == nir_type_uint32) register_format_temp = 3; else if (ins->format == nir_type_int16) register_format_temp = 4; else if (ins->format == nir_type_uint16) register_format_temp = 5; else if (ins->format == nir_type_float64) register_format_temp = 6; else if (ins->format == nir_type_int64) register_format_temp = 7; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 16); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); unsigned attribute_index = bi_get_immediate(ins, 0); bi_write_staging_register(clause, ins); if (register_format != 8) { unsigned derived_13 = 0; if (register_format == 0) derived_13 = 0; else if (register_format == 1) derived_13 = 1; else if (register_format == 2) derived_13 = 2; else if (register_format == 3) derived_13 = 3; else if (register_format == 4) derived_13 = 4; else if (register_format == 5) derived_13 = 5; else if (register_format == 6) derived_13 = 6; else if (register_format == 7) derived_13 = 7; else unreachable("No pattern match at pos 13"); return 0x40000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6) | (derived_13 << 13); } else if (register_format == 8) { return 0xc4000 | (src0 << 0) | (src1 << 3) | (vecsize << 11) | (attribute_index << 6); } else { unreachable("No matching state found 
in add_ld_attr_imm"); } } static inline unsigned pan_pack_fma_rshift_xor_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 2) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not_result = ins->bitwise.dest_invert ? 
1 : 0; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x320000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x321800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13); } else { unreachable("No matching state found in fma_rshift_xor_v4i8"); } } static inline unsigned pan_pack_add_icmpm_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); return 0x7ba00 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned pan_pack_add_icmp_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); return 0x7b300 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6); } static inline unsigned pan_pack_add_branch_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 
32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } if (cmpf == 0) cmpf = 2; else if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; else if (cmpf == 1) cmpf = 3; } unsigned derived_12 = 0; if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; else if ((widen0 == 2) && (widen1 == 1)) derived_12 = 4; else unreachable("No pattern match at pos 12"); unsigned derived_9 = 0; if (((widen0 == 2) && (widen1 == 1) && (cmpf == 2)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 2))) derived_9 = 0; else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 3)) || ((widen0 == widen1) && (src0 == src1) && ((cmpf == 3) || (cmpf == 1)))) derived_9 = 1; else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) derived_9 = 2; else if (((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 0))) derived_9 = 3; else if ((widen0 == widen1) && (src0 == src1) && ((cmpf == 2) || (cmpf == 0))) derived_9 = 4; else unreachable("No pattern match at pos 9"); return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); } static inline unsigned pan_pack_add_load_i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->segment); unsigned seg = 
ins->segment; assert(seg < 8); unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane_temp = 0; if (lane_sz == 8 && ins->swizzle[0][0] == 0) lane_temp = 0; else if (lane_sz == 8 && ins->swizzle[0][0] == 1) lane_temp = 1; else if (lane_sz == 8 && ins->swizzle[0][0] == 2) lane_temp = 2; else if (lane_sz == 8 && ins->swizzle[0][0] == 3) lane_temp = 3; else if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 4; else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 5; else if (lane_sz == 32) lane_temp = 6; else if (lane_sz == 64) lane_temp = 7; else unreachable("Could not pattern match widen"); unsigned lane = lane_temp; assert(lane < 8); ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16; bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int; unsigned extend = extend_small ? (extend_signed ? 1 : 2) : 0; assert(extend < 4); bi_write_staging_register(clause, ins); if ((extend == 0) && ((lane == 0) || (lane == 1) || (lane == 2) || (lane == 3))) { unsigned derived_9 = 0; if (lane == 0) derived_9 = 0; else if (lane == 1) derived_9 = 1; else if (lane == 2) derived_9 = 2; else if (lane == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x60000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else if ((extend != 0) && ((lane == 4) || (lane == 5))) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lane == 4) derived_10 = 0; else if (lane == 5) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0x63800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9) | (derived_10 << 10); } else if ((extend != 0) && (lane == 6)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x63400 | (src0 << 0) | (src1 << 3) | 
(seg << 6) | (derived_9 << 9); } else if ((extend != 0) && (lane == 7)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x61400 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else { unreachable("No matching state found in add_load_i8"); } } static inline unsigned pan_pack_fma_csel_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); if ((cmpf == 4) || (cmpf == 5)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 5) cmpf = 2; else if (cmpf == 4) cmpf = 1; } if (cmpf == 3) { { unsigned temp = src2; src2 = src3; src3 = temp; } if (cmpf == 3) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf == 1) derived_12 = 1; else if (cmpf == 2) derived_12 = 2; else unreachable("No pattern match at pos 12"); return 0x2e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); } static inline unsigned pan_pack_add_frsq_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned divzero = 0; if (widen0 
== 0) { return 0x67100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5); } else if (widen0 != 0) { unsigned derived_7 = 0; if (widen0 == 1) derived_7 = 0; else if (widen0 == 2) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x67140 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); } else { unreachable("No matching state found in add_frsq_approx_f32"); } } static inline unsigned pan_pack_add_iabs_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); return 0x3de88 | (src0 << 0) | (swz0 << 4); } static inline unsigned pan_pack_add_ld_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned register_format_temp = 0; if (ins->format == nir_type_float16) register_format_temp = 0; else if (ins->format == nir_type_float32) register_format_temp = 1; else if (ins->format == nir_type_int32) register_format_temp = 2; else if (ins->format == nir_type_uint32) register_format_temp = 3; else if (ins->format == nir_type_int16) register_format_temp = 4; else if (ins->format == nir_type_uint16) register_format_temp = 5; else if (ins->format == nir_type_float64) register_format_temp = 6; else if (ins->format == nir_type_int64) register_format_temp = 7; else unreachable("Could not 
pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 16); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); bi_write_staging_register(clause, ins); if (register_format != 8) { unsigned derived_13 = 0; if (register_format == 0) derived_13 = 0; else if (register_format == 1) derived_13 = 1; else if (register_format == 2) derived_13 = 2; else if (register_format == 3) derived_13 = 3; else if (register_format == 4) derived_13 = 4; else if (register_format == 5) derived_13 = 5; else if (register_format == 6) derived_13 = 6; else if (register_format == 7) derived_13 = 7; else unreachable("No pattern match at pos 13"); return 0x40600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); } else if (register_format == 8) { return 0xc4600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); } else { unreachable("No matching state found in add_ld_attr_tex"); } } static inline unsigned pan_pack_fma_imuld(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0x33); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0x33); unsigned threads = 0; return 0x70f100 | (src0 << 0) | (src1 << 3) | (threads << 6); } static inline unsigned pan_pack_add_ld_var_special(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned varying_name = ins->constant.u64 & 0x3; assert(varying_name < 32); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); unsigned update = (ins->constant.u64 >= 20) ? 
3 : 0; assert(update < 4); unsigned register_format_temp = 0; if (ins->format == nir_type_float32) register_format_temp = 0; else if (ins->format == nir_type_float16) register_format_temp = 1; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 4); unsigned sample = ins->load_vary.interp_mode; assert(sample < 8); bi_write_staging_register(clause, ins); if (register_format != 2) { unsigned derived_3 = 0; if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; else if ((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; else unreachable("No pattern match at pos 3"); unsigned derived_19 = 0; if (register_format == 0) derived_19 = 0; else if (register_format == 1) derived_19 = 1; else unreachable("No pattern match at pos 19"); unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0x500a0 | (src0 << 0) | (derived_3 << 3) | (derived_19 << 19) | (derived_10 << 10); } else if (register_format == 2) { unsigned derived_3 = 0; if ((varying_name == 0) && (vecsize == 1) && (update == 3)) derived_3 = 0; else if ((varying_name == 2) && (vecsize == 0) && (update == 3)) derived_3 = 2; else if 
((varying_name == 3) && (vecsize == 0) && (update == 3) && (sample != 3) && (register_format != 2)) derived_3 = 3; else unreachable("No pattern match at pos 3"); unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0xcc0a0 | (src0 << 0) | (derived_3 << 3) | (derived_10 << 10); } else { unreachable("No matching state found in add_ld_var_special"); } } static inline unsigned pan_pack_add_fcos_table_u6(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned offset = 0; return 0x67a88 | (src0 << 0) | (offset << 4); } static inline unsigned pan_pack_add_ld_cvt(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); bi_write_staging_register(clause, ins); return 0xc9000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); } static inline unsigned pan_pack_fma_arshift_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0x8); unsigned src2 = bi_get_src(ins, regs, 2); 
unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x334818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x335818 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9); } else { unreachable("No matching state found in fma_arshift_v2i16"); } } static inline unsigned pan_pack_fma_csel_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 
1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); if (cmpf == 1) { { unsigned temp = src2; src2 = src3; src3 = temp; } if (cmpf == 1) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 3; else unreachable("No pattern match at pos 12"); return 0x6e0000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); } static inline unsigned pan_pack_add_ld_tile(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); bi_write_staging_register(clause, ins); return 0xcb000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 9); } static inline unsigned pan_pack_add_icmp_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp 
= 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf == 1) derived_12 = 1; else unreachable("No pattern match at pos 12"); return 0x78000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); } static inline unsigned pan_pack_add_load_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_write_staging_register(clause, ins); return 0x61000 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_add_ilogb_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 2; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 3; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); return 0x3d9e0 | (src0 << 0) | (widen0 << 3); } static inline unsigned pan_pack_add_frcp_approx_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp 
= 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned divzero = 0; if (widen0 == 0) { return 0x67000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5); } else if (widen0 != 0) { unsigned derived_7 = 0; if (widen0 == 1) derived_7 = 0; else if (widen0 == 2) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x67040 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); } else { unreachable("No matching state found in add_frcp_approx_f32"); } } static inline unsigned pan_pack_add_frcp_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned divzero = 0; unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); return 0x67080 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (lane0 << 8); } static inline unsigned pan_pack_add_discard_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) 
widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); if ((cmpf == 1) || (cmpf == 2)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } if (cmpf == 1) cmpf = 4; else if (cmpf == 2) cmpf = 5; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 3) derived_6 = 1; else if (cmpf == 4) derived_6 = 2; else if (cmpf == 5) derived_6 = 3; else unreachable("No pattern match at pos 6"); unsigned derived_8 = 0; if ((widen0 == 1) && (widen1 == 1)) derived_8 = 0; else if ((widen0 == 2) && (widen1 == 1)) derived_8 = 1; else if ((widen0 == 1) && (widen1 == 2)) derived_8 = 2; else if ((widen0 == 2) && (widen1 == 2)) derived_8 = 3; else if ((widen0 == 0) && (widen1 == 0)) derived_8 = 4; else unreachable("No pattern match at pos 8"); return 0xc8800 | (src0 << 0) | (src1 << 3) | (derived_6 << 6) | (derived_8 << 8); } static inline unsigned pan_pack_fma_iaddc_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); return 0x27fc00 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned pan_pack_add_f16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if 
(lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); return 0x3cd10 | (src0 << 0) | (lane0 << 3); } static inline unsigned pan_pack_add_fexp_table_u4(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned adj = 0; return 0x67ac0 | (src0 << 0) | (adj << 3); } static inline unsigned pan_pack_add_branch_no_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); return 0x6fa34 | (src0 << 6); } static inline unsigned pan_pack_add_acmpxchg_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0; assert(seg < 2); bi_read_staging_register(clause, ins); assert(ins->src[0] == ins->dest); return 0x644c0 | (src0 << 0) | (src1 << 3) | (seg << 9); } static inline unsigned pan_pack_add_icmp_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && 
ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); return 0x7a000 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (cmpf << 11); } static inline unsigned pan_pack_fma_fadd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned round = ins->roundmode; assert(round < 4); unsigned clamp = ins->outmod; assert(clamp < 4); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); if 
((widen0 == 2) && (widen1 == 1)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } } unsigned derived_9 = 0; if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; else unreachable("No pattern match at pos 9"); return 0x2c0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (round << 13) | (clamp << 15) | (derived_9 << 9); } static inline unsigned pan_pack_add_icmp_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && 
ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf == 1) derived_12 = 1; else unreachable("No pattern match at pos 12"); return 0x78800 | (src0 << 0) | (src1 << 3) | (swz0 << 6) | (swz1 << 8) | (result_type << 10) | (derived_12 << 12); } static inline unsigned pan_pack_fma_csel_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned src3 = bi_get_src(ins, regs, 3); unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_12 = 0; if (cmpf == 0) derived_12 = 0; else if (cmpf == 1) derived_12 = 1; else unreachable("No pattern match at pos 12"); return 0x6e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12); } static inline unsigned pan_pack_add_branch_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if 
(widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); if (((widen0 != 0) && (widen1 == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 > src1) && ((cmpf == 0) || (cmpf == 1) || (cmpf == 4))) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } if (cmpf == 1) cmpf = 4; else if (cmpf == 5) cmpf = 2; else if (cmpf == 4) cmpf = 1; else if (cmpf == 2) cmpf = 5; } unsigned derived_12 = 0; if ((widen0 == 0) && (widen1 == 0)) derived_12 = 0; else if ((widen0 == 0) && (widen1 == 1)) derived_12 = 5; else if ((widen0 == 0) && (widen1 == 2)) derived_12 = 6; else unreachable("No pattern match at pos 12"); unsigned derived_9 = 0; if ((widen0 == 0) && (widen1 != 0) && (cmpf == 3)) derived_9 = 1; else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 2)) derived_9 = 2; else if ((widen0 == 0) && (widen1 != 0) && (cmpf == 5)) derived_9 = 3; else if ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && ((cmpf == 1) || (cmpf == 4))) derived_9 = 4; else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 0)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 3))) 
derived_9 = 5; else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 1)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 2))) derived_9 = 6; else if (((widen0 == 0) && (widen1 != 0) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 < src1) && (cmpf == 4)) || ((widen0 == 0) && (widen1 == 0) && (src0 >= src1) && (cmpf == 5)) || ((widen0 == 0) && (widen1 == 0) && (src0 == src1) && (cmpf == 0))) derived_9 = 7; else unreachable("No pattern match at pos 9"); return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); } static inline unsigned pan_pack_add_icmp_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x7b280 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); } static inline unsigned pan_pack_add_texs_cube_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned skip = ins->skip; assert(skip < 2); unsigned sampler_index = ins->texture.sampler_index; unsigned texture_index = ins->texture.texture_index; bi_write_staging_register(clause, ins); return 0xdc000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (skip << 9) | (sampler_index << 10) | (texture_index << 12); } static inline unsigned pan_pack_fma_rshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers 
*regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 4); unsigned not1 = ins->bitwise.src1_invert ? 0 : 1; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 1 : 0; assert(not_result < 2); return 0x303000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); } static inline unsigned pan_pack_add_ld_var(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); unsigned update = (ins->constant.u64 >= 20) ? 
3 : 0; assert(update < 4); unsigned register_format_temp = 0; if (ins->format == nir_type_float32) register_format_temp = 0; else if (ins->format == nir_type_float16) register_format_temp = 1; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 4); unsigned sample = ins->load_vary.interp_mode; assert(sample < 8); bi_write_staging_register(clause, ins); if (register_format != 2) { unsigned derived_19 = 0; if (register_format == 0) derived_19 = 0; else if (register_format == 1) derived_19 = 1; else unreachable("No pattern match at pos 19"); unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0x500c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_19 << 19) | (derived_10 << 10); } else if (register_format == 2) { unsigned derived_10 = 0; if ((sample == 0) && (update == 0)) derived_10 = 0; else if ((sample == 1) && (update == 0)) derived_10 = 1; else if ((sample == 2) && (update == 0)) derived_10 = 2; else if ((sample == 3) && (update == 0)) derived_10 = 3; else if ((sample == 4) && (update == 1)) derived_10 = 4; else if ((sample == 0) && (update == 2)) derived_10 = 8; else if ((sample == 1) && (update == 2)) derived_10 = 9; else if ((sample == 0) && (update == 3)) derived_10 = 10; else if ((sample == 1) && (update == 3)) derived_10 = 11; 
else if ((sample == 2) && (update == 3)) derived_10 = 12; else if ((sample == 3) && (update == 3)) derived_10 = 13; else unreachable("No pattern match at pos 10"); return 0xcc0c0 | (src0 << 0) | (src1 << 3) | (vecsize << 8) | (derived_10 << 10); } else { unreachable("No matching state found in add_ld_var"); } } static inline unsigned pan_pack_add_hadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP); unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0; assert(round < 2); unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swap1_temp = 0; if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0; else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1; else unreachable("Could not pattern match widen"); unsigned swap1 = swap1_temp; assert(swap1 < 2); unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swap0_temp = 0; if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0; else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1; else unreachable("Could not pattern match widen"); unsigned swap0 = swap0_temp; assert(swap0 < 2); return 0xbc840 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10); } static inline unsigned pan_pack_add_swz_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) 
swz0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned swz0 = swz0_temp;
    assert(swz0 < 4);

    return 0x3d948 | (src0 << 0) | (swz0 << 4);
}

/*
 * Build the packed word for add_atest: 0xc8f00 with src0 at bit 0, src1 at
 * bit 3 and widen1 at bit 6.
 *
 * Both source values must have their bit set in mask 0xf7 (any of 0..7
 * except 3). Source 1 is classified as 32-bit -> 1, 16-bit lane 0 -> 2,
 * 16-bit lane 1 -> 3. Calls bi_write_staging_register() before returning.
 */
static inline unsigned pan_pack_add_atest(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf7);
    unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned widen1_temp = 0;
    if (widen1_sz == 32)
        widen1_temp = 1;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 0)
        widen1_temp = 2;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 1)
        widen1_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned widen1 = widen1_temp;
    assert(widen1 < 4);
    bi_write_staging_register(clause, ins);

    return 0xc8f00 | (src0 << 0) | (src1 << 3) | (widen1 << 6);
}

/*
 * Build the packed word for add_ldexp_v2f16: 0x74e00 with src0 at bit 0,
 * src1 at bit 3 and the raw ins->roundmode (asserted < 8) at bit 6.
 */
static inline unsigned pan_pack_add_ldexp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned round = ins->roundmode;
    assert(round < 8);

    return 0x74e00 | (src0 << 0) | (src1 << 3) | (round << 6);
}

/*
 * Build the packed word for fma_bitrev_i32: 0x701fc0 with src0 at bit 0.
 * src0 must have its bit set in mask 0xfb (any of 0..7 except 2).
 */
static inline unsigned pan_pack_fma_bitrev_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);

    return 0x701fc0 | (src0 << 0);
}

/*
 * Build the packed word for add_icmpi_i32: 0x7b900 with src0 at bit 0,
 * src1 at bit 3, cmpf at bit 6 and the constant result_type (1) at bit 10.
 *
 * ins->cond is translated through a 7-entry table in which only indices 5
 * and 6 yield a valid cmpf (0 and 1); the ~0 entries fail the assert.
 * NOTE(review): ins->cond is used as a table index without a range check;
 * confirm callers guarantee it is below 7.
 */
static inline unsigned pan_pack_add_icmpi_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned result_type = 1;
    unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 2);

    return 0x7b900 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
}

/* Build the packed word for add_mov_i32: 0x3d968 with src0 at bit 0. */
static inline unsigned pan_pack_add_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);

    return 0x3d968 | (src0 << 0);
}

/*
 * Build the packed word for fma_frexpm_f32. `log` is the constant 1 and
 * `sqrt` the constant 0, so the 0x701a20 form is the one always returned:
 * src0 at bit 0, widen0 at bit 3, abs0 at bit 6, neg0 at bit 7.
 */
static
inline unsigned pan_pack_fma_frexpm_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    /* src0 must have its bit set in mask 0xfb (any of 0..7 except 2). */
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);

    /* Fixed selectors: with log == 1 and sqrt == 0 only the second
     * branch of the final if-chain can be taken. */
    unsigned sqrt = 0;
    unsigned log = 1;

    /* Source 0: 32-bit -> 1, 16-bit lane 0 -> 2, 16-bit lane 1 -> 3. */
    unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned widen0_temp = 0;
    if (widen0_sz == 32)
        widen0_temp = 1;
    else if (widen0_sz == 16 && ins->swizzle[0][0] == 0)
        widen0_temp = 2;
    else if (widen0_sz == 16 && ins->swizzle[0][0] == 1)
        widen0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned widen0 = widen0_temp;
    assert(widen0 < 4);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);

    if ((log == 0) && (neg0 == 0)) {
        return 0x701b20 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (widen0 << 3);
    } else if ((log == 1) && (sqrt == 0)) {
        return 0x701a20 | (src0 << 0) | (abs0 << 6) | (widen0 << 3) | (neg0 << 7);
    } else {
        unreachable("No matching state found in fma_frexpm_f32");
    }
}

/*
 * Build the packed word for add_atom_cx: 0xd7400 with three source fields
 * at bits 0, 3 and 6.
 *
 * The encoded sources are instruction sources 1, 2 and 3 (not 0..2);
 * instruction source 0 is asserted to equal ins->dest. Calls
 * bi_read_staging_register() before that assert.
 */
static inline unsigned pan_pack_add_atom_cx(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 1);
    unsigned src1 = bi_get_src(ins, regs, 2);
    unsigned src2 = bi_get_src(ins, regs, 3);
    bi_read_staging_register(clause, ins);
    assert(ins->src[0] == ins->dest);

    return 0xd7400 | (src0 << 0) | (src1 << 3) | (src2 << 6);
}

/*
 * Build the packed word for add_fadd_rscale_f32: 0x88000 with src0/src1/src2
 * at bits 0/3/6, derived_9 at bit 9, abs1 at 12, neg0 at 13, neg1 at 14 and
 * abs0 at 16.
 *
 * Only BIFROST_NONE and BIFROST_SAT output modifiers are accepted. Because
 * `special` is the constant 0, derived_9 can only match with round == 0
 * (0 without clamp, 2 with clamp); any other round mode hits unreachable().
 */
static inline unsigned pan_pack_add_fadd_rscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);
    assert(ins->outmod == BIFROST_NONE || ins->outmod == BIFROST_SAT);
    unsigned clamp = (ins->outmod == BIFROST_SAT) ?
1 : 0;
    assert(clamp < 2);
    unsigned special = 0;
    unsigned round = ins->roundmode;
    assert(round < 8);
    unsigned abs1 = ins->src_abs[1];
    assert(abs1 < 2);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);

    /* `special` is always 0 here, so only the first two patterns can
     * match; both require round == 0. */
    unsigned derived_9 = 0;
    if ((clamp == 0) && (special == 0) && (round == 0))
        derived_9 = 0;
    else if ((clamp == 1) && (special == 0) && (round == 0))
        derived_9 = 2;
    else if ((clamp == 0) && (special == 1) && (round == 4))
        derived_9 = 3;
    else if ((clamp == 0) && (special == 1) && (round == 0))
        derived_9 = 4;
    else if ((clamp == 0) && (special == 1) && (round == 1))
        derived_9 = 5;
    else if ((clamp == 0) && (special == 1) && (round == 2))
        derived_9 = 6;
    else if ((clamp == 0) && (special == 1) && (round == 3))
        derived_9 = 7;
    else
        unreachable("No pattern match at pos 9");

    return 0x88000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs1 << 12) | (neg0 << 13) | (neg1 << 14) | (abs0 << 16) | (derived_9 << 9);
}

/*
 * Build the packed word for fma_atom_post_i32: 0x6ee400 with src0 at bit 0,
 * src1 at bit 3 and the constant atom_opc (2) at bit 6. Both sources must
 * have their bit set in mask 0xfb (any of 0..7 except 2).
 */
static inline unsigned pan_pack_fma_atom_post_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned atom_opc = 2;

    return 0x6ee400 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
}

/*
 * Build the packed word for fma_imul_i32. One of three encodings is chosen
 * from the size/lane (widen1) and signedness (extend1) of source 1:
 *   - 32-bit source:          0x73c0c0
 *   - 16-bit lane 0/1:        0x73c8c0, lane at bit 9, sign flag at bit 10
 *   - 8-bit lane 0..3:        0x73b0c0, lane at bit 9, sign flag at bit 11
 * Both sources must have their bit set in mask 0xfb.
 */
static inline unsigned pan_pack_fma_imul_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);

    /* widen1: 0 = 32-bit, 1..2 = 16-bit lane 0..1, 3..6 = 8-bit lane 0..3. */
    unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned widen1_temp = 0;
    if (widen1_sz == 32)
        widen1_temp = 0;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 0)
        widen1_temp = 1;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 1)
        widen1_temp = 2;
    else if (widen1_sz == 8 && ins->swizzle[1][0] == 0)
        widen1_temp = 3;
    else if (widen1_sz == 8 && ins->swizzle[1][0] == 1)
widen1_temp = 4;
    else if (widen1_sz == 8 && ins->swizzle[1][0] == 2)
        widen1_temp = 5;
    else if (widen1_sz == 8 && ins->swizzle[1][0] == 3)
        widen1_temp = 6;
    else
        unreachable("Could not pattern match widen");
    unsigned widen1 = widen1_temp;
    assert(widen1 < 8);

    /* extend1: 0 for sources wider than 16 bits, otherwise 1 when the base
     * type is nir_type_int and 2 for any other base type. */
    ASSERTED bool extend1_small = nir_alu_type_get_type_size(ins->src_types[1]) <= 16;
    bool extend1_signed = nir_alu_type_get_base_type(ins->src_types[1]) == nir_type_int;
    unsigned extend1 = extend1_small ? (extend1_signed ? 1 : 2) : 0;
    assert(extend1 < 4);

    if ((extend1 == 0) && (widen1 == 0)) {
        /* Plain 32-bit source. */
        return 0x73c0c0 | (src0 << 0) | (src1 << 3);
    } else if ((extend1 != 0) && ((widen1 == 1) || (widen1 == 2))) {
        /* 16-bit lane: lane index at bit 9, extend1 == 1 flag at bit 10. */
        unsigned derived_9 = 0;
        if (widen1 == 1)
            derived_9 = 0;
        else if (widen1 == 2)
            derived_9 = 1;
        else
            unreachable("No pattern match at pos 9");
        unsigned derived_10 = 0;
        if (extend1 == 2)
            derived_10 = 0;
        else if (extend1 == 1)
            derived_10 = 1;
        else
            unreachable("No pattern match at pos 10");

        return 0x73c8c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_10 << 10);
    } else if ((extend1 != 0) && ((widen1 == 3) || (widen1 == 4) || (widen1 == 5) || (widen1 == 6))) {
        /* 8-bit lane: lane index at bit 9, extend1 == 1 flag at bit 11. */
        unsigned derived_9 = 0;
        if (widen1 == 3)
            derived_9 = 0;
        else if (widen1 == 4)
            derived_9 = 1;
        else if (widen1 == 5)
            derived_9 = 2;
        else if (widen1 == 6)
            derived_9 = 3;
        else
            unreachable("No pattern match at pos 9");
        unsigned derived_11 = 0;
        if (extend1 == 2)
            derived_11 = 0;
        else if (extend1 == 1)
            derived_11 = 1;
        else
            unreachable("No pattern match at pos 11");

        return 0x73b0c0 | (src0 << 0) | (src1 << 3) | (derived_9 << 9) | (derived_11 << 11);
    } else {
        unreachable("No matching state found in fma_imul_i32");
    }
}

/*
 * Build the packed word for add_flogd_f32: 0x66340 with src0 at bit 0.
 * src0 must have its bit set in mask 0xf7 (any of 0..7 except 3).
 */
static inline unsigned pan_pack_add_flogd_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);

    return 0x66340 | (src0 << 0);
}

/*
 * Build the packed word for fma_frexpm_v2f16. `log` is the constant 1 and
 * `sqrt` the constant 0, so the 0x701a00 form is the one always returned:
 * src0 at bit 0, swz0 at bit 3, abs0 at bit 6, neg0 at bit 7.
 */
static inline unsigned pan_pack_fma_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins,
regs, 0);
    /* src0 must have its bit set in mask 0xfb (any of 0..7 except 2). */
    assert((1 << src0) & 0xfb);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned sqrt = 0;
    unsigned log = 1;

    /* 16-bit swizzle pair of source 0: (0,0)->0, (1,0)->1, (0,1)->2, (1,1)->3. */
    unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned swz0_temp = 0;
    if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0)
        swz0_temp = 0;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0)
        swz0_temp = 1;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1)
        swz0_temp = 2;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1)
        swz0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned swz0 = swz0_temp;
    assert(swz0 < 4);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);

    /* With log == 1 and sqrt == 0 only the second branch can be taken. */
    if ((log == 0) && (neg0 == 0)) {
        return 0x701b00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3);
    } else if ((log == 1) && (sqrt == 0)) {
        return 0x701a00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7);
    } else {
        unreachable("No matching state found in fma_frexpm_v2f16");
    }
}

/*
 * Build the packed word for add_s8_to_f32: 0x3cb80 with src0 at bit 0 and
 * lane0 at bit 4. Source 0 must be 8 bits wide; lane0 is its first swizzle
 * component (0..3), anything else hits unreachable().
 */
static inline unsigned pan_pack_add_s8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned lane0_temp = 0;
    if (lane0_sz == 8 && ins->swizzle[0][0] == 0)
        lane0_temp = 0;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 1)
        lane0_temp = 1;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 2)
        lane0_temp = 2;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 3)
        lane0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned lane0 = lane0_temp;
    assert(lane0 < 4);

    return 0x3cb80 | (src0 << 0) | (lane0 << 4);
}

/*
 * Build the packed word for add_zs_emit: 0xd7800 with src0/src1/src2 at
 * bits 0/3/6 and derived_9 at bit 9.
 *
 * `z` is set when ins->src[0] is non-zero and `stencil` when ins->src[1]
 * is non-zero; derived_9 is 1 for stencil only, 2 for z only, 3 for both.
 * Having neither set reaches unreachable(). Calls
 * bi_write_staging_register().
 */
static inline unsigned pan_pack_add_zs_emit(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned stencil = (ins->src[1] != 0);
    assert(stencil < 2);
    unsigned z = (ins->src[0] != 0);
assert(z < 2);
    bi_write_staging_register(clause, ins);

    /* stencil only -> 1, z only -> 2, both -> 3; neither is unreachable. */
    unsigned derived_9 = 0;
    if ((stencil == 1) && (z == 0))
        derived_9 = 1;
    else if ((stencil == 0) && (z == 1))
        derived_9 = 2;
    else if ((stencil == 1) && (z == 1))
        derived_9 = 3;
    else
        unreachable("No pattern match at pos 9");

    return 0xd7800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
}

/*
 * Build the packed word for add_load_i64: 0x60e00 with src0 at bit 0, src1
 * at bit 3 and the segment at bit 6. ins->segment must be non-zero and
 * below 8 (both asserted). Calls bi_write_staging_register().
 */
static inline unsigned pan_pack_add_load_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert(ins->segment);
    unsigned seg = ins->segment;
    assert(seg < 8);
    bi_write_staging_register(clause, ins);

    return 0x60e00 | (src0 << 0) | (src1 << 3) | (seg << 6);
}

/*
 * Build the packed word for add_branchz_u32: 0x6f000 with src0 at bit 0,
 * src1 at bit 6 (not bit 3) and derived_9 at bit 9.
 *
 * src1 must have its bit set in mask 0xf7 (any of 0..7 except 3).
 * ins->cond indexes a 7-entry table; indices 1..4 give cmpf 2, 3, 1, 0 and
 * the ~0 entries fail the assert. cmpf is then remapped 2->0, 3->1, 1->2,
 * 0->3 for the encoded field.
 */
static inline unsigned pan_pack_add_branchz_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf7);
    unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 4);
    unsigned derived_9 = 0;
    if (cmpf == 2)
        derived_9 = 0;
    else if (cmpf == 3)
        derived_9 = 1;
    else if (cmpf == 1)
        derived_9 = 2;
    else if (cmpf == 0)
        derived_9 = 3;
    else
        unreachable("No pattern match at pos 9");

    return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
}

/*
 * Build the packed word for add_fcmp_f32: 0x30000 with src0 at bit 0, src1
 * at bit 3, cmpf at bit 6, derived_9 (widen pair) at bit 9, abs0 at 11,
 * abs1 at 12, derived_13 (negate pair) at bit 13 and the constant
 * result_type (2) at bit 14.
 *
 * When only source 1 is negated, or source 0 is a 16-bit lane while source
 * 1 is 32-bit, the two operands and their modifiers are exchanged and cmpf
 * is remapped (1<->4, 2<->5) before encoding.
 */
static inline unsigned pan_pack_add_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);

    /* widen: 0 = 32-bit, 1 = 16-bit lane 0, 2 = 16-bit lane 1. */
    unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned widen0_temp = 0;
    if (widen0_sz == 32)
        widen0_temp = 0;
    else if (widen0_sz == 16 && ins->swizzle[0][0] == 0)
        widen0_temp = 1;
    else if (widen0_sz == 16 && ins->swizzle[0][0] == 1)
        widen0_temp = 2;
    else
        unreachable("Could not pattern match widen");
    unsigned widen0 = widen0_temp;
    assert(widen0 < 4);
    unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned widen1_temp = 0;
    if (widen1_sz == 32)
widen1_temp = 0;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 0)
        widen1_temp = 1;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 1)
        widen1_temp = 2;
    else
        unreachable("Could not pattern match widen");
    unsigned widen1 = widen1_temp;
    assert(widen1 < 4);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);

    /* ins->cond -> cmpf; index 0 maps to ~0 and fails the assert. */
    unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 8);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned abs1 = ins->src_abs[1];
    assert(abs1 < 2);
    unsigned result_type = 2;

    /* Exchange the operands (and every per-source modifier) when only
     * source 1 is negated or only source 0 is a 16-bit lane, then remap
     * cmpf to match the exchanged operand order. */
    if (((neg0 == 0) && (neg1 == 1)) || ((widen0 == 1) && (widen1 == 0)) || ((widen0 == 2) && (widen1 == 0))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
        { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
        if (cmpf == 1)
            cmpf = 4;
        else if (cmpf == 5)
            cmpf = 2;
        else if (cmpf == 4)
            cmpf = 1;
        else if (cmpf == 2)
            cmpf = 5;
    }

    /* Only these four widen pairs have an encoding. */
    unsigned derived_9 = 0;
    if ((widen0 == 0) && (widen1 == 0))
        derived_9 = 0;
    else if ((widen0 == 0) && (widen1 == 1))
        derived_9 = 1;
    else if ((widen0 == 0) && (widen1 == 2))
        derived_9 = 2;
    else if ((widen0 == 1) && (widen1 == 1))
        derived_9 = 3;
    else
        unreachable("No pattern match at pos 9");

    /* Only "no negate" and "negate source 0" have an encoding. */
    unsigned derived_13 = 0;
    if ((neg0 == 0) && (neg1 == 0))
        derived_13 = 0;
    else if ((neg0 == 1) && (neg1 == 0))
        derived_13 = 1;
    else
        unreachable("No pattern match at pos 13");

    return 0x30000 | (src0 << 0) | (src1 << 3) | (cmpf << 6) | (abs0 << 11) | (abs1 << 12) | (result_type << 14) | (derived_9 << 9) | (derived_13 << 13);
}

/*
 * Build the packed word for fma_atom_c1_return_i64: 0x2f3e00 with src0 at
 * bit 0, src1 at bit 3 and the constant atom_opc (2) at bit 6. Both sources
 * must have their bit set in mask 0xf3 (one of 0, 1, 4, 5, 6, 7).
 */
static inline unsigned pan_pack_fma_atom_c1_return_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf3);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf3);
    unsigned atom_opc = 2;

    return 0x2f3e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6);
}
/*
 * Build the packed word for add_hadd_v2u16.
 *
 * Only the BIFROST_RTN and BIFROST_RTP round modes are accepted (asserted);
 * RTP sets the round bit. Each source must be 16 bits wide with swizzle
 * (0,1) -> swap = 0 or (1,0) -> swap = 1; anything else hits unreachable().
 * Returns 0xbc8c0 with src0 at bit 0, src1 at bit 3, swap1 at bit 9,
 * swap0 at bit 10 and round at bit 12.
 */
static inline unsigned pan_pack_add_hadd_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
    unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
    assert(round < 2);

    /* Swizzle of source 1: identity (0,1) or swapped halves (1,0). */
    unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned swap1_temp = 0;
    if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1)
        swap1_temp = 0;
    else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0)
        swap1_temp = 1;
    else
        unreachable("Could not pattern match widen");
    unsigned swap1 = swap1_temp;
    assert(swap1 < 2);

    /* Same classification for source 0. */
    unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned swap0_temp = 0;
    if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1)
        swap0_temp = 0;
    else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0)
        swap0_temp = 1;
    else
        unreachable("Could not pattern match widen");
    unsigned swap0 = swap0_temp;
    assert(swap0 < 2);

    return 0xbc8c0 | (src0 << 0) | (src1 << 3) | (round << 12) | (swap1 << 9) | (swap0 << 10);
}

/*
 * Build the packed word for add_acmpstore_i32: 0x648c0 with two source
 * fields at bits 0 and 3 and the segment flag at bit 9.
 *
 * The encoded sources are instruction sources 1 and 2. Only
 * BI_SEGMENT_NONE and BI_SEGMENT_WLS are accepted (asserted); WLS sets the
 * flag. Calls bi_read_staging_register().
 */
static inline unsigned pan_pack_add_acmpstore_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 1);
    unsigned src1 = bi_get_src(ins, regs, 2);
    assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
    unsigned seg = ins->segment == BI_SEGMENT_WLS ?
1 : 0;
    assert(seg < 2);
    bi_read_staging_register(clause, ins);

    return 0x648c0 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/*
 * Build the packed word for add_frcp_f32: 0x66000 with src0 at bit 0, neg
 * at bit 3, abs0 at bit 4 and derived_6 at bit 6.
 *
 * src0 must have its bit set in mask 0xf7 (any of 0..7 except 3). Only a
 * 32-bit source 0 is matched, so widen0 and derived_6 are always 0 on the
 * path that returns.
 */
static inline unsigned pan_pack_add_frcp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);
    unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned widen0_temp = 0;
    if (widen0_sz == 32)
        widen0_temp = 0;
    else
        unreachable("Could not pattern match widen");
    unsigned widen0 = widen0_temp;
    assert(widen0 < 4);
    unsigned neg = ins->src_neg[0];
    assert(neg < 2);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned derived_6 = 0;
    if (widen0 == 0)
        derived_6 = 0;
    else
        unreachable("No pattern match at pos 6");

    return 0x66000 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6);
}

/*
 * Build the packed word for fma_fadd_v2f16: 0x6c0000 with src0 at bit 0,
 * src1 at bit 3, derived_6 at bit 6, neg0 at 7, neg1 at 8, swz0 at 9,
 * swz1 at 11, round at 13 and clamp at 15.
 *
 * abs0/abs1 have no field of their own: the operands may be exchanged and
 * the abs combination is then expressed through derived_6 together with
 * the relative order of the two encoded source values. Both sources must
 * have their bit set in mask 0xfb (any of 0..7 except 2).
 */
static inline unsigned pan_pack_fma_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned abs1 = ins->src_abs[1];
    assert(abs1 < 2);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);

    /* 16-bit swizzle pair: (0,0)->0, (1,0)->1, (0,1)->2, (1,1)->3. */
    unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned swz0_temp = 0;
    if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0)
        swz0_temp = 0;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0)
        swz0_temp = 1;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1)
        swz0_temp = 2;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1)
        swz0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned swz0 = swz0_temp;
    assert(swz0 < 4);
    unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned swz1_temp = 0;
    if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0)
        swz1_temp = 0;
else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0)
        swz1_temp = 1;
    else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1)
        swz1_temp = 2;
    else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1)
        swz1_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned swz1 = swz1_temp;
    assert(swz1 < 4);
    unsigned round = ins->roundmode;
    assert(round < 4);
    unsigned clamp = ins->outmod;
    assert(clamp < 4);

    /* Exchange both operands and all per-source modifiers when the
     * current order cannot express the requested abs combination. */
    if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = abs0; abs0 = abs1; abs1 = temp; }
        { unsigned temp = neg0; neg0 = neg1; neg1 = temp; }
        { unsigned temp = swz0; swz0 = swz1; swz1 = temp; }
    }

    /* derived_6 combined with the src0/src1 ordering selects the abs
     * pair; combinations not listed here reach unreachable(). */
    unsigned derived_6 = 0;
    if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1)))
        derived_6 = 0;
    else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1)))
        derived_6 = 1;
    else
        unreachable("No pattern match at pos 6");

    return 0x6c0000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (clamp << 15) | (derived_6 << 6);
}

/*
 * Build the packed word for add_var_tex_f16: 0xca100 with varying_index at
 * bit 0, texture_index at bit 3, derived_5 at bit 5, skip at bit 7 and
 * lod_mode at bit 9.
 *
 * lod_mode is the inverse of ins->texture.compute_lod. `update` is the
 * constant 0, so only sample == 0 matches (derived_5 = 0); sample == 1
 * reaches unreachable(). No register sources are read; the varying index
 * comes from bi_get_immediate(). Calls bi_write_staging_register().
 */
static inline unsigned pan_pack_add_var_tex_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned update = 0;
    unsigned skip = ins->skip;
    assert(skip < 2);
    unsigned lod_mode = 1 - ins->texture.compute_lod;
    assert(lod_mode < 2);
    unsigned sample = ins->load_vary.interp_mode;
    assert(sample < 2);
    unsigned varying_index = bi_get_immediate(ins, 0);
    unsigned texture_index = ins->texture.texture_index;
    bi_write_staging_register(clause, ins);
    unsigned derived_5 = 0;
    if ((sample == 0) && (update == 0))
        derived_5 = 0;
    else if ((sample == 1) && (update == 1))
        derived_5 = 1;
    else
        unreachable("No pattern match at pos 5");

    return 0xca100 | (skip << 7) | (lod_mode << 9) | (varying_index << 0) | (texture_index << 3) | (derived_5 << 5);
}

/*
 * Build the packed word for fma_lshift_and_v2i16. Source 2 is an 8-bit
 * value whose swizzle pair selects between two encodings: replicated lanes
 * use base 0x310800, mixed lane pairs use base 0x311800.
 */
static inline unsigned
pan_pack_fma_lshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    /* Sources 0 and 1 must have their bit set in mask 0xfb (any of 0..7
     * except 2); source 2 is not range-checked here. */
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);

    /* Swizzle pair of the 8-bit source 2:
     *   (0,0) (1,1) (2,2) (3,3) -> 0..3   (same lane twice)
     *   (0,1) -> 4, (2,3) -> 5, (0,2) -> 6
     * Any other pair or size reaches unreachable(). */
    unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned lanes2_temp = 0;
    if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0)
        lanes2_temp = 0;
    else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1)
        lanes2_temp = 1;
    else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2)
        lanes2_temp = 2;
    else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3)
        lanes2_temp = 3;
    else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1)
        lanes2_temp = 4;
    else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3)
        lanes2_temp = 5;
    else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2)
        lanes2_temp = 6;
    else
        unreachable("Could not pattern match widen");
    unsigned lanes2 = lanes2_temp;
    assert(lanes2 < 8);
    unsigned not1 = ins->bitwise.src1_invert ? 1 : 0;
    assert(not1 < 2);

    /* not_result is derived from ins->bitwise.dest_invert. */
    unsigned not_result = ins->bitwise.dest_invert ?
0 : 1;
    assert(not_result < 2);

    if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) {
        /* Same lane used twice: lane index goes straight into bit 9. */
        unsigned derived_9 = 0;
        if (lanes2 == 0)
            derived_9 = 0;
        else if (lanes2 == 1)
            derived_9 = 1;
        else if (lanes2 == 2)
            derived_9 = 2;
        else if (lanes2 == 3)
            derived_9 = 3;
        else
            unreachable("No pattern match at pos 9");

        return 0x310800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
    } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) {
        /* Mixed lane pair: lanes2 4/5/6 is encoded as 1/2/3 at bit 9. */
        unsigned derived_9 = 0;
        if (lanes2 == 4)
            derived_9 = 1;
        else if (lanes2 == 5)
            derived_9 = 2;
        else if (lanes2 == 6)
            derived_9 = 3;
        else
            unreachable("No pattern match at pos 9");

        return 0x311800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9);
    } else {
        unreachable("No matching state found in fma_lshift_and_v2i16");
    }
}

/*
 * Build the packed word for add_quiet_v2f16: 0x3d900 with src0 at bit 0
 * and swz0 at bit 4. Source 0 must be 16 bits wide; its swizzle pair maps
 * (0,0)->0, (1,0)->1, (0,1)->2, (1,1)->3.
 */
static inline unsigned pan_pack_add_quiet_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned swz0_temp = 0;
    if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0)
        swz0_temp = 0;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0)
        swz0_temp = 1;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1)
        swz0_temp = 2;
    else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1)
        swz0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned swz0 = swz0_temp;
    assert(swz0 < 4);

    return 0x3d900 | (src0 << 0) | (swz0 << 4);
}

/* Build the packed word for add_iabs_v4s8: 0x3deb0 with src0 at bit 0. */
static inline unsigned pan_pack_add_iabs_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);

    return 0x3deb0 | (src0 << 0);
}

/*
 * Build the packed word for add_u16_to_u32: 0x3ccc8 with src0 at bit 0 and
 * lane0 at bit 4. Source 0 must be 16 bits wide; lane0 is its first
 * swizzle component (0 or 1).
 */
static inline unsigned pan_pack_add_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned lane0_sz =
nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned lane0_temp = 0;
    if (lane0_sz == 16 && ins->swizzle[0][0] == 0)
        lane0_temp = 0;
    else if (lane0_sz == 16 && ins->swizzle[0][0] == 1)
        lane0_temp = 1;
    else
        unreachable("Could not pattern match widen");
    unsigned lane0 = lane0_temp;
    assert(lane0 < 2);

    return 0x3ccc8 | (src0 << 0) | (lane0 << 4);
}

/*
 * Build the packed word for fma_csel_u32: 0x2e6000 with src0..src3 at bits
 * 0/3/6/9 and derived_12 at bit 12.
 *
 * ins->cond indexes a 7-entry table (indices 1..4 give cmpf 2, 3, 1, 0;
 * ~0 entries fail the assert). Only cmpf 0 and 1 are encodable, so cmpf 2
 * and 3 are rewritten to 0 and 1 with src0 and src1 exchanged. Sources 0
 * and 1 must have their bit set in mask 0xfb (any of 0..7 except 2).
 */
static inline unsigned pan_pack_fma_csel_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned src3 = bi_get_src(ins, regs, 3);
    unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 4);

    /* Fold cmpf 2/3 onto 0/1 by exchanging the compared operands. */
    if ((cmpf == 2) || (cmpf == 3)) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        if (cmpf == 3)
            cmpf = 1;
        else if (cmpf == 2)
            cmpf = 0;
    }
    unsigned derived_12 = 0;
    if (cmpf == 0)
        derived_12 = 0;
    else if (cmpf == 1)
        derived_12 = 1;
    else
        unreachable("No pattern match at pos 12");

    return 0x2e6000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (derived_12 << 12);
}

/*
 * Build the packed word for fma_shaddxl_i64: 0x70e600 with src0 at bit 0,
 * src1 at bit 3 and the constant shift (0) at bit 6. Both sources must
 * have their bit set in mask 0xfb (any of 0..7 except 2).
 */
static inline unsigned pan_pack_fma_shaddxl_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned shift = 0;

    return 0x70e600 | (src0 << 0) | (src1 << 3) | (shift << 6);
}

/*
 * Build the packed word for add_s32_to_f32. Round modes 0..3 use base
 * 0x3cbc0 with the mode at bit 4; round mode 4 uses the separate base
 * 0x3cd00 with no mode field. Modes 5..7 pass the assert but reach
 * unreachable().
 */
static inline unsigned pan_pack_add_s32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned round = ins->roundmode;
    assert(round < 8);

    if (round != 4) {
        unsigned derived_4 = 0;
        if (round == 0)
            derived_4 = 0;
        else if (round == 1)
            derived_4 = 1;
        else if (round == 2)
            derived_4 = 2;
        else if (round == 3)
            derived_4 = 3;
        else
            unreachable("No pattern match at pos 4");

        return 0x3cbc0 | (src0 << 0) | (derived_4 << 4);
    } else if (round ==
4) {
        return 0x3cd00 | (src0 << 0);
    } else {
        unreachable("No matching state found in add_s32_to_f32");
    }
}

/*
 * Build the packed word for add_fmax_f32: base 0x0 with src0 at bit 0,
 * src1 at bit 3, abs1 at 6, neg0 at 7, neg1 at 8, clamp (ins->outmod,
 * asserted < 4) at 11, the constant sem (0) at 13 and abs0 at 15.
 */
static inline unsigned pan_pack_add_fmax_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned abs1 = ins->src_abs[1];
    assert(abs1 < 2);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);
    unsigned clamp = ins->outmod;
    assert(clamp < 4);
    unsigned sem = 0;
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);

    return 0x0 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15);
}

/*
 * Build the packed word for fma_lshift_xor_i32: 0x325000 with src0/src1/
 * src2 at bits 0/3/6, lane2 at bit 9 and not_result at bit 13.
 *
 * Sources 0 and 1 must have their bit set in mask 0xfb (any of 0..7 except
 * 2). Source 2 must be 8 bits wide; lane2 is its first swizzle component
 * (0..3). not_result is 1 when ins->bitwise.dest_invert is set.
 */
static inline unsigned pan_pack_fma_lshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned lane2_temp = 0;
    if (lane2_sz == 8 && ins->swizzle[2][0] == 0)
        lane2_temp = 0;
    else if (lane2_sz == 8 && ins->swizzle[2][0] == 1)
        lane2_temp = 1;
    else if (lane2_sz == 8 && ins->swizzle[2][0] == 2)
        lane2_temp = 2;
    else if (lane2_sz == 8 && ins->swizzle[2][0] == 3)
        lane2_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned lane2 = lane2_temp;
    assert(lane2 < 4);
    unsigned not_result = ins->bitwise.dest_invert ?
1 : 0;
    assert(not_result < 2);

    return 0x325000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13);
}

/*
 * Build the packed word for add_shift_double_i32: 0xefe00 with src0, src1
 * and src2 at bits 0, 3 and 6.
 */
static inline unsigned pan_pack_add_shift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);

    return 0xefe00 | (src0 << 0) | (src1 << 3) | (src2 << 6);
}

/*
 * Build the packed word for add_jump: 0x6fe34 with src0 at bit 6 (not bit
 * 0). src0 must have its bit set in mask 0xf7 (any of 0..7 except 3).
 */
static inline unsigned pan_pack_add_jump(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);

    return 0x6fe34 | (src0 << 6);
}

/*
 * Build the packed word for add_branchz_s32: 0x6f008 with src0 at bit 0,
 * src1 at bit 6 (not bit 3) and derived_9 at bit 9.
 *
 * src1 must have its bit set in mask 0xf7 (any of 0..7 except 3).
 * ins->cond indexes a 7-entry table; indices 1..4 give cmpf 2, 3, 1, 0 and
 * the ~0 entries fail the assert. cmpf is then remapped 2->0, 3->1, 1->2,
 * 0->3 for the encoded field.
 */
static inline unsigned pan_pack_add_branchz_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf7);
    unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 4);
    unsigned derived_9 = 0;
    if (cmpf == 2)
        derived_9 = 0;
    else if (cmpf == 3)
        derived_9 = 1;
    else if (cmpf == 1)
        derived_9 = 2;
    else if (cmpf == 0)
        derived_9 = 3;
    else
        unreachable("No pattern match at pos 9");

    return 0x6f008 | (src0 << 0) | (src1 << 6) | (derived_9 << 9);
}

/*
 * Build the packed word for add_branch_u32: 0x68000 with src0/src1/src2 at
 * bits 0/3/6, derived_9 (comparison) at bit 9 and derived_12 (widen pair)
 * at bit 12.
 *
 * The operands are exchanged, with cmpf remapped (0<->2, 1<->3), whenever
 * src0 < src1, so the encoded pair always satisfies src0 >= src1. Only the
 * 32-bit/32-bit widen pair is encodable here. src2 must have its bit set
 * in mask 0xf7 (any of 0..7 except 3).
 */
static inline unsigned pan_pack_add_branch_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);
    assert((1 << src2) & 0xf7);

    /* widen: 0 = 32-bit, 1 = 16-bit lane 0, 2 = 16-bit lane 1. */
    unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned widen0_temp = 0;
    if (widen0_sz == 32)
        widen0_temp = 0;
    else if (widen0_sz == 16 && ins->swizzle[0][0] == 0)
        widen0_temp = 1;
    else if (widen0_sz == 16 && ins->swizzle[0][0] == 1)
        widen0_temp = 2;
    else
        unreachable("Could not pattern match widen");
    unsigned widen0 = widen0_temp;
    assert(widen0 < 4);
    unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned widen1_temp = 0;
    if (widen1_sz == 32)
        widen1_temp
= 0;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 0)
        widen1_temp = 1;
    else if (widen1_sz == 16 && ins->swizzle[1][0] == 1)
        widen1_temp = 2;
    else
        unreachable("Could not pattern match widen");
    unsigned widen1 = widen1_temp;
    assert(widen1 < 4);

    /* ins->cond -> cmpf; the ~0 entries fail the assert below. */
    unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 4);

    /* Normalise to src0 >= src1, remapping cmpf for the new order. */
    if (src0 < src1) {
        { unsigned temp = src0; src0 = src1; src1 = temp; }
        { unsigned temp = widen0; widen0 = widen1; widen1 = temp; }
        if (cmpf == 0)
            cmpf = 2;
        else if (cmpf == 3)
            cmpf = 1;
        else if (cmpf == 2)
            cmpf = 0;
        else if (cmpf == 1)
            cmpf = 3;
    }

    /* Only two 32-bit operands are encodable. */
    unsigned derived_12 = 0;
    if ((widen0 == 0) && (widen1 == 0))
        derived_12 = 0;
    else
        unreachable("No pattern match at pos 12");

    /* After the exchange above src0 >= src1 always holds, so this is a
     * plain cmpf remap: 2->0, 3->1, 1->2, 0->3. */
    unsigned derived_9 = 0;
    if ((src0 >= src1) && (cmpf == 2))
        derived_9 = 0;
    else if ((src0 >= src1) && (cmpf == 3))
        derived_9 = 1;
    else if ((src0 >= src1) && (cmpf == 1))
        derived_9 = 2;
    else if ((src0 >= src1) && (cmpf == 0))
        derived_9 = 3;
    else
        unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9);
}

/*
 * Build the packed word for add_mux_i32: 0x74000 with src0/src1/src2 at
 * bits 0/3/6 and the constant mux (1) at bit 9.
 */
static inline unsigned pan_pack_add_mux_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned mux = 1;

    return 0x74000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9);
}

/*
 * Build the packed word for add_texs_2d_f16: 0xd8000 with src0 at bit 0,
 * src1 at bit 3, texture_index at bit 6, skip at bit 9, sampler_index at
 * bit 10 and lod_mode at bit 13.
 *
 * lod_mode is the inverse of ins->texture.compute_lod. Calls
 * bi_write_staging_register().
 * NOTE(review): texture_index and sampler_index are not range-checked
 * before being shifted into the word; confirm callers bound them.
 */
static inline unsigned pan_pack_add_texs_2d_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned skip = ins->skip;
    assert(skip < 2);
    unsigned lod_mode = 1 - ins->texture.compute_lod;
    assert(lod_mode < 2);
    unsigned texture_index = ins->texture.texture_index;
    unsigned sampler_index = ins->texture.sampler_index;
    bi_write_staging_register(clause, ins);

    return 0xd8000 | (src0 << 0) | (src1 << 3) | (skip << 9) | (lod_mode << 13) | (texture_index << 6) |
(sampler_index << 10);
}

/*
 * Build the packed word for add_s8_to_s32: 0x3cb40 with src0 at bit 0 and
 * lane0 at bit 4. Source 0 must be 8 bits wide; lane0 is its first swizzle
 * component (0..3), anything else hits unreachable().
 */
static inline unsigned pan_pack_add_s8_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned lane0_temp = 0;
    if (lane0_sz == 8 && ins->swizzle[0][0] == 0)
        lane0_temp = 0;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 1)
        lane0_temp = 1;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 2)
        lane0_temp = 2;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 3)
        lane0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned lane0 = lane0_temp;
    assert(lane0 < 4);

    return 0x3cb40 | (src0 << 0) | (lane0 << 4);
}

/*
 * Build the packed word for fma_u8_to_u32: 0x700b48 with src0 at bit 0 and
 * lane0 at bit 4. src0 must have its bit set in mask 0xfb (any of 0..7
 * except 2). Source 0 must be 8 bits wide; lane0 is its first swizzle
 * component (0..3).
 */
static inline unsigned pan_pack_fma_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned lane0_temp = 0;
    if (lane0_sz == 8 && ins->swizzle[0][0] == 0)
        lane0_temp = 0;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 1)
        lane0_temp = 1;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 2)
        lane0_temp = 2;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 3)
        lane0_temp = 3;
    else
        unreachable("Could not pattern match widen");
    unsigned lane0 = lane0_temp;
    assert(lane0 < 4);

    return 0x700b48 | (src0 << 0) | (lane0 << 4);
}

/*
 * Build the packed word for add_cube_tsel: 0x3e400 with src0/src1/src2 at
 * bits 0/3/6 and derived_9 at bit 9.
 *
 * The negate flags of sources 0 and 1 must agree: both clear gives
 * derived_9 = 0, both set gives 1; a mixed pair reaches unreachable().
 */
static inline unsigned pan_pack_add_cube_tsel(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);
    unsigned derived_9 = 0;
    if ((neg0 == 0) && (neg1 == 0))
        derived_9 = 0;
    else if ((neg0 == 1) && (neg1 == 1))
        derived_9 = 1;
    else
        unreachable("No pattern match at pos 9");

    return 0x3e400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_9 << 9);
}

/*
 * Build the packed word for add_fpow_sc_det_f32: 0x67640 with src0 at bit
 * 0, src1 at bit 3 and the constant func (0) at bit 7. Both sources must
 * have their bit set in mask 0xf7 (any of 0..7 except 3).
 */
static inline unsigned
pan_pack_add_fpow_sc_det_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    /* Each source must have its bit set in mask 0xf7 (any of 0..7 except 3). */
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf7);
    unsigned func = 0;

    return 0x67640 | (src0 << 0) | (src1 << 3) | (func << 7);
}

/*
 * Build the packed word for fma_mkvec_v4i8: 0x710000 with src0..src3 at
 * bits 0/3/6/9 and one lane bit per source at bits 12..15.
 *
 * Every source must be 8 bits wide and may only select byte 0 (lane bit 0)
 * or byte 2 (lane bit 1) through its first swizzle component; bytes 1 and
 * 3 reach unreachable(). Sources 0 and 1 must have their bit set in mask
 * 0xfb (any of 0..7 except 2).
 */
static inline unsigned pan_pack_fma_mkvec_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned src3 = bi_get_src(ins, regs, 3);

    unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned lane0_temp = 0;
    if (lane0_sz == 8 && ins->swizzle[0][0] == 0)
        lane0_temp = 0;
    else if (lane0_sz == 8 && ins->swizzle[0][0] == 2)
        lane0_temp = 1;
    else
        unreachable("Could not pattern match widen");
    unsigned lane0 = lane0_temp;
    assert(lane0 < 2);

    unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned lane1_temp = 0;
    if (lane1_sz == 8 && ins->swizzle[1][0] == 0)
        lane1_temp = 0;
    else if (lane1_sz == 8 && ins->swizzle[1][0] == 2)
        lane1_temp = 1;
    else
        unreachable("Could not pattern match widen");
    unsigned lane1 = lane1_temp;
    assert(lane1 < 2);

    unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned lane2_temp = 0;
    if (lane2_sz == 8 && ins->swizzle[2][0] == 0)
        lane2_temp = 0;
    else if (lane2_sz == 8 && ins->swizzle[2][0] == 2)
        lane2_temp = 1;
    else
        unreachable("Could not pattern match widen");
    unsigned lane2 = lane2_temp;
    assert(lane2 < 2);

    unsigned lane3_sz = nir_alu_type_get_type_size(ins->src_types[3]);
    unsigned lane3_temp = 0;
    if (lane3_sz == 8 && ins->swizzle[3][0] == 0)
        lane3_temp = 0;
    else if (lane3_sz == 8 && ins->swizzle[3][0] == 2)
        lane3_temp = 1;
    else
        unreachable("Could not pattern match widen");
    unsigned lane3 = lane3_temp;
    assert(lane3 < 2);

    return 0x710000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) |
(lane0 << 12) | (lane1 << 13) | (lane2 << 14) | (lane3 << 15); } static inline unsigned pan_pack_add_fmin_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned clamp = ins->outmod; assert(clamp < 4); unsigned sem = 0; unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); return 0x10000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (clamp << 11) | (sem << 13) | (abs0 << 15); } static inline unsigned pan_pack_fma_fcmp_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 
1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); unsigned result_type = 2; if (((abs0 == 0) && (src0 > src1)) || ((abs1 == 1) && (src0 <= src1))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = swz0; swz0 = swz1; swz1 = temp; } if (cmpf == 1) cmpf = 4; else if (cmpf == 5) cmpf = 2; else if (cmpf == 4) cmpf = 1; else if (cmpf == 2) cmpf = 5; } unsigned derived_6 = 0; if (((abs0 == 1) && (abs1 == 0) && (src0 > src1)) || ((abs0 == 0) && (abs1 == 0) && (src0 <= src1))) derived_6 = 0; else if (((abs0 == 1) && (abs1 == 1) && (src0 > src1)) || ((abs0 == 1) && (abs1 == 0) && (src0 <= src1))) derived_6 = 1; else unreachable("No pattern match at pos 6"); unsigned derived_13 = 0; if (cmpf == 0) derived_13 = 0; else if (cmpf == 1) derived_13 = 1; else if (cmpf == 2) derived_13 = 2; else if (cmpf == 3) derived_13 = 3; else if (cmpf == 4) derived_13 = 4; else if (cmpf == 5) derived_13 = 5; else if (cmpf == 6) derived_13 = 6; else if ((cmpf == 7) && (abs0 == 0) && (abs1 == 0)) derived_13 = 7; else unreachable("No pattern match at pos 13"); return 0x640000 | (src0 << 0) | (src1 << 3) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (result_type << 16) | (derived_6 << 6) | (derived_13 << 13); } static inline unsigned pan_pack_add_acmpxchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS); unsigned seg = ins->segment == BI_SEGMENT_WLS ? 
1 : 0; assert(seg < 2); bi_read_staging_register(clause, ins); assert(ins->src[0] == ins->dest); return 0x64500 | (src0 << 0) | (src1 << 3) | (seg << 9); } static inline unsigned pan_pack_fma_rshift_and_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) lanes2_temp = 4; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 3) lanes2_temp = 5; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 2) lanes2_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 
0 : 1; assert(not_result < 2); if ((lanes2 == 0) || (lanes2 == 1) || (lanes2 == 2) || (lanes2 == 3)) { unsigned derived_9 = 0; if (lanes2 == 0) derived_9 = 0; else if (lanes2 == 1) derived_9 = 1; else if (lanes2 == 2) derived_9 = 2; else if (lanes2 == 3) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x300800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if ((lanes2 == 4) || (lanes2 == 5) || (lanes2 == 6)) { unsigned derived_9 = 0; if (lanes2 == 4) derived_9 = 1; else if (lanes2 == 5) derived_9 = 2; else if (lanes2 == 6) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else { unreachable("No matching state found in fma_rshift_and_v2i16"); } } static inline unsigned pan_pack_add_fpow_sc_det_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned func = 0; unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lane1_temp = 0; if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 0; else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; else if (lane1_sz == 32) lane1_temp = 2; else unreachable("Could not pattern match widen"); unsigned lane1 = lane1_temp; assert(lane1 < 4); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); if ((func == 0) || (func == 1)) { unsigned derived_6 = 0; if ((lane1 == 2) || (lane1 == 0)) derived_6 = 0; else if (lane1 == 1) derived_6 = 1; else unreachable("No pattern match at 
pos 6"); unsigned derived_8 = 0; if (func == 0) derived_8 = 0; else if (func == 1) derived_8 = 1; else unreachable("No pattern match at pos 8"); return 0x67400 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_6 << 6) | (derived_8 << 8); } else if (((func == 2) || (func == 3)) && (lane1 == 2)) { unsigned derived_8 = 0; if (func == 2) derived_8 = 0; else if (func == 3) derived_8 = 1; else unreachable("No pattern match at pos 8"); return 0x67600 | (src0 << 0) | (src1 << 3) | (lane0 << 7) | (derived_8 << 8); } else { unreachable("No matching state found in add_fpow_sc_det_f16"); } } static inline unsigned pan_pack_add_iadd_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lanes0_temp = 0; if (lanes0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) lanes0_temp = 0; else if (lanes0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) lanes0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lanes0 = lanes0_temp; assert(lanes0 < 2); unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 0; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) lanes1_temp = 1; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0) lanes1_temp = 2; else if (lanes1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3) lanes1_temp = 5; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if (((lanes0 == 0) || (lanes0 == 1)) 
&& ((lanes1 == 0) || (lanes1 == 1))) { unsigned derived_9 = 0; if (lanes1 == 0) derived_9 = 0; else if (lanes1 == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); unsigned derived_10 = 0; if (lanes0 == 0) derived_10 = 0; else if (lanes0 == 1) derived_10 = 1; else unreachable("No pattern match at pos 10"); return 0xbc800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9) | (derived_10 << 10); } else if ((lanes0 == 0) && ((lanes1 == 2) || (lanes1 == 3))) { unsigned derived_9 = 0; if (lanes1 == 2) derived_9 = 0; else if (lanes1 == 3) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbec40 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 4) || (lanes1 == 5))) { unsigned derived_9 = 0; if (lanes1 == 4) derived_9 = 0; else if (lanes1 == 5) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0xbe800 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v2s16"); } } static inline unsigned pan_pack_fma_arshift_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0x8); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 4); return 0x335018 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9); } static inline unsigned pan_pack_add_store_i128(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { 
unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_read_staging_register(clause, ins); return 0x61200 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_add_fpclass_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); return 0x67c40 | (src0 << 0) | (lane0 << 3); } static inline unsigned pan_pack_add_u8_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 8 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 8 && ins->swizzle[0][0] == 1) lane0_temp = 1; else if (lane0_sz == 8 && ins->swizzle[0][0] == 2) lane0_temp = 2; else if (lane0_sz == 8 && ins->swizzle[0][0] == 3) lane0_temp = 3; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 4); return 0x3cb88 | (src0 << 0) | (lane0 << 4); } static inline unsigned pan_pack_fma_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { return 0x701963; } static inline unsigned pan_pack_add_lea_attr_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned register_format_temp = 0; if (ins->format == nir_type_float16) register_format_temp = 0; else if (ins->format == nir_type_float32) register_format_temp = 1; else if (ins->format 
== nir_type_int32) register_format_temp = 2; else if (ins->format == nir_type_uint32) register_format_temp = 3; else if (ins->format == nir_type_int16) register_format_temp = 4; else if (ins->format == nir_type_uint16) register_format_temp = 5; else if (ins->format == nir_type_float64) register_format_temp = 6; else if (ins->format == nir_type_int64) register_format_temp = 7; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 16); bi_write_staging_register(clause, ins); if (register_format != 8) { unsigned derived_11 = 0; if (register_format == 0) derived_11 = 0; else if (register_format == 1) derived_11 = 1; else if (register_format == 2) derived_11 = 2; else if (register_format == 3) derived_11 = 3; else if (register_format == 4) derived_11 = 4; else if (register_format == 5) derived_11 = 5; else if (register_format == 6) derived_11 = 6; else if (register_format == 7) derived_11 = 7; else unreachable("No pattern match at pos 11"); return 0xc0600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_11 << 11); } else if (register_format == 8) { return 0xc8600 | (src0 << 0) | (src1 << 3) | (src2 << 6); } else { unreachable("No matching state found in add_lea_attr_tex"); } } static inline unsigned pan_pack_fma_mkvec_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); unsigned lane1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lane1_temp = 0; if (lane1_sz == 16 && ins->swizzle[1][0] == 0) lane1_temp = 
0; else if (lane1_sz == 16 && ins->swizzle[1][0] == 1) lane1_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane1 = lane1_temp; assert(lane1 < 2); return 0x70f000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (lane1 << 7); } static inline unsigned pan_pack_fma_fadd_lscale_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); return 0x70f400 | (src0 << 0) | (src1 << 3) | (abs0 << 6) | (neg0 << 7) | (abs1 << 8) | (neg1 << 9); } static inline unsigned pan_pack_add_v2f16_to_v2u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned round = ins->roundmode; assert(round < 8); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c208 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); } else if (round == 4) { return 0x3ca88 | (src0 << 0) | (swz0 << 4); } else { unreachable("No matching state found in add_v2f16_to_v2u16"); 
} } static inline unsigned pan_pack_fma_fcmp_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); unsigned result_type = 2; if ((widen0 == 2) && (widen1 == 1)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } if (cmpf == 1) cmpf = 4; else if (cmpf == 5) cmpf = 2; else if (cmpf == 4) cmpf = 1; else if (cmpf == 2) cmpf = 5; } unsigned derived_9 = 0; if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; else if ((widen0 == 1) && (widen1 == 2)) 
derived_9 = 4; else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; else unreachable("No pattern match at pos 9"); return 0x240000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (abs0 << 12) | (cmpf << 13) | (result_type << 16) | (derived_9 << 9); } static inline unsigned pan_pack_add_fpclass_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); return 0x67c50 | (src0 << 0); } static inline unsigned pan_pack_add_ld_attr(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned register_format_temp = 0; if (ins->format == nir_type_float16) register_format_temp = 0; else if (ins->format == nir_type_float32) register_format_temp = 1; else if (ins->format == nir_type_int32) register_format_temp = 2; else if (ins->format == nir_type_uint32) register_format_temp = 3; else if (ins->format == nir_type_int16) register_format_temp = 4; else if (ins->format == nir_type_uint16) register_format_temp = 5; else if (ins->format == nir_type_float64) register_format_temp = 6; else if (ins->format == nir_type_int64) register_format_temp = 7; else unreachable("Could not pattern match register format"); unsigned register_format = register_format_temp; assert(register_format < 16); unsigned vecsize = ins->vector_channels - 1; assert(vecsize < 4); bi_write_staging_register(clause, ins); if (register_format != 8) { unsigned derived_13 = 0; if (register_format == 0) derived_13 = 0; else if (register_format == 1) derived_13 = 1; else if (register_format == 2) derived_13 = 2; else if (register_format == 3) derived_13 = 3; else if (register_format == 4) derived_13 = 4; else if (register_format == 5) derived_13 = 5; else if 
(register_format == 6) derived_13 = 6; else if (register_format == 7) derived_13 = 7; else unreachable("No pattern match at pos 13"); return 0x40400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11) | (derived_13 << 13); } else if (register_format == 8) { return 0xc4400 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (vecsize << 11); } else { unreachable("No matching state found in add_ld_attr"); } } static inline unsigned pan_pack_fma_rshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned bytes2 = 0; unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 2); unsigned result_word = 0; return 0x33d000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); } static inline unsigned pan_pack_add_branchz_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); unsigned derived_4 = 0; if (widen0 == 2) derived_4 = 1; else if (widen0 == 1) derived_4 = 2; else 
unreachable("No pattern match at pos 4"); unsigned derived_9 = 0; if (cmpf == 2) derived_9 = 0; else if (cmpf == 3) derived_9 = 1; else if (cmpf == 1) derived_9 = 2; else if (cmpf == 0) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_9 << 9); } static inline unsigned pan_pack_fma_atom_c1_return_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf3); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf3); unsigned atom_opc = 2; return 0x2f7e00 | (src0 << 0) | (src1 << 3) | (atom_opc << 6); } static inline unsigned pan_pack_add_store_i48(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_read_staging_register(clause, ins); return 0x65a00 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_fma_rshift_and_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lanes2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lanes2_temp = 0; if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 3) lanes2_temp = 0; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 0 && ins->swizzle[2][2] == 0 && ins->swizzle[2][3] == 0) lanes2_temp = 1; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 1 && ins->swizzle[2][2] == 1 && ins->swizzle[2][3] == 1) lanes2_temp = 2; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 2 && ins->swizzle[2][1] == 2 && ins->swizzle[2][2] == 2 && ins->swizzle[2][3] == 
2) lanes2_temp = 3; else if (lanes2_sz == 8 && ins->swizzle[2][0] == 3 && ins->swizzle[2][1] == 3 && ins->swizzle[2][2] == 3 && ins->swizzle[2][3] == 3) lanes2_temp = 4; else unreachable("Could not pattern match widen"); unsigned lanes2 = lanes2_temp; assert(lanes2 < 8); unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 0 : 1; assert(not_result < 2); if (lanes2 != 0) { unsigned derived_9 = 0; if (lanes2 == 1) derived_9 = 0; else if (lanes2 == 2) derived_9 = 1; else if (lanes2 == 3) derived_9 = 2; else if (lanes2 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0x300000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15) | (derived_9 << 9); } else if (lanes2 == 0) { return 0x301800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15); } else { unreachable("No matching state found in fma_rshift_and_v4i8"); } } static inline unsigned pan_pack_add_frsq_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned derived_6 = 0; if (widen0 == 0) derived_6 = 0; else unreachable("No pattern match at pos 6"); return 0x66100 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_6 << 6); } static inline unsigned pan_pack_add_icmpf_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); return 0x7be00 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned 
pan_pack_add_lea_tex_imm(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    /* format is fixed at 1; the texture index comes from ins->texture.
     * Calls bi_write_staging_register() before encoding.
     * Result: 0xd6000 | src0 | src1 << 3 | format << 11 | texture_index << 6. */
    unsigned s0 = bi_get_src(ins, regs, 0);
    unsigned s1 = bi_get_src(ins, regs, 1);

    unsigned format = 1;
    unsigned texture_index = ins->texture.texture_index;

    bi_write_staging_register(clause, ins);

    return 0xd6000 | (s0 << 0) | (s1 << 3) | (format << 11) |
           (texture_index << 6);
}

/* Pack add_f16_to_u32.
 *
 * Source 0 must be 16-bit; its first swizzle component (0/1) is the lane.
 * Round modes 0-3 use base 0x3c508 (mode at bit 4, lane at bit 7); round mode
 * 4 uses base 0x3cc48 (lane at bit 5). Other modes are rejected. */
static inline unsigned
pan_pack_add_f16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);

    unsigned round = ins->roundmode;
    assert(round < 8);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned half0 = ins->swizzle[0][0];

    if (size0 != 16 || half0 > 1) {
        unreachable("Could not pattern match widen");
    }

    unsigned lane0 = half0;
    assert(lane0 < 2);

    if (round != 4) {
        unsigned round_bits = 0;

        if (round <= 3) {
            round_bits = round;
        } else {
            unreachable("No pattern match at pos 4");
        }

        return 0x3c508 | (s0 << 0) | (lane0 << 7) | (round_bits << 4);
    } else if (round == 4) {
        return 0x3cc48 | (s0 << 0) | (lane0 << 5);
    } else {
        unreachable("No matching state found in add_f16_to_u32");
    }
}

/* Pack add_isub_u32.
 *
 * lanes1 describes source 1: 32-bit -> 0, 16-bit with swizzle 0/1 -> 1/2,
 * 8-bit with swizzle 0..3 -> 3..6. Each size class has its own base word;
 * saturate is fixed at 0 and the bit at position 7 is set whenever source 1
 * is narrower than 32 bits (or saturate is 1). */
static inline unsigned
pan_pack_add_isub_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);
    unsigned s1 = bi_get_src(ins, regs, 1);
    unsigned saturate = 0;

    unsigned size1 = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned comp1 = ins->swizzle[1][0];
    unsigned lanes1 = 0;

    if (size1 == 32) {
        lanes1 = 0;
    } else if (size1 == 16 && comp1 <= 1) {
        lanes1 = 1 + comp1;
    } else if (size1 == 8 && comp1 <= 3) {
        lanes1 = 3 + comp1;
    } else {
        unreachable("Could not pattern match widen");
    }

    assert(lanes1 < 8);

    /* Every encoding below derives the same bit 7 from saturate/lanes1. */
    unsigned bit7 = 0;

    if (saturate == 0 && lanes1 == 0) {
        bit7 = 0;
    } else if (saturate == 1 || lanes1 != 0) {
        bit7 = 1;
    } else {
        unreachable("No pattern match at pos 7");
    }

    unsigned common = (s0 << 0) | (s1 << 3) | (saturate << 8) | (bit7 << 7);

    if (lanes1 == 0) {
        return 0xbd600 | common;
    } else if (lanes1 == 1 || lanes1 == 2) {
        unsigned half_bits = 0;

        switch (lanes1) {
        case 1: half_bits = 0; break;
        case 2: half_bits = 1; break;
        default: unreachable("No pattern match at pos 9");
        }

        return 0xbfc00 | common | (half_bits << 9);
    } else if (lanes1 >= 3 && lanes1 <= 6) {
        unsigned byte_bits = 0;

        switch (lanes1) {
        case 3: byte_bits = 0; break;
        case 4: byte_bits = 1; break;
        case 5: byte_bits = 2; break;
        case 6: byte_bits = 3; break;
        default: unreachable("No pattern match at pos 9");
        }

        return 0xbf000 | common | (byte_bits << 9);
    } else {
        unreachable("No matching state found in add_isub_u32");
    }
}

/* Pack fma_v2f32_to_v2f16.
 *
 * The abs flags of both sources must agree, and likewise the neg flags; the
 * shared values land at bits 6 and 7.
 * Result: 0x6e8000 | src0 | src1 << 3 | abs << 6 | neg << 7 | clamp << 8 |
 *         round << 10. */
static inline unsigned
pan_pack_fma_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);
    unsigned s1 = bi_get_src(ins, regs, 1);
    assert((1 << s1) & 0xfb);

    unsigned a0 = ins->src_abs[0];
    assert(a0 < 2);
    unsigned a1 = ins->src_abs[1];
    assert(a1 < 2);
    unsigned n0 = ins->src_neg[0];
    assert(n0 < 2);
    unsigned n1 = ins->src_neg[1];
    assert(n1 < 2);
    unsigned clamp = ins->outmod;
    assert(clamp < 4);
    unsigned round = ins->roundmode;
    assert(round < 8);

    unsigned abs_bit = 0;

    if (a0 == 1 && a1 == 1) {
        abs_bit = 1;
    } else if (a0 == 0 && a1 == 0) {
        abs_bit = 0;
    } else {
        unreachable("No pattern match at pos 6");
    }

    unsigned neg_bit = 0;

    if (n0 == 1 && n1 == 1) {
        neg_bit = 1;
    } else if (n0 == 0 && n1 == 0) {
        neg_bit = 0;
    } else {
        unreachable("No pattern match at pos 7");
    }

    return 0x6e8000 | (s0 << 0) | (s1 << 3) | (clamp << 8) | (round << 10) |
           (abs_bit << 6) | (neg_bit << 7);
}

/* Pack add_u8_to_u32.
 *
 * Source 0 must be 8-bit; its first swizzle component (0-3) is the byte lane.
 * Result: 0x3cb48 | src0 | lane0 << 4. */
static inline unsigned
pan_pack_add_u8_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned byte0 = ins->swizzle[0][0];

    if (size0 != 8 || byte0 > 3) {
        unreachable("Could not pattern match widen");
    }

    unsigned lane0 = byte0;
    assert(lane0 < 4);

    return 0x3cb48 | (s0 << 0) | (lane0 << 4);
}

/* Pack add_kaboom: 0xd7858 | src0. */
static inline unsigned
pan_pack_add_kaboom(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);

    return 0xd7858 | (s0 << 0);
}

/* Pack fma_mov_i32: 0x701968 | src0, with src0 asserted against 0xfb. */
static inline unsigned
pan_pack_fma_mov_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);

    return 0x701968 | (s0 << 0);
}

/* Pack add_nop_i32: a constant word with no operands. */
static inline unsigned
pan_pack_add_nop_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    const unsigned word = 0x3d964;
    return word;
}

/* Pack fma_frexpe_v2f16.
 *
 * sqrt is fixed at 0 and log at 1, so only the second encoding (base
 * 0x701e00, swizzle at bit 3) can be taken, and only when neg is 0; a negated
 * source has no encoding. */
static inline unsigned
pan_pack_fma_frexpe_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 0);
    assert((1 << s0) & 0xfb);

    unsigned neg = ins->src_neg[0];
    assert(neg < 2);

    unsigned sqrt_mode = 0;
    unsigned log_mode = 1;

    /* 16-bit swizzle pair (x, y) with x, y in {0, 1} encodes as x + 2 * y. */
    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned x0 = ins->swizzle[0][0];
    unsigned y0 = ins->swizzle[0][1];

    if (size0 != 16 || x0 > 1 || y0 > 1) {
        unreachable("Could not pattern match widen");
    }

    unsigned swz0 = x0 | (y0 << 1);
    assert(swz0 < 4);

    if (log_mode == 0) {
        return 0x701c00 | (s0 << 0) | (neg << 6) | (sqrt_mode << 8) |
               (swz0 << 3);
    } else if (log_mode == 1 && sqrt_mode == 0 && neg == 0) {
        return 0x701e00 | (s0 << 0) | (swz0 << 3);
    } else {
        unreachable("No matching state found in fma_frexpe_v2f16");
    }
}

/* Pack add_store_i64.
 *
 * Encoded operands are instruction sources 1 and 2. The segment must be
 * non-zero and below 8. Calls bi_read_staging_register() before returning
 * 0x62e00 | src0 | src1 << 3 | seg << 6. */
static inline unsigned
pan_pack_add_store_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned s0 = bi_get_src(ins, regs, 1);
    unsigned s1 = bi_get_src(ins, regs, 2);

    assert(ins->segment);
    unsigned seg = ins->segment;
    assert(seg < 8);

    bi_read_staging_register(clause, ins);

    return 0x62e00 | (s0 << 0) | (s1 << 3) | (seg << 6);
}

static inline unsigned pan_pack_add_frexpm_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned sqrt = 0; unsigned log = 1; unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 
< 4); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); if ((log == 0) && (neg0 == 0)) { return 0x3db00 | (src0 << 0) | (abs0 << 6) | (sqrt << 7) | (swz0 << 3); } else if ((log == 1) && (sqrt == 0)) { return 0x3da00 | (src0 << 0) | (abs0 << 6) | (swz0 << 3) | (neg0 << 7); } else { unreachable("No matching state found in add_frexpm_v2f16"); } } static inline unsigned pan_pack_add_branchz_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); unsigned derived_3 = 0; if (cmpf == 1) derived_3 = 0; else if (cmpf == 0) derived_3 = 1; else unreachable("No pattern match at pos 3"); return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_3 << 3); } static inline unsigned pan_pack_add_swz_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 0 && ins->swizzle[0][3] == 0) swz0_temp = 0; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 1; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 2) swz0_temp = 2; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 3; else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 1) swz0_temp = 4; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 3) swz0_temp = 5; else if 
(swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0 && ins->swizzle[0][2] == 3 && ins->swizzle[0][3] == 2) swz0_temp = 6; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2 && ins->swizzle[0][2] == 1 && ins->swizzle[0][3] == 0) swz0_temp = 7; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 8); return 0x3df40 | (src0 << 0) | (swz0 << 3); } static inline unsigned pan_pack_add_branchz_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); unsigned derived_4 = 0; if (widen0 == 2) derived_4 = 1; else if (widen0 == 1) derived_4 = 2; else unreachable("No pattern match at pos 4"); unsigned derived_3 = 0; if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0; else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1; else unreachable("No pattern match at pos 3"); unsigned derived_9 = 0; if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5; else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6; else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7; else unreachable("No pattern match at pos 9"); return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3) | (derived_9 << 9); } static inline unsigned pan_pack_add_u16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned lane0_sz = 
nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); return 0x3cce8 | (src0 << 0) | (lane0 << 4); } static inline unsigned pan_pack_add_icmp_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned result_type = 1; unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 4); if ((cmpf == 2) || (cmpf == 3)) { { unsigned temp = src0; src0 = src1; src1 = temp; } if (cmpf == 3) cmpf = 1; else if (cmpf == 2) cmpf = 0; } unsigned derived_6 = 0; if (cmpf == 0) derived_6 = 0; else if (cmpf == 1) derived_6 = 1; else unreachable("No pattern match at pos 6"); return 0x7b000 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (derived_6 << 6); } static inline unsigned pan_pack_fma_frshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned bytes2 = 0; unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 2); return 0x33f000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); } static inline unsigned pan_pack_add_frcbrt_approx_c_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); return 0x67ab8 | (src0 << 0); } 
/* Packs two sources and a one-bit round flag; only the RTN and RTP rounding
 * modes are accepted, with RTP encoded as 1. */
static inline unsigned
pan_pack_add_hadd_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);

    assert(ins->roundmode == BIFROST_RTN || ins->roundmode == BIFROST_RTP);
    unsigned round = (ins->roundmode == BIFROST_RTP) ? 1 : 0;
    assert(round < 2);

    return 0xbc440 | (src0 << 0) | (src1 << 3) | (round << 12);
}

/* Packs source 0 and a 16-bit lane selector. */
static inline unsigned
pan_pack_add_s16_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned sel0 = ins->swizzle[0][0];
    unsigned lane0 = 0;
    if (size0 == 16 && sel0 <= 1)
        lane0 = sel0;
    else
        unreachable("Could not pattern match widen");
    assert(lane0 < 2);

    return 0x3cce0 | (src0 << 0) | (lane0 << 4);
}

/* Packs source 0 and a two-component byte swizzle: the first component goes
 * in the low two bits of the field, the second in the high two bits. */
static inline unsigned
pan_pack_add_v2u8_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned comp_x = ins->swizzle[0][0];
    unsigned comp_y = ins->swizzle[0][1];
    unsigned swz0 = 0;
    if (size0 == 8 && comp_x <= 3 && comp_y <= 3)
        swz0 = comp_x | (comp_y << 2);
    else
        unreachable("Could not pattern match widen");
    assert(swz0 < 16);

    return 0x3c808 | (src0 << 0) | (swz0 << 4);
}

/* Packs two sources, a 16-bit half selector and a two-bit comparison.  A
 * full 32-bit source 0 has no encoding in this form. */
static inline unsigned
pan_pack_add_branchz_s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf7);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned sel0 = ins->swizzle[0][0];
    unsigned widen0 = 0;
    if (size0 == 32)
        widen0 = 0;
    else if (size0 == 16 && sel0 <= 1)
        widen0 = 1 + sel0;
    else
        unreachable("Could not pattern match widen");
    assert(widen0 < 4);

    const unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 4);

    unsigned half_field = 0;
    switch (widen0) {
    case 2:
        half_field = 1;
        break;
    case 1:
        half_field = 2;
        break;
    default:
        unreachable("No pattern match at pos 4");
    }

    unsigned cmp_field = 0;
    switch (cmpf) {
    case 2:
        cmp_field = 0;
        break;
    case 3:
        cmp_field = 1;
        break;
    case 1:
        cmp_field = 2;
        break;
    case 0:
        cmp_field = 3;
        break;
    default:
        unreachable("No pattern match at pos 9");
    }

    return 0x6f008 | (src0 << 0) | (src1 << 6) | (half_field << 4) | (cmp_field << 9);
}

/* Packs two sources.  Source 0 must use the identity byte swizzle; source 1
 * may be identity or a broadcast of one byte, which selects a different base
 * pattern and a two-bit byte index. */
static inline unsigned
pan_pack_fma_imul_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned replicate0 = 0;
    if (size0 == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 &&
        ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3)
        replicate0 = 0;
    else
        unreachable("Could not pattern match widen");
    assert(replicate0 < 8);

    /* replicate1: 0 = identity, 1..4 = broadcast of byte 0..3. */
    unsigned size1 = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned b0 = ins->swizzle[1][0];
    unsigned b1 = ins->swizzle[1][1];
    unsigned b2 = ins->swizzle[1][2];
    unsigned b3 = ins->swizzle[1][3];
    unsigned replicate1 = 0;
    if (size1 == 8 && b0 == 0 && b1 == 1 && b2 == 2 && b3 == 3)
        replicate1 = 0;
    else if (size1 == 8 && b0 <= 3 && b1 == b0 && b2 == b0 && b3 == b0)
        replicate1 = 1 + b0;
    else
        unreachable("Could not pattern match widen");
    assert(replicate1 < 8);

    if (replicate0 == 0 && replicate1 == 0)
        return 0x73e0c0 | (src0 << 0) | (src1 << 3);

    if (replicate0 == 0 && replicate1 != 0) {
        unsigned byte_field = 0;
        if (replicate1 >= 1 && replicate1 <= 4)
            byte_field = replicate1 - 1;
        else
            unreachable("No pattern match at pos 9");

        return 0x7380c0 | (src0 << 0) | (src1 << 3) | (byte_field << 9);
    }

    unreachable("No matching state found in fma_imul_v4i8");
}

/* Packs source 0 and a 16-bit lane selector. */
static inline unsigned
pan_pack_add_s16_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned sel0 = ins->swizzle[0][0];
    unsigned lane0 = 0;
    if (size0 == 16 && sel0 <= 1)
        lane0 = sel0;
    else
        unreachable("Could not pattern match widen");
    assert(lane0 < 2);

    return 0x3ccc0 | (src0 << 0) | (lane0 << 4);
}

/* Packs source 0 and the rounding mode.  Round mode 4 uses a separate base
 * pattern that carries no rounding field. */
static inline unsigned
pan_pack_add_f32_to_s32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned round = ins->roundmode;
    assert(round < 8);

    if (round != 4) {
        unsigned round_field = 0;
        if (round <= 3)
            round_field = round;
        else
            unreachable("No pattern match at pos 4");

        return 0x3c980 | (src0 << 0) | (round_field << 4);
    }

    if (round == 4)
        return 0x3cca0 | (src0 << 0);

    unreachable("No matching state found in add_f32_to_s32");
}

/* Packs three sources, a byte-lane selector for source 2 and the
 * result-invert flag. */
static inline unsigned
pan_pack_fma_rshift_xor_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);

    unsigned size2 = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned sel2 = ins->swizzle[2][0];
    unsigned lane2 = 0;
    if (size2 == 8 && sel2 <= 3)
        lane2 = sel2;
    else
        unreachable("Could not pattern match widen");
    assert(lane2 < 4);

    unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
    assert(not_result < 2);

    return 0x321000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not_result << 13);
}

/* Fixed pattern 0x67a00 combined with two sources (mask 0xf7 on each). */
static inline unsigned
pan_pack_add_fatan_assist_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf7);

    return 0x67a00 | (src0 << 0) | (src1 << 3);
}

/* Packs three sources; the `mux` field is fixed at 1. */
static inline unsigned
pan_pack_add_mux_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned mux = 1;

    return 0x74800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9);
}

/* Packs three sources, a two-component byte-lane pattern for source 2 and
 * the result-invert flag.  Equal-component patterns (0..3) and the mixed
 * patterns 01, 23, 02 (4..6) use different base patterns. */
static inline unsigned
pan_pack_fma_lshift_xor_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);

    unsigned size2 = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned comp_x = ins->swizzle[2][0];
    unsigned comp_y = ins->swizzle[2][1];
    unsigned lanes2 = 0;
    if (size2 == 8 && comp_x <= 3 && comp_y == comp_x)
        lanes2 = comp_x;
    else if (size2 == 8 && comp_x == 0 && comp_y == 1)
        lanes2 = 4;
    else if (size2 == 8 && comp_x == 2 && comp_y == 3)
        lanes2 = 5;
    else if (size2 == 8 && comp_x == 0 && comp_y == 2)
        lanes2 = 6;
    else
        unreachable("Could not pattern match widen");
    assert(lanes2 < 8);

    unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
    assert(not_result < 2);

    unsigned fields = (src0 << 0) | (src1 << 3) | (src2 << 6) | (not_result << 13);

    if (lanes2 <= 3)
        return 0x324800 | fields | (lanes2 << 9);

    if (lanes2 <= 6) {
        unsigned lane_field = 0;
        switch (lanes2) {
        case 4:
            lane_field = 1;
            break;
        case 5:
            lane_field = 2;
            break;
        case 6:
            lane_field = 3;
            break;
        default:
            unreachable("No pattern match at pos 9");
        }

        return 0x325800 | fields | (lane_field << 9);
    }

    unreachable("No matching state found in fma_lshift_xor_v2i16");
}

/* Packs two sources and the segment, and calls bi_write_staging_register()
 * before returning. */
static inline unsigned
pan_pack_add_load_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);

    assert(ins->segment);
    unsigned seg = ins->segment;
    assert(seg < 8);

    bi_write_staging_register(clause, ins);

    return 0x65400 | (src0 << 0) | (src1 << 3) | (seg << 6);
}

/* Packs three sources, a byte-lane selector for source 2 and both invert
 * flags.  Note that `not1` is 0 when src1_invert is set and 1 otherwise. */
static inline unsigned
pan_pack_fma_lshift_or_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);

    unsigned size2 = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned sel2 = ins->swizzle[2][0];
    unsigned lane2 = 0;
    if (size2 == 8 && sel2 <= 3)
        lane2 = sel2;
    else
        unreachable("Could not pattern match widen");
    assert(lane2 < 4);

    unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
    assert(not1 < 2);
    unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
    assert(not_result < 2);

    return 0x313000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15);
}

/* Packs three sources, a two-component byte-lane pattern for source 2 and
 * both invert flags.  `not1` is 0 when src1_invert is set and 1 otherwise.
 * Equal-component patterns (0..3) and the mixed patterns 01, 23, 02 (4..6)
 * use different base patterns. */
static inline unsigned
pan_pack_fma_lshift_or_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);

    unsigned size2 = nir_alu_type_get_type_size(ins->src_types[2]);
    unsigned comp_x = ins->swizzle[2][0];
    unsigned comp_y = ins->swizzle[2][1];
    unsigned lanes2 = 0;
    if (size2 == 8 && comp_x <= 3 && comp_y == comp_x)
        lanes2 = comp_x;
    else if (size2 == 8 && comp_x == 0 && comp_y == 1)
        lanes2 = 4;
    else if (size2 == 8 && comp_x == 2 && comp_y == 3)
        lanes2 = 5;
    else if (size2 == 8 && comp_x == 0 && comp_y == 2)
        lanes2 = 6;
    else
        unreachable("Could not pattern match widen");
    assert(lanes2 < 8);

    unsigned not1 = ins->bitwise.src1_invert ? 0 : 1;
    assert(not1 < 2);
    unsigned not_result = ins->bitwise.dest_invert ? 1 : 0;
    assert(not_result < 2);

    unsigned fields = (src0 << 0) | (src1 << 3) | (src2 << 6) | (not1 << 14) | (not_result << 15);

    if (lanes2 <= 3)
        return 0x312800 | fields | (lanes2 << 9);

    if (lanes2 <= 6) {
        unsigned lane_field = 0;
        switch (lanes2) {
        case 4:
            lane_field = 1;
            break;
        case 5:
            lane_field = 2;
            break;
        case 6:
            lane_field = 3;
            break;
        default:
            unreachable("No pattern match at pos 9");
        }

        return 0x313800 | fields | (lane_field << 9);
    }

    unreachable("No matching state found in fma_lshift_or_v2i16");
}

/* Calls bi_write_staging_register() and returns the fixed pattern with the
 * `source` field set to 7. */
static inline unsigned
pan_pack_add_ld_gclk_u64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned source = 7;

    bi_write_staging_register(clause, ins);

    return 0xd7800 | (source << 0);
}

/* Packs source 0 and the segment; `preserve_null` is fixed at 0. */
static inline unsigned
pan_pack_add_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);

    assert(ins->segment);
    unsigned seg = ins->segment;
    assert(seg < 8);
    unsigned preserve_null = 0;

    return 0x3d500 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
}

/* Packs instruction sources 1 and 2 and a one-bit segment flag (1 for
 * BI_SEGMENT_WLS, 0 for BI_SEGMENT_NONE).  Calls bi_read_staging_register()
 * and asserts that source 0 and the destination are the same. */
static inline unsigned
pan_pack_add_axchg_i64(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 1);
    unsigned src1 = bi_get_src(ins, regs, 2);

    assert(ins->segment == BI_SEGMENT_NONE || ins->segment == BI_SEGMENT_WLS);
    unsigned seg = ins->segment == BI_SEGMENT_WLS ? 1 : 0;
    assert(seg < 2);

    bi_read_staging_register(clause, ins);
    assert(ins->src[0] == ins->dest);

    return 0x64100 | (src0 << 0) | (src1 << 3) | (seg << 9);
}

/* Packs two sources; `saturate` is fixed at 0.  Source 0 must use the
 * identity byte swizzle.  Source 1 may be identity (0), a byte broadcast
 * (1..4) or the repeated pairs 0101 / 2323 (5..6); each group has its own
 * base pattern. */
static inline unsigned
pan_pack_add_isub_v4s8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned saturate = 0;

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned lanes0 = 0;
    if (size0 == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 &&
        ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3)
        lanes0 = 0;
    else
        unreachable("Could not pattern match widen");
    assert(lanes0 < 8);

    unsigned size1 = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned b0 = ins->swizzle[1][0];
    unsigned b1 = ins->swizzle[1][1];
    unsigned b2 = ins->swizzle[1][2];
    unsigned b3 = ins->swizzle[1][3];
    unsigned lanes1 = 0;
    if (size1 == 8 && b0 == 0 && b1 == 1 && b2 == 2 && b3 == 3)
        lanes1 = 0;
    else if (size1 == 8 && b0 <= 3 && b1 == b0 && b2 == b0 && b3 == b0)
        lanes1 = 1 + b0;
    else if (size1 == 8 && b0 == 0 && b1 == 1 && b2 == 0 && b3 == 1)
        lanes1 = 5;
    else if (size1 == 8 && b0 == 2 && b1 == 3 && b2 == 2 && b3 == 3)
        lanes1 = 6;
    else
        unreachable("Could not pattern match widen");
    assert(lanes1 < 8);

    unsigned fields = (src0 << 0) | (src1 << 3) | (saturate << 8);

    if (lanes0 == 0 && lanes1 == 0)
        return 0xbd400 | fields;

    if (lanes0 == 0 && lanes1 >= 1 && lanes1 <= 4)
        return 0xbf040 | fields | ((lanes1 - 1) << 9);

    if (lanes0 == 0 && (lanes1 == 5 || lanes1 == 6)) {
        unsigned pair_field = 0;
        if (lanes1 == 5)
            pair_field = 0;
        else if (lanes1 == 6)
            pair_field = 1;
        else
            unreachable("No pattern match at pos 9");

        return 0xbf840 | fields | (pair_field << 9);
    }

    unreachable("No matching state found in add_isub_v4s8");
}

/* Packs four sources with abs0/neg2 flags.  The neg flags of sources 0 and 1
 * are combined into one bit (set when they differ).  Clamp, `special` (fixed
 * at 0 here) and the RTE/RTZ round flag are merged into one field at
 * position 12. */
static inline unsigned
pan_pack_fma_fma_rscale_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xfb);
    unsigned src2 = bi_get_src(ins, regs, 2);
    unsigned src3 = bi_get_src(ins, regs, 3);

    assert(ins->roundmode == BIFROST_RTE || ins->roundmode == BIFROST_RTZ);
    unsigned round = (ins->roundmode == BIFROST_RTZ) ? 1 : 0;
    assert(round < 2);
    unsigned clamp = ins->outmod;
    assert(clamp < 4);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned neg2 = ins->src_neg[2];
    assert(neg2 < 2);
    unsigned special = 0;

    unsigned neg_product = 0;
    if (neg0 <= 1 && neg1 <= 1)
        neg_product = neg0 ^ neg1;
    else
        unreachable("No pattern match at pos 16");

    unsigned mode_field = 0;
    if (special == 0 && round == 0 && clamp <= 3)
        mode_field = clamp;
    else if (clamp == 0 && special == 1 && round == 0)
        mode_field = 4;
    else if (clamp == 0 && special == 1 && round == 1)
        mode_field = 5;
    else if (clamp == 0 && special == 2 && round == 0)
        mode_field = 7;
    else
        unreachable("No pattern match at pos 12");

    return 0x680000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (src3 << 9) | (abs0 << 15) | (neg2 << 17) | (neg_product << 16) | (mode_field << 12);
}

/* Fixed pattern 0x75080 combined with two sources. */
static inline unsigned
pan_pack_add_fpow_sc_apply(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);

    return 0x75080 | (src0 << 0) | (src1 << 3);
}

/* Packs source 0, a two-component 16-bit swizzle and the rounding mode.
 * Round mode 4 uses a separate base pattern with the swizzle at a different
 * position and no rounding field. */
static inline unsigned
pan_pack_add_v2f16_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned round = ins->roundmode;
    assert(round < 8);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned comp_x = ins->swizzle[0][0];
    unsigned comp_y = ins->swizzle[0][1];
    unsigned swz0 = 0;
    if (size0 == 16 && comp_x <= 1 && comp_y <= 1)
        swz0 = comp_x | (comp_y << 1);
    else
        unreachable("Could not pattern match widen");
    assert(swz0 < 4);

    if (round != 4) {
        unsigned round_field = 0;
        if (round <= 3)
            round_field = round;
        else
            unreachable("No pattern match at pos 4");

        return 0x3c200 | (src0 << 0) | (swz0 << 6) | (round_field << 4);
    }

    if (round == 4)
        return 0x3ca80 | (src0 << 0) | (swz0 << 4);

    unreachable("No matching state found in add_v2f16_to_v2s16");
}

/* Packs two sources and a one-bit comparison; only condition indices 5 and 6
 * have a table entry.  `result_type` is fixed at 1. */
static inline unsigned
pan_pack_add_icmp_v4i8(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned result_type = 1;

    const unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 2);

    return 0x7b100 | (src0 << 0) | (src1 << 3) | (result_type << 10) | (cmpf << 6);
}

/* Fixed pattern 0xd7850 combined with source 0. */
static inline unsigned
pan_pack_add_eureka(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);

    return 0xd7850 | (src0 << 0);
}

/* Packs three sources, the half selectors of sources 0 and 1, and a two-bit
 * comparison.  The operands are first put into a canonical order: they are
 * swapped (and the comparison mirrored) when the halves are (1, 2) or when
 * the halves match and src0 < src1. */
static inline unsigned
pan_pack_add_branch_u16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);
    unsigned src2 = bi_get_src(ins, regs, 2);
    assert((1 << src2) & 0xf7);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned sel0 = ins->swizzle[0][0];
    unsigned widen0 = 0;
    if (size0 == 32)
        widen0 = 0;
    else if (size0 == 16 && sel0 <= 1)
        widen0 = 1 + sel0;
    else
        unreachable("Could not pattern match widen");
    assert(widen0 < 4);

    unsigned size1 = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned sel1 = ins->swizzle[1][0];
    unsigned widen1 = 0;
    if (size1 == 32)
        widen1 = 0;
    else if (size1 == 16 && sel1 <= 1)
        widen1 = 1 + sel1;
    else
        unreachable("Could not pattern match widen");
    assert(widen1 < 4);

    const unsigned cmpf_table[] = { ~0, 2, 3, 1, 0, ~0, ~0 };
    unsigned cmpf = cmpf_table[ins->cond];
    assert(cmpf < 4);

    if ((widen0 == 1 && widen1 == 2) || (widen0 == widen1 && src0 < src1)) {
        unsigned held = src0;
        src0 = src1;
        src1 = held;

        held = widen0;
        widen0 = widen1;
        widen1 = held;

        /* Mirror the comparison: 0 <-> 2 and 1 <-> 3. */
        if (cmpf <= 3)
            cmpf ^= 2;
    }

    unsigned widen_field = 0;
    if (widen0 == widen1 && (widen0 == 1 || widen0 == 2))
        widen_field = widen0;
    else if (widen0 == 2 && widen1 == 1)
        widen_field = 3;
    else
        unreachable("No pattern match at pos 12");

    /* The comparison is only encodable once the operands are in canonical
     * order. */
    unsigned canonical = (widen0 == 2 && widen1 == 1) || (widen0 == widen1 && src0 >= src1);
    unsigned cmp_field = 0;
    if (canonical && cmpf == 2)
        cmp_field = 0;
    else if (canonical && cmpf == 3)
        cmp_field = 1;
    else if (canonical && cmpf == 1)
        cmp_field = 2;
    else if (canonical && cmpf == 0)
        cmp_field = 3;
    else
        unreachable("No pattern match at pos 9");

    return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (widen_field << 12) | (cmp_field << 9);
}

/* Packs two sources with clamp and round fields.  The abs flags of both
 * sources must agree, and likewise the neg flags; each pair is encoded as a
 * single bit. */
static inline unsigned
pan_pack_add_v2f32_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    unsigned src1 = bi_get_src(ins, regs, 1);

    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned abs1 = ins->src_abs[1];
    assert(abs1 < 2);
    unsigned neg0 = ins->src_neg[0];
    assert(neg0 < 2);
    unsigned neg1 = ins->src_neg[1];
    assert(neg1 < 2);
    unsigned clamp = ins->outmod;
    assert(clamp < 4);
    unsigned round = ins->roundmode;
    assert(round < 8);

    unsigned abs_bit = 0;
    if (abs0 == abs1 && abs0 <= 1)
        abs_bit = abs0;
    else
        unreachable("No pattern match at pos 6");

    unsigned neg_bit = 0;
    if (neg0 == neg1 && neg0 <= 1)
        neg_bit = neg0;
    else
        unreachable("No pattern match at pos 7");

    return 0x76000 | (src0 << 0) | (src1 << 3) | (clamp << 8) | (round << 10) | (abs_bit << 6) | (neg_bit << 7);
}

/* Packs source 0 with its neg/abs flags; `divzero` is fixed at 0.  A 16-bit
 * source selects a second base pattern plus a one-bit half selector. */
static inline unsigned
pan_pack_add_frcbrt_approx_a_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned sel0 = ins->swizzle[0][0];
    unsigned widen0 = 0;
    if (size0 == 32)
        widen0 = 0;
    else if (size0 == 16 && sel0 <= 1)
        widen0 = 1 + sel0;
    else
        unreachable("Could not pattern match widen");
    assert(widen0 < 4);

    unsigned neg = ins->src_neg[0];
    assert(neg < 2);
    unsigned abs0 = ins->src_abs[0];
    assert(abs0 < 2);
    unsigned divzero = 0;

    unsigned fields = (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5);

    if (widen0 == 0)
        return 0x67200 | fields;

    if (widen0 != 0) {
        unsigned half_bit = 0;
        if (widen0 == 1 || widen0 == 2)
            half_bit = widen0 - 1;
        else
            unreachable("No pattern match at pos 7");

        return 0x67240 | fields | (half_bit << 7);
    }

    unreachable("No matching state found in add_frcbrt_approx_a_f32");
}

/* Packs three sources; `atom_opc` is fixed at 2.  Sources 0 and 1 use mask
 * 0xf3 and source 2 uses mask 0xf7. */
static inline unsigned
pan_pack_fma_atom_c_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf3);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf3);
    unsigned src2 = bi_get_src(ins, regs, 2);
    assert((1 << src2) & 0xf7);
    unsigned atom_opc = 2;

    return 0x2f4000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (atom_opc << 9);
}

/* Packs source 0 and the segment; `preserve_null` is fixed at 0. */
static inline unsigned
pan_pack_fma_seg_add(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xfb);

    assert(ins->segment);
    unsigned seg = ins->segment;
    assert(seg < 8);
    unsigned preserve_null = 0;

    return 0x701500 | (src0 << 0) | (seg << 3) | (preserve_null << 7);
}

/* Packs instruction sources 1 and 2 (source 0 is not encoded here) together
 * with the segment, and calls bi_read_staging_register() before returning. */
static inline unsigned
pan_pack_add_store_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 1);
    unsigned src1 = bi_get_src(ins, regs, 2);

    assert(ins->segment);
    unsigned seg = ins->segment;
    assert(seg < 8);

    bi_read_staging_register(clause, ins);

    return 0x62c00 | (src0 << 0) | (src1 << 3) | (seg << 6);
}

/* Packs two sources and a 16-bit lane selector for each of them. */
static inline unsigned
pan_pack_add_fatan_assist_f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs)
{
    unsigned src0 = bi_get_src(ins, regs, 0);
    assert((1 << src0) & 0xf7);
    unsigned src1 = bi_get_src(ins, regs, 1);
    assert((1 << src1) & 0xf7);

    unsigned size1 = nir_alu_type_get_type_size(ins->src_types[1]);
    unsigned sel1 = ins->swizzle[1][0];
    unsigned lane1 = 0;
    if (size1 == 16 && sel1 <= 1)
        lane1 = sel1;
    else
        unreachable("Could not pattern match widen");
    assert(lane1 < 2);

    unsigned size0 = nir_alu_type_get_type_size(ins->src_types[0]);
    unsigned sel0 = ins->swizzle[0][0];
    unsigned lane0 = 0;
    if (size0 == 16 && sel0 <= 1)
        lane0 = sel0;
    else
        unreachable("Could not pattern match widen");
    assert(lane0 < 2);

    return 0x67800 | (src0 << 0) | (src1 << 3) | (lane1 << 6) | (lane0 << 7);
}

/* The opening of the next definition is reproduced verbatim; its body
 * continues on the following line of the file. */
static inline unsigned pan_pack_add_v2u16_to_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned 
src0 = bi_get_src(ins, regs, 0); unsigned round = ins->roundmode; assert(round < 8); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c608 | (src0 << 0) | (swz0 << 6) | (derived_4 << 4); } else if (round == 4) { return 0x3cb08 | (src0 << 0) | (swz0 << 4); } else { unreachable("No matching state found in add_v2u16_to_v2f16"); } } static inline unsigned pan_pack_add_iadd_v4u8(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned saturate = 0; unsigned lanes0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lanes0_temp = 0; if (lanes0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1 && ins->swizzle[0][2] == 2 && ins->swizzle[0][3] == 3) lanes0_temp = 0; else unreachable("Could not pattern match widen"); unsigned lanes0 = lanes0_temp; assert(lanes0 < 8); unsigned lanes1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned lanes1_temp = 0; if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 0; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 0 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 
0) lanes1_temp = 1; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 1 && ins->swizzle[1][3] == 1) lanes1_temp = 2; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 2 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 2) lanes1_temp = 3; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 3 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 3 && ins->swizzle[1][3] == 3) lanes1_temp = 4; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1 && ins->swizzle[1][2] == 0 && ins->swizzle[1][3] == 1) lanes1_temp = 5; else if (lanes1_sz == 8 && ins->swizzle[1][0] == 2 && ins->swizzle[1][1] == 3 && ins->swizzle[1][2] == 2 && ins->swizzle[1][3] == 3) lanes1_temp = 6; else unreachable("Could not pattern match widen"); unsigned lanes1 = lanes1_temp; assert(lanes1 < 8); if ((lanes0 == 0) && (lanes1 == 0)) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0xbc400 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7); } else if ((lanes0 == 0) && ((lanes1 == 1) || (lanes1 == 2) || (lanes1 == 3) || (lanes1 == 4))) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 1) derived_9 = 0; else if (lanes1 == 2) derived_9 = 1; else if (lanes1 == 3) derived_9 = 2; else if (lanes1 == 4) derived_9 = 3; else unreachable("No pattern match at pos 9"); return 0xbe040 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else if ((lanes0 == 0) && ((lanes1 == 5) || (lanes1 == 6))) { unsigned derived_7 = 0; if (saturate == 0) derived_7 = 0; else if (saturate == 1) derived_7 = 1; else unreachable("No pattern match at pos 7"); unsigned derived_9 = 0; if (lanes1 == 5) derived_9 = 0; else if (lanes1 == 6) derived_9 = 1; else 
unreachable("No pattern match at pos 9"); return 0xbe840 | (src0 << 0) | (src1 << 3) | (saturate << 8) | (derived_7 << 7) | (derived_9 << 9); } else { unreachable("No matching state found in add_iadd_v4u8"); } } static inline unsigned pan_pack_add_store_i96(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); bi_read_staging_register(clause, ins); return 0x65c00 | (src0 << 0) | (src1 << 3) | (seg << 6); } static inline unsigned pan_pack_fma_lshift_and_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 1) lane2_temp = 1; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 2; else if (lane2_sz == 8 && ins->swizzle[2][0] == 3) lane2_temp = 3; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 4); unsigned not1 = ins->bitwise.src1_invert ? 1 : 0; assert(not1 < 2); unsigned not_result = ins->bitwise.dest_invert ? 
0 : 1; assert(not_result < 2); return 0x311000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (lane2 << 9) | (not1 << 14) | (not_result << 15); } static inline unsigned pan_pack_fma_u16_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); return 0x700cc8 | (src0 << 0) | (lane0 << 4); } static inline unsigned pan_pack_add_wmask(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned subgroup = 1; unsigned fill = 0; return 0x3d700 | (src0 << 0) | (subgroup << 4) | (fill << 3); } static inline unsigned pan_pack_add_fadd_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); unsigned swz1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swz1_temp = 0; if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] 
== 0) swz1_temp = 0; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swz1_temp = 1; else if (swz1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swz1_temp = 2; else if (swz1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 1) swz1_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz1 = swz1_temp; assert(swz1 < 4); unsigned round = ins->roundmode; assert(round < 4); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); return 0xa0000 | (src0 << 0) | (src1 << 3) | (abs1 << 6) | (neg0 << 7) | (neg1 << 8) | (swz0 << 9) | (swz1 << 11) | (round << 13) | (abs0 << 15); } static inline unsigned pan_pack_add_flog_table_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned mode = 0; unsigned precision = 0; unsigned neg = ins->src_neg[0]; assert(neg < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned divzero = 0; if ((mode == 0) && (widen0 == 0) && (precision == 0)) { return 0x67300 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5); } else if ((mode == 0) && (widen0 != 0) && (precision == 0)) { unsigned derived_7 = 0; if (widen0 == 1) derived_7 = 0; else if (widen0 == 2) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x67340 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (divzero << 5) | (derived_7 << 7); } else if ((mode != 0) && (widen0 == 0) && (precision == 0) && (divzero == 0)) { unsigned derived_5 = 0; if (mode == 1) derived_5 = 0; else if (mode == 2) derived_5 = 1; else unreachable("No pattern match 
at pos 5"); return 0x67b00 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5); } else if ((mode != 0) && (widen0 != 0) && (precision == 0) && (divzero == 0)) { unsigned derived_5 = 0; if (mode == 1) derived_5 = 0; else if (mode == 2) derived_5 = 1; else unreachable("No pattern match at pos 5"); unsigned derived_7 = 0; if (widen0 == 1) derived_7 = 0; else if (widen0 == 2) derived_7 = 1; else unreachable("No pattern match at pos 7"); return 0x67b40 | (src0 << 0) | (neg << 3) | (abs0 << 4) | (derived_5 << 5) | (derived_7 << 7); } else if ((mode != 0) && (widen0 == 0) && (precision != 0) && (divzero == 0) && (abs0 == 0) && (neg == 0)) { unsigned derived_3 = 0; if (mode == 2) derived_3 = 0; else if (mode == 1) derived_3 = 1; else unreachable("No pattern match at pos 3"); unsigned derived_4 = 0; if (precision == 1) derived_4 = 0; else if (precision == 2) derived_4 = 1; else unreachable("No pattern match at pos 4"); return 0x67ae0 | (src0 << 0) | (derived_3 << 3) | (derived_4 << 4); } else { unreachable("No matching state found in add_flog_table_f32"); } } static inline unsigned pan_pack_add_branchz_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); unsigned derived_4 = 0; if (widen0 == 2) derived_4 = 1; else if (widen0 == 1) derived_4 = 2; else unreachable("No pattern match at pos 4"); unsigned derived_3 = 0; if (cmpf == 1) derived_3 = 0; else if (cmpf == 0) 
derived_3 = 1; else unreachable("No pattern match at pos 3"); return 0x6f800 | (src0 << 0) | (src1 << 6) | (derived_4 << 4) | (derived_3 << 3); } static inline unsigned pan_pack_add_ilogb_v2f16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 2; else if (swz0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 3; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 4); return 0x3d9c0 | (src0 << 0) | (swz0 << 3); } static inline unsigned pan_pack_add_v2s8_to_v2s16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned swz0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned swz0_temp = 0; if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 0) swz0_temp = 0; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swz0_temp = 1; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 0) swz0_temp = 2; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 0) swz0_temp = 3; else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swz0_temp = 4; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 1) swz0_temp = 5; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 1) swz0_temp = 6; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 1) swz0_temp = 7; else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 2) swz0_temp = 8; else if (swz0_sz == 8 && 
ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 2) swz0_temp = 9; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 2) swz0_temp = 10; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 2) swz0_temp = 11; else if (swz0_sz == 8 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 3) swz0_temp = 12; else if (swz0_sz == 8 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 3) swz0_temp = 13; else if (swz0_sz == 8 && ins->swizzle[0][0] == 2 && ins->swizzle[0][1] == 3) swz0_temp = 14; else if (swz0_sz == 8 && ins->swizzle[0][0] == 3 && ins->swizzle[0][1] == 3) swz0_temp = 15; else unreachable("Could not pattern match widen"); unsigned swz0 = swz0_temp; assert(swz0 < 16); return 0x3c700 | (src0 << 0) | (swz0 << 4); } static inline unsigned pan_pack_add_u32_to_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned round = ins->roundmode; assert(round < 8); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3cbc8 | (src0 << 0) | (derived_4 << 4); } else if (round == 4) { return 0x3cd08 | (src0 << 0); } else { unreachable("No matching state found in add_u32_to_f32"); } } static inline unsigned pan_pack_add_blend(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 1); unsigned src1 = bi_get_src(ins, regs, 2); assert((1 << src1) & 0xf7); unsigned src2 = bi_get_src(ins, regs, 3); assert((1 << src2) & 0xf7); bi_read_staging_register(clause, ins); return 0xca800 | (src0 << 0) | (src1 << 3) | (src2 << 6); } static inline unsigned pan_pack_fma_fma_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 
0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned neg0 = ins->src_neg[0]; assert(neg0 < 2); unsigned neg1 = ins->src_neg[1]; assert(neg1 < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned round = ins->roundmode; assert(round < 4); unsigned clamp = ins->outmod; assert(clamp < 4); unsigned abs1 = ins->src_abs[1]; assert(abs1 < 2); unsigned neg2 = ins->src_neg[2]; assert(neg2 < 2); unsigned abs2 = ins->src_abs[2]; assert(abs2 < 2); if ((widen0 == 2) && (widen1 == 1)) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } { unsigned temp = neg0; neg0 = neg1; neg1 = temp; } { unsigned temp = abs0; abs0 = abs1; abs1 = temp; } } unsigned derived_9 = 0; if ((widen0 == 0) && (widen1 == 0)) derived_9 = 0; else if ((widen0 == 0) && (widen1 == 1)) derived_9 = 1; else if ((widen0 == 0) && (widen1 == 2)) derived_9 = 2; else if ((widen0 == 1) && (widen1 == 1)) derived_9 = 3; else if ((widen0 == 1) && (widen1 == 2)) derived_9 = 4; else if ((widen0 == 2) && (widen1 == 2)) derived_9 = 5; else if ((widen0 == 1) && (widen1 == 0)) derived_9 = 6; else if ((widen0 == 2) && (widen1 == 0)) derived_9 = 7; else unreachable("No pattern match at pos 9"); unsigned derived_17 = 0; 
if (((neg0 == 0) && (neg1 == 0)) || ((neg0 == 1) && (neg1 == 1))) derived_17 = 0; else if (((neg0 == 0) && (neg1 == 1)) || ((neg0 == 1) && (neg1 == 0))) derived_17 = 1; else unreachable("No pattern match at pos 17"); return 0x0 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (abs0 << 12) | (round << 13) | (clamp << 15) | (abs1 << 19) | (neg2 << 18) | (abs2 << 20) | (derived_9 << 9) | (derived_17 << 17); } static inline unsigned pan_pack_add_branchz_f32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xf7); unsigned cmpf_table[] = { ~0, 4, 5, 2, 1, 0, 3 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 8); unsigned derived_3 = 0; if ((cmpf == 3) || (cmpf == 2) || (cmpf == 5)) derived_3 = 0; else if ((cmpf == 0) || (cmpf == 1) || (cmpf == 4)) derived_3 = 1; else unreachable("No pattern match at pos 3"); unsigned derived_9 = 0; if ((cmpf == 3) || (cmpf == 0)) derived_9 = 5; else if ((cmpf == 2) || (cmpf == 1)) derived_9 = 6; else if ((cmpf == 5) || (cmpf == 4)) derived_9 = 7; else unreachable("No pattern match at pos 9"); return 0x6f000 | (src0 << 0) | (src1 << 6) | (derived_3 << 3) | (derived_9 << 9); } static inline unsigned pan_pack_add_lea_tex(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned format = 1; bi_write_staging_register(clause, ins); return 0xd6600 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (format << 11); } static inline unsigned pan_pack_add_branch_diverg(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xf7); return 0x6f83c | (src0 << 6); } static inline unsigned pan_pack_fma_lrot_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) 
& 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned bytes2 = 0; unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 2); unsigned result_word = 0; return 0x33b000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10) | (result_word << 11); } static inline unsigned pan_pack_fma_flshift_double_i32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned src2 = bi_get_src(ins, regs, 2); unsigned bytes2 = 0; unsigned lane2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned lane2_temp = 0; if (lane2_sz == 8 && ins->swizzle[2][0] == 0) lane2_temp = 0; else if (lane2_sz == 8 && ins->swizzle[2][0] == 2) lane2_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane2 = lane2_temp; assert(lane2 < 2); return 0x33f800 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (bytes2 << 9) | (lane2 << 10); } static inline unsigned pan_pack_fma_fmul_cslice(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); assert((1 << src0) & 0xfb); unsigned src1 = bi_get_src(ins, regs, 1); assert((1 << src1) & 0xfb); unsigned lane0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane0_temp = 0; if (lane0_sz == 16 && ins->swizzle[0][0] == 0) lane0_temp = 0; else if (lane0_sz == 16 && ins->swizzle[0][0] == 1) lane0_temp = 1; else unreachable("Could not pattern match widen"); unsigned lane0 = lane0_temp; assert(lane0 < 2); unsigned abs0 = ins->src_abs[0]; assert(abs0 < 2); unsigned neg0 = ins->src_neg[0]; 
assert(neg0 < 2); return 0x70d000 | (src0 << 0) | (src1 << 3) | (lane0 << 6) | (abs0 << 7) | (neg0 << 8); } static inline unsigned pan_pack_add_branch_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); assert((1 << src2) & 0xf7); unsigned widen0_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned widen0_temp = 0; if (widen0_sz == 32) widen0_temp = 0; else if (widen0_sz == 16 && ins->swizzle[0][0] == 0) widen0_temp = 1; else if (widen0_sz == 16 && ins->swizzle[0][0] == 1) widen0_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen0 = widen0_temp; assert(widen0 < 4); unsigned widen1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned widen1_temp = 0; if (widen1_sz == 32) widen1_temp = 0; else if (widen1_sz == 16 && ins->swizzle[1][0] == 0) widen1_temp = 1; else if (widen1_sz == 16 && ins->swizzle[1][0] == 1) widen1_temp = 2; else unreachable("Could not pattern match widen"); unsigned widen1 = widen1_temp; assert(widen1 < 4); unsigned cmpf_table[] = { ~0, ~0, ~0, ~0, ~0, 0, 1 }; unsigned cmpf = cmpf_table[ins->cond]; assert(cmpf < 2); if (((widen0 == 1) && (widen1 == 2)) || ((widen0 == widen1) && (src0 > src1) && (cmpf == 0)) || ((widen0 == widen1) && (src0 < src1) && (cmpf == 1))) { { unsigned temp = src0; src0 = src1; src1 = temp; } { unsigned temp = widen0; widen0 = widen1; widen1 = temp; } } unsigned derived_12 = 0; if ((widen0 == 1) && (widen1 == 1)) derived_12 = 1; else if ((widen0 == 2) && (widen1 == 2)) derived_12 = 2; else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 1)) derived_12 = 3; else if ((widen0 == 2) && (widen1 == 1) && (cmpf == 0)) derived_12 = 4; else unreachable("No pattern match at pos 12"); unsigned derived_9 = 0; if ((widen0 == widen1) && (src0 == src1) && (cmpf == 0)) derived_9 = 1; else if (((widen0 == 2) && (widen1 == 1)) || ((widen0 == widen1) && (src0 < src1) 
&& (cmpf == 0)) || ((widen0 == widen1) && (src0 >= src1) && (cmpf == 1))) derived_9 = 4; else unreachable("No pattern match at pos 9"); return 0x68000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (derived_12 << 12) | (derived_9 << 9); } static inline unsigned pan_pack_add_f32_to_u32(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned round = ins->roundmode; assert(round < 8); if (round != 4) { unsigned derived_4 = 0; if (round == 0) derived_4 = 0; else if (round == 1) derived_4 = 1; else if (round == 2) derived_4 = 2; else if (round == 3) derived_4 = 3; else unreachable("No pattern match at pos 4"); return 0x3c988 | (src0 << 0) | (derived_4 << 4); } else if (round == 4) { return 0x3cca8 | (src0 << 0); } else { unreachable("No matching state found in add_f32_to_u32"); } } static inline unsigned pan_pack_add_load_i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); assert(ins->segment); unsigned seg = ins->segment; assert(seg < 8); unsigned lane_sz = nir_alu_type_get_type_size(ins->src_types[0]); unsigned lane_temp = 0; if (lane_sz == 16 && ins->swizzle[0][0] == 0) lane_temp = 0; else if (lane_sz == 16 && ins->swizzle[0][0] == 1) lane_temp = 1; else if (lane_sz == 32) lane_temp = 2; else if (lane_sz == 64) lane_temp = 3; else unreachable("Could not pattern match widen"); unsigned lane = lane_temp; assert(lane < 4); ASSERTED bool extend_small = nir_alu_type_get_type_size(ins->src_types[0]) <= 16; bool extend_signed = nir_alu_type_get_base_type(ins->src_types[0]) == nir_type_int; unsigned extend = extend_small ? (extend_signed ? 
1 : 2) : 0; assert(extend < 4); bi_write_staging_register(clause, ins); if ((extend == 0) && ((lane == 0) || (lane == 1))) { unsigned derived_9 = 0; if (lane == 0) derived_9 = 0; else if (lane == 1) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x60800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else if ((extend != 0) && (lane == 2)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x63000 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else if ((extend != 0) && (lane == 3)) { unsigned derived_9 = 0; if (extend == 1) derived_9 = 0; else if (extend == 2) derived_9 = 1; else unreachable("No pattern match at pos 9"); return 0x61800 | (src0 << 0) | (src1 << 3) | (seg << 6) | (derived_9 << 9); } else { unreachable("No matching state found in add_load_i16"); } } static inline unsigned pan_pack_add_mux_v2i16(bi_clause *clause, bi_instruction *ins, bi_registers *regs) { unsigned src0 = bi_get_src(ins, regs, 0); unsigned src1 = bi_get_src(ins, regs, 1); unsigned src2 = bi_get_src(ins, regs, 2); unsigned mux = 1; unsigned swap2_sz = nir_alu_type_get_type_size(ins->src_types[2]); unsigned swap2_temp = 0; if (swap2_sz == 16 && ins->swizzle[2][0] == 0 && ins->swizzle[2][1] == 1) swap2_temp = 0; else if (swap2_sz == 16 && ins->swizzle[2][0] == 1 && ins->swizzle[2][1] == 0) swap2_temp = 1; else unreachable("Could not pattern match widen"); unsigned swap2 = swap2_temp; assert(swap2 < 2); unsigned swap1_sz = nir_alu_type_get_type_size(ins->src_types[1]); unsigned swap1_temp = 0; if (swap1_sz == 16 && ins->swizzle[1][0] == 0 && ins->swizzle[1][1] == 1) swap1_temp = 0; else if (swap1_sz == 16 && ins->swizzle[1][0] == 1 && ins->swizzle[1][1] == 0) swap1_temp = 1; else unreachable("Could not pattern match widen"); unsigned swap1 = swap1_temp; assert(swap1 < 2); unsigned swap0_sz = nir_alu_type_get_type_size(ins->src_types[0]); 
unsigned swap0_temp = 0; if (swap0_sz == 16 && ins->swizzle[0][0] == 0 && ins->swizzle[0][1] == 1) swap0_temp = 0; else if (swap0_sz == 16 && ins->swizzle[0][0] == 1 && ins->swizzle[0][1] == 0) swap0_temp = 1; else unreachable("Could not pattern match widen"); unsigned swap0 = swap0_temp; assert(swap0 < 2); return 0x70000 | (src0 << 0) | (src1 << 3) | (src2 << 6) | (mux << 9) | (swap2 << 11) | (swap1 << 12) | (swap0 << 13); } #endif