/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler.h"

/* NIR creates vectors as vecN ops, which we represent by a synthetic
 * BI_COMBINE instruction, e.g.:
 *
 *      v = combine x, y, z, w
 *
 * These combines need to be lowered by the pass in this file. Fix a given
 * source at component c.
 *
 * First suppose the source is SSA. If it is also scalar, then we may rewrite
 * the destination of the generating instruction (unique by SSA+scalar) to
 * write to v.c, and rewrite each of its uses to swizzle out .c instead of .x
 * (the original by scalar). If it is vector, there are two cases. If the
 * component c is `x`, we are accessing v.x, and each of the succeeding
 * components y, z... up to the last component of the vector are accessed
 * sequentially, then we may perform the same rewrite. If this is not the case,
 * rewriting would require more complex vector features, so we fall back on a
 * move.
 *
 * Otherwise, if the source is not SSA, we also fall back on a move. We could
 * probably do better. */

/* Emits, before `parent`, a 32-bit MOV that writes source `comp` of the
 * combine (using the first swizzle entry of that source) into component
 * `comp` of R. */

static void
bi_combine_mov32(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R)
{
        bi_instruction mov = {
                .type = BI_MOV,
                .dest = R,
                .dest_offset = comp,
                .dest_type = nir_type_uint32,
                .src_types = { nir_type_uint32 },
                .src = { parent->src[comp] },
                .swizzle = { { parent->swizzle[comp][0] } }
        };

        bi_emit_before(ctx, parent, mov);
}

/* Emits, before `parent`, a SELECT that packs the two 16-bit sources `comp`
 * and `comp + 1` of the combine into 32-bit word `comp >> 1` of R. */

static void
bi_combine_sel16(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R)
{
        bi_instruction pack = {
                .type = BI_SELECT,
                .dest = R,
                .dest_offset = comp >> 1,
                .dest_type = nir_type_uint32,
                .src_types = { nir_type_uint16, nir_type_uint16 },
                .src = {
                        parent->src[comp],
                        /* A combine from a vec3 leaves the upper half of the
                         * last pair without a source; use zero there */
                        parent->src[comp + 1] ?
                                parent->src[comp + 1] : BIR_INDEX_ZERO
                },
                .swizzle = {
                        { parent->swizzle[comp][0] },
                        { parent->swizzle[comp + 1][0] },
                }
        };

        bi_emit_before(ctx, parent, pack);
}

/* Copies the result of a combine from the temporary R into the destination of
 * the instruction, one 32-bit word at a time. `sz` is the bit size of a
 * single component of the combine. */

static void
bi_combine_copy(bi_context *ctx, bi_instruction *ins, unsigned R, unsigned sz)
{
        /* log2 of the number of components sharing a 32-bit word */
        unsigned shift = 0;

        if (sz == 8)
                shift = 2;
        else if (sz == 16)
                shift = 1;

        unsigned within_word = (1u << shift) - 1u;

        bi_foreach_src(ins, s) {
                /* Only a present source that begins a 32-bit word triggers a
                 * copy of that word */
                if (!ins->src[s] || (s & within_word))
                        continue;

                unsigned word = s >> shift;

                bi_instruction mov = {
                        .type = BI_MOV,
                        .dest = ins->dest,
                        .dest_offset = word,
                        .dest_type = nir_type_uint32,
                        .src_types = { nir_type_uint32 },
                        .src = { R },
                        .swizzle = { { word } }
                };

                bi_emit_before(ctx, ins, mov);
        }
}

/* Reports whether any source of the instruction is equal to its own
 * destination index. */

static bool
bi_combine_reads_self(const bi_instruction *ins)
{
        bi_foreach_src(ins, s) {
                if (ins->src[s] == ins->dest)
                        return true;
        }

        return false;
}

/* Lowers every BI_COMBINE in the block to per-component moves (32-bit) or
 * pairwise selects (16-bit) emitted before it, then removes the combine. */

void
bi_lower_combine(bi_context *ctx, bi_block *block)
{
        bi_foreach_instr_in_block_safe(block, ins) {
                if (ins->type != BI_COMBINE)
                        continue;

                /* If a register COMBINE reads its own output, we need a
                 * temporary move to allow for swapping. TODO: Could do a bit
                 * better for pairwise swaps of 16-bit vectors */
                bool dest_is_reg = (ins->dest & PAN_IS_REG) != 0;
                bool needs_rewrite = !dest_is_reg;
                bool needs_copy = dest_is_reg && bi_combine_reads_self(ins);

                /* Build the vector in a fresh temporary whenever it cannot be
                 * written to the destination directly */
                unsigned R;

                if (needs_rewrite || needs_copy)
                        R = bi_make_temp_reg(ctx);
                else
                        R = ins->dest;

                unsigned sz = nir_alu_type_get_type_size(ins->dest_type);

                bi_foreach_src(ins, s) {
                        /* Absent sources (vec2/3) emit nothing */
                        if (!ins->src[s])
                                continue;

                        switch (sz) {
                        case 32:
                                bi_combine_mov32(ctx, ins, s, R);
                                break;
                        case 16:
                                bi_combine_sel16(ctx, ins, s, R);

                                /* The select consumed sources s and s + 1 */
                                s++;
                                break;
                        default:
                                unreachable("Unknown COMBINE size");
                        }
                }

                if (needs_rewrite)
                        bi_rewrite_uses(ctx, ins->dest, 0, R, 0);
                else if (needs_copy)
                        bi_combine_copy(ctx, ins, R, sz);

                bi_remove_instruction(ins);
        }
}