1 /*
2  * Copyright (C) 2020 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include "compiler.h"
25 
26 /* NIR creates vectors as vecN ops, which we represent by a synthetic
27  * BI_COMBINE instruction, e.g.:
28  *
29  *      v = combine x, y, z, w
30  *
31  * These combines need to be lowered by the pass in this file. Fix a given
32  * source at component c.
33  *
34  * First suppose the source is SSA. If it is also scalar, then we may rewrite
35  * the destination of the generating instruction (unique by SSA+scalar) to
36  * write to v.c, and rewrite each of its uses to swizzle out .c instead of .x
37  * (the original by scalar). If it is vector, there are two cases. If the
38  * component c is `x`, we are accessing v.x, and each of the succeeding
39  * components y, z... up to the last component of the vector are accessed
40  * sequentially, then we may perform the same rewrite. If this is not the case,
 * rewriting would require more complex vector features, so we fall back on a
 * move.
43  *
 * Otherwise, if the source is not SSA, we also fall back on a move. We could
45  * probably do better.
46  */
47 
48 static void
bi_combine_mov32(bi_context * ctx,bi_instruction * parent,unsigned comp,unsigned R)49 bi_combine_mov32(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R)
50 {
51         bi_instruction move = {
52                 .type = BI_MOV,
53                 .dest = R,
54                 .dest_type = nir_type_uint32,
55                 .dest_offset = comp,
56                 .src = { parent->src[comp] },
57                 .src_types = { nir_type_uint32 },
58                 .swizzle = { { parent->swizzle[comp][0] } }
59         };
60 
61         bi_emit_before(ctx, parent, move);
62 }
63 
64 static void
bi_combine_sel16(bi_context * ctx,bi_instruction * parent,unsigned comp,unsigned R)65 bi_combine_sel16(bi_context *ctx, bi_instruction *parent, unsigned comp, unsigned R)
66 {
67         bi_instruction sel = {
68                 .type = BI_SELECT,
69                 .dest = R,
70                 .dest_type = nir_type_uint32,
71                 .dest_offset = comp >> 1,
72                 .src = { parent->src[comp], parent->src[comp + 1] },
73                 .src_types = { nir_type_uint16, nir_type_uint16 },
74                 .swizzle = {
75                         { parent->swizzle[comp][0] },
76                         { parent->swizzle[comp + 1][0] },
77                 }
78         };
79 
80         /* In case we have a combine from a vec3 */
81         if (!sel.src[1])
82                 sel.src[1] = BIR_INDEX_ZERO;
83 
84         bi_emit_before(ctx, parent, sel);
85 }
86 
87 /* Copies result of combine from the temp R to the instruction destination,
88  * given a bitsize sz */
89 
90 static void
bi_combine_copy(bi_context * ctx,bi_instruction * ins,unsigned R,unsigned sz)91 bi_combine_copy(bi_context *ctx, bi_instruction *ins, unsigned R, unsigned sz)
92 {
93         bi_foreach_src(ins, s) {
94                 if (!ins->src[s])
95                         continue;
96 
97                 /* Iterate by 32-bits */
98                 unsigned shift = (sz == 8) ? 2 :
99                         (sz == 16) ? 1 : 0;
100 
101                 if (s & ((1 << shift) - 1))
102                         continue;
103 
104                 bi_instruction copy = {
105                         .type = BI_MOV,
106                         .dest = ins->dest,
107                         .dest_type = nir_type_uint32,
108                         .dest_offset = s >> shift,
109                         .src = { R },
110                         .src_types = { nir_type_uint32 },
111                         .swizzle = { { s >> shift } }
112                 };
113 
114                 bi_emit_before(ctx, ins, copy);
115         }
116 }
117 
118 void
bi_lower_combine(bi_context * ctx,bi_block * block)119 bi_lower_combine(bi_context *ctx, bi_block *block)
120 {
121         bi_foreach_instr_in_block_safe(block, ins) {
122                 if (ins->type != BI_COMBINE) continue;
123 
124                 /* If a register COMBINE reads its own output, we need a
125                  * temporary move to allow for swapping. TODO: Could do a bit
126                  * better for pairwise swaps of 16-bit vectors */
127                 bool reads_self = false;
128 
129                 bi_foreach_src(ins, s) {
130                         if(ins->src[s] == ins->dest)
131                                 reads_self = true;
132                 }
133 
134                 bool needs_rewrite = !(ins->dest & PAN_IS_REG);
135                 bool needs_copy = (ins->dest & PAN_IS_REG) && reads_self;
136                 bool needs_temp = needs_rewrite || needs_copy;
137 
138                 unsigned R = needs_temp ? bi_make_temp_reg(ctx) : ins->dest;
139                 unsigned sz = nir_alu_type_get_type_size(ins->dest_type);
140 
141                 bi_foreach_src(ins, s) {
142                         /* We're done early for vec2/3 */
143                         if (!ins->src[s])
144                                 continue;
145 
146                         if (sz == 32)
147                                 bi_combine_mov32(ctx, ins, s, R);
148                         else if (sz == 16) {
149                                 bi_combine_sel16(ctx, ins, s, R);
150                                 s++;
151                         } else {
152                                 unreachable("Unknown COMBINE size");
153                         }
154                 }
155 
156                 if (needs_rewrite)
157                         bi_rewrite_uses(ctx, ins->dest, 0, R, 0);
158                 else if (needs_copy)
159                         bi_combine_copy(ctx, ins, R, sz);
160 
161                 bi_remove_instruction(ins);
162         }
163 }
164