1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include "compiler.h"
28 
/* Midgard's generic load/store instructions, particularly to implement SSBOs
 * and globals, have support for address arithmetic natively. In particular,
 * they take two indirect arguments A, B and two immediates #s, #c, calculating
 * the address:
 *
 *      A + (zext?(B) << #s) + #c
 *
 * This allows for fast indexing into arrays. This file tries to pattern match
 * the offset in NIR with this form to reduce pressure on the ALU pipe.
 */
38 
39 struct mir_address {
40         nir_ssa_scalar A;
41         nir_ssa_scalar B;
42 
43         bool zext;
44         unsigned shift;
45         unsigned bias;
46 };
47 
48 static bool
mir_args_ssa(nir_ssa_scalar s,unsigned count)49 mir_args_ssa(nir_ssa_scalar s, unsigned count)
50 {
51         nir_alu_instr *alu = nir_instr_as_alu(s.def->parent_instr);
52         assert(count <= nir_op_infos[alu->op].num_inputs);
53 
54         for (unsigned i = 0; i < count; ++i) {
55                 if (!alu->src[i].src.is_ssa)
56                         return false;
57         }
58 
59         return true;
60 }
61 
62 /* Matches a constant in either slot and moves it to the bias */
63 
64 static void
mir_match_constant(struct mir_address * address)65 mir_match_constant(struct mir_address *address)
66 {
67         if (address->A.def && nir_ssa_scalar_is_const(address->A)) {
68                 address->bias += nir_ssa_scalar_as_uint(address->A);
69                 address->A.def = NULL;
70         }
71 
72         if (address->B.def && nir_ssa_scalar_is_const(address->B)) {
73                 address->bias += nir_ssa_scalar_as_uint(address->B);
74                 address->B.def = NULL;
75         }
76 }
77 
78 /* Matches an iadd when there is a free slot or constant */
79 
80 static void
mir_match_iadd(struct mir_address * address,bool first_free)81 mir_match_iadd(struct mir_address *address, bool first_free)
82 {
83         if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
84                 return;
85 
86         if (!mir_args_ssa(address->B, 2))
87                 return;
88 
89         nir_op op = nir_ssa_scalar_alu_op(address->B);
90 
91         if (op != nir_op_iadd) return;
92 
93         nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);
94         nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);
95 
96         if (nir_ssa_scalar_is_const(op1)) {
97                 address->bias += nir_ssa_scalar_as_uint(op1);
98                 address->B = op2;
99         } else if (nir_ssa_scalar_is_const(op2)) {
100                 address->bias += nir_ssa_scalar_as_uint(op2);
101                 address->B = op1;
102         } else if (first_free && !address->A.def) {
103                 address->A = op1;
104                 address->B = op2;
105         }
106 }
107 
108 /* Matches u2u64 and sets zext */
109 
110 static void
mir_match_u2u64(struct mir_address * address)111 mir_match_u2u64(struct mir_address *address)
112 {
113         if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
114                 return;
115 
116         if (!mir_args_ssa(address->B, 1))
117                 return;
118 
119         nir_op op = nir_ssa_scalar_alu_op(address->B);
120         if (op != nir_op_u2u64) return;
121         nir_ssa_scalar arg = nir_ssa_scalar_chase_alu_src(address->B, 0);
122 
123         address->B = arg;
124         address->zext = true;
125 }
126 
127 /* Matches ishl to shift */
128 
129 static void
mir_match_ishl(struct mir_address * address)130 mir_match_ishl(struct mir_address *address)
131 {
132         if (!address->B.def || !nir_ssa_scalar_is_alu(address->B))
133                 return;
134 
135         if (!mir_args_ssa(address->B, 2))
136                 return;
137 
138         nir_op op = nir_ssa_scalar_alu_op(address->B);
139         if (op != nir_op_ishl) return;
140         nir_ssa_scalar op1 = nir_ssa_scalar_chase_alu_src(address->B, 0);
141         nir_ssa_scalar op2 = nir_ssa_scalar_chase_alu_src(address->B, 1);
142 
143         if (!nir_ssa_scalar_is_const(op2)) return;
144 
145         unsigned shift = nir_ssa_scalar_as_uint(op2);
146         if (shift > 0x7) return;
147 
148         address->B = op1;
149         address->shift = shift;
150 }
151 
152 /* Strings through mov which can happen from NIR vectorization */
153 
154 static void
mir_match_mov(struct mir_address * address)155 mir_match_mov(struct mir_address *address)
156 {
157         if (address->A.def && nir_ssa_scalar_is_alu(address->A)) {
158                 nir_op op = nir_ssa_scalar_alu_op(address->A);
159 
160                 if (op == nir_op_mov && mir_args_ssa(address->A, 1))
161                         address->A = nir_ssa_scalar_chase_alu_src(address->A, 0);
162         }
163 
164         if (address->B.def && nir_ssa_scalar_is_alu(address->B)) {
165                 nir_op op = nir_ssa_scalar_alu_op(address->B);
166 
167                 if (op == nir_op_mov && mir_args_ssa(address->B, 1))
168                         address->B = nir_ssa_scalar_chase_alu_src(address->B, 0);
169         }
170 }
171 
172 /* Tries to pattern match into mir_address */
173 
174 static struct mir_address
mir_match_offset(nir_ssa_def * offset,bool first_free)175 mir_match_offset(nir_ssa_def *offset, bool first_free)
176 {
177         struct mir_address address = {
178                 .B = { .def = offset }
179         };
180 
181         mir_match_mov(&address);
182         mir_match_constant(&address);
183         mir_match_mov(&address);
184         mir_match_iadd(&address, first_free);
185         mir_match_mov(&address);
186         mir_match_u2u64(&address);
187         mir_match_mov(&address);
188         mir_match_ishl(&address);
189 
190         return address;
191 }
192 
193 void
mir_set_offset(compiler_context * ctx,midgard_instruction * ins,nir_src * offset,bool is_shared)194 mir_set_offset(compiler_context *ctx, midgard_instruction *ins, nir_src *offset, bool is_shared)
195 {
196         for(unsigned i = 0; i < 16; ++i) {
197                 ins->swizzle[1][i] = 0;
198                 ins->swizzle[2][i] = 0;
199         }
200 
201         bool force_zext = (nir_src_bit_size(*offset) < 64);
202 
203         if (!offset->is_ssa) {
204                 ins->load_store.arg_1 |= is_shared ? 0x6E : 0x7E;
205                 ins->src[2] = nir_src_index(ctx, offset);
206                 ins->src_types[2] = nir_type_uint | nir_src_bit_size(*offset);
207 
208                 if (force_zext)
209                         ins->load_store.arg_1 |= 0x80;
210 
211                 return;
212         }
213 
214         struct mir_address match = mir_match_offset(offset->ssa, !is_shared);
215 
216         if (match.A.def) {
217                 ins->src[1] = nir_ssa_index(match.A.def);
218                 ins->swizzle[1][0] = match.A.comp;
219                 ins->src_types[1] = nir_type_uint | match.A.def->bit_size;
220         } else
221                 ins->load_store.arg_1 |= is_shared ? 0x6E : 0x7E;
222 
223         if (match.B.def) {
224                 ins->src[2] = nir_ssa_index(match.B.def);
225                 ins->swizzle[2][0] = match.B.comp;
226                 ins->src_types[2] = nir_type_uint | match.B.def->bit_size;
227         } else
228                 ins->load_store.arg_2 = 0x1E;
229 
230         if (match.zext || force_zext)
231                 ins->load_store.arg_1 |= 0x80;
232 
233         assert(match.shift <= 7);
234         ins->load_store.arg_2 |= (match.shift) << 5;
235 
236         ins->constants.u32[0] = match.bias;
237 }
238