/*
 * Copyright © 2018 Timothy Arceri
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#define XXH_INLINE_ALL
#include "util/xxhash.h"

/** @file nir_opt_vectorize_io.c
 *
 * Replaces scalar load_deref operations on vertex-shader generic
 * attribute inputs with vectorized loads, merging the scalar/partial
 * variables they read into vector variables where possible.
 */
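
/*
 * For example (illustrative NIR-like pseudocode, not exact syntax): two
 * scalar loads of the same generic attribute,
 *
 *    vec1 32 ssa_1 = load_deref &in_attr0_x
 *    vec1 32 ssa_2 = load_deref &in_attr0_y
 *
 * are replaced by a single load of a merged vec2 variable plus swizzles:
 *
 *    vec2 32 ssa_3 = load_deref &in_attr0_xy
 *    vec1 32 ssa_1 = ssa_3.x
 *    vec1 32 ssa_2 = ssa_3.y
 */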
bool
r600_vectorize_vs_inputs(nir_shader *shader);

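/* Rebuild the array-deref chain of src_head on top of dst_tail.  Walks up
 * to the variable deref first, then re-emits the array indices in order,
 * so the merged variable is indexed exactly like the original one.
 */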
static nir_deref_instr *
r600_clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
                       const nir_deref_instr *src_head)
{
   const nir_deref_instr *parent = nir_deref_instr_parent(src_head);

   if (!parent)
      return dst_tail;

   assert(src_head->deref_type == nir_deref_type_array);

   dst_tail = r600_clone_deref_array(b, dst_tail, parent);

   return nir_build_deref_array(b, dst_tail,
                                nir_ssa_for_src(b, src_head->arr.index, 1));
}

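/* A variable is a merge candidate if it is a 32-bit scalar or vector
 * (possibly wrapped in arrays) bound to one of the generic vertex
 * attribute locations.
 */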
static bool
r600_variable_can_rewrite(nir_variable *var)
{
   /* Skip complex types we don't split in the first place */
   if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
      return false;

   /* TODO: add 64/16-bit support? */
   if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
      return false;

   /* We only check VS generic attribute inputs */
   return (var->data.location >= VERT_ATTRIB_GENERIC0 &&
           var->data.location <= VERT_ATTRIB_GENERIC15);
}

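/* An instruction can be rewritten if it is a load_deref of at most three
 * components that reads a shader input variable passing
 * r600_variable_can_rewrite().
 */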
static bool
r600_instr_can_rewrite(nir_instr *instr)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

   if (intr->num_components > 3)
      return false;

   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   if (!nir_deref_mode_is(deref, nir_var_shader_in))
      return false;

   return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref));
}

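/* Two loads access the "same" variable for merging purposes when the
 * variables they read share a base type and a location.
 */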
static bool
r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
{
   assert(instr1->type == nir_instr_type_intrinsic &&
          instr2->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1);
   nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2);

   nir_variable *var1 =
      nir_deref_instr_get_variable(nir_src_as_deref(intr1->src[0]));
   nir_variable *var2 =
      nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));

   /* We don't handle combining vars of different base types, so skip those */
   if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type))
      return false;

   if (var1->data.location != var2->data.location)
      return false;

   return true;
}

static struct util_dynarray *
r600_vec_instr_stack_create(void *mem_ctx)
{
   struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
   util_dynarray_init(stack, mem_ctx);
   return stack;
}

static void
r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
{
   util_dynarray_append(stack, nir_instr *, instr);
}

static unsigned
r600_correct_location(nir_variable *var)
{
   return var->data.location - VERT_ATTRIB_GENERIC0;
}

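/* Emit a vectorized load of var in front of the old scalar load intr,
 * clone the original array indexing onto the new deref, swizzle out the
 * components the old load produced, rewrite all uses, and remove the old
 * load.
 */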
static void
r600_create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
                     unsigned comp, unsigned num_comps, unsigned old_num_comps)
{
   unsigned channels[4];

   b->cursor = nir_before_instr(&intr->instr);

   assert(intr->dest.is_ssa);

   nir_intrinsic_instr *new_intr =
      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
   nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps,
                     intr->dest.ssa.bit_size, NULL);
   new_intr->num_components = num_comps;

   nir_deref_instr *deref = nir_build_deref_var(b, var);
   deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));

   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);

   if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
      nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr);

   nir_builder_instr_insert(b, &new_intr->instr);

   for (unsigned i = 0; i < old_num_comps; ++i)
      channels[i] = comp - var->data.location_frac + i;
   nir_ssa_def *load = nir_swizzle(b, &new_intr->dest.ssa, channels, old_num_comps);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

   /* Remove the old load intrinsic */
   nir_instr_remove(&intr->instr);
}

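/* Pop instr off its stack and, if its variable was merged into a wider
 * vector variable, replace the scalar load with a vectorized one.
 * Returns true if a new load was emitted.
 */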
static bool
r600_vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack,
                         nir_instr *instr,
                         nir_variable *updated_vars[16][4])
{
   nir_instr *last = util_dynarray_pop(stack, nir_instr *);

   assert(last == instr);
   assert(last->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
   nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   unsigned loc = r600_correct_location(var);

   nir_variable *new_var;
   new_var = updated_vars[loc][var->data.location_frac];

   unsigned num_comps =
      glsl_get_vector_elements(glsl_without_array(new_var->type));

   unsigned old_num_comps =
      glsl_get_vector_elements(glsl_without_array(var->type));

   /* Don't bother walking the stack if this component can't be vectorized. */
   if (old_num_comps > 3) {
      return false;
   }

   if (new_var == var) {
      return false;
   }

   r600_create_new_load(b, intr, new_var, var->data.location_frac,
                        num_comps, old_num_comps);
   return true;
}

static bool
r600_cmp_func(const void *data1, const void *data2)
{
   const struct util_dynarray *arr1 = data1;
   const struct util_dynarray *arr2 = data2;

   const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
   const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);

   return r600_io_access_same_var(instr1, instr2);
}

#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash))

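/* Hash a load by the type and location of the variable it reads, so that
 * loads of mergeable variables end up in the same set entry.
 */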
static uint32_t
r600_hash_instr(const nir_instr *instr)
{
   assert(instr->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

   uint32_t hash = 0;

   hash = HASH(hash, var->type);
   return HASH(hash, var->data.location);
}

static uint32_t
r600_hash_stack(const void *data)
{
   const struct util_dynarray *stack = data;
   const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
   return r600_hash_instr(first);
}

static struct set *
r600_vec_instr_set_create(void)
{
   return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func);
}

static void
r600_vec_instr_set_destroy(struct set *instr_set)
{
   _mesa_set_destroy(instr_set, NULL);
}

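/* Add a candidate load to the set: either start a new stack for it or
 * append it to the stack of an equivalent load seen earlier.
 */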
static void
r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr)
{
   if (!r600_instr_can_rewrite(instr)) {
      return;
   }

   struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set);
   r600_vec_instr_stack_push(new_stack, instr);

   struct set_entry *entry = _mesa_set_search(instr_set, new_stack);

   if (entry) {
      ralloc_free(new_stack);
      struct util_dynarray *stack = (struct util_dynarray *)entry->key;
      r600_vec_instr_stack_push(stack, instr);
      return;
   }

   _mesa_set_add(instr_set, new_stack);
}

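/* Look up the stack instr belongs to, pop the instruction off it and
 * rewrite it if possible, and drop the set entry once its stack is empty.
 */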
static bool
r600_vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr,
                          nir_variable *updated_vars[16][4])
{
   if (!r600_instr_can_rewrite(instr)) {
      return false;
   }

   /*
    * It's pretty unfortunate that we have to do this, but it's a side effect
    * of the hash set interfaces. The hash set assumes that we're only
    * interested in storing one equivalent element at a time, and if we try to
    * insert a duplicate element it will remove the original. We could hack up
    * the comparison function to "know" which input is an instruction we
    * passed in and which is an array that's part of the entry, but that
    * wouldn't work because we need to pass an array to _mesa_set_add() in
    * r600_vec_instr_set_add() above, and _mesa_set_add() will call our
    * comparison function as well.
    */
   struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set);
   r600_vec_instr_stack_push(temp, instr);
   struct set_entry *entry = _mesa_set_search(instr_set, temp);
   ralloc_free(temp);

   if (entry) {
      struct util_dynarray *stack = (struct util_dynarray *)entry->key;
      bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars);

      if (!util_dynarray_num_elements(stack, nir_instr *))
         _mesa_set_remove(instr_set, entry);

      return progress;
   }

   return false;
}

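/* Walk the dominance tree depth-first: collect candidate loads on the way
 * down, then rewrite them in reverse order on the way back up so dominated
 * blocks are processed first.
 */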
static bool
r600_vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set,
                     nir_variable *updated_vars[16][4])
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      r600_vec_instr_set_add(instr_set, instr);
   }

   for (unsigned i = 0; i < block->num_dom_children; i++) {
      nir_block *child = block->dom_children[i];
      progress |= r600_vectorize_block(b, child, instr_set, updated_vars);
   }

   nir_foreach_instr_reverse_safe(instr, block) {
      progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars);
   }

   return progress;
}

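/* Clone the variable covering the first merged component, widen its type
 * to span all merged components, and point every merged slot of the vars
 * table at the new variable.
 */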
static void
r600_create_new_io_var(nir_shader *shader,
                       nir_variable *vars[16][4],
                       unsigned location, unsigned comps)
{
   unsigned num_comps = util_bitcount(comps);
   assert(num_comps > 1);

   /* Note: u_bit_scan() strips a component off the comps bitfield here */
   unsigned first_comp = u_bit_scan(&comps);

   nir_variable *var = nir_variable_clone(vars[location][first_comp], shader);
   var->data.location_frac = first_comp;
   var->type = glsl_replace_vector_type(var->type, num_comps);

   nir_shader_add_variable(shader, var);

   vars[location][first_comp] = var;

   while (comps) {
      const int comp = u_bit_scan(&comps);
      if (vars[location][comp]) {
         vars[location][comp] = var;
      }
   }
}

static inline bool
r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs)
{
   return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
}

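/* Collect all rewritable input variables into a location/component table
 * and build one merged vector variable for each attribute slot that has
 * more than one mergeable component.
 */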
static void
r600_create_new_io_vars(nir_shader *shader, nir_variable_mode mode,
                        nir_variable *vars[16][4])
{
   bool can_rewrite_vars = false;
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (r600_variable_can_rewrite(var)) {
         can_rewrite_vars = true;
         unsigned loc = r600_correct_location(var);
         vars[loc][var->data.location_frac] = var;
      }
   }

   if (!can_rewrite_vars)
      return;

   /* We don't handle combining vars of different types, e.g. with different
    * array lengths.
    */
   for (unsigned i = 0; i < 16; i++) {
      unsigned comps = 0;

      for (unsigned j = 0; j < 3; j++) {
         if (!vars[i][j])
            continue;

         for (unsigned k = j + 1; k < 4; k++) {
            if (!vars[i][k])
               continue;

            if (!r600_variables_can_merge(vars[i][j], vars[i][k]))
               continue;

            /* Set the bits for the components both variables cover */
            for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n)
               comps |= 1 << (vars[i][j]->data.location_frac + n);

            for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n)
               comps |= 1 << (vars[i][k]->data.location_frac + n);
         }
      }
      if (comps)
         r600_create_new_io_var(shader, vars, i, comps);
   }
}

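/* Run the pass on a single function: build the merged-variable table,
 * then vectorize all blocks starting from the dominance-tree root.
 */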
static bool
r600_vectorize_io_impl(nir_function_impl *impl)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_metadata_require(impl, nir_metadata_dominance);

   nir_shader *shader = impl->function->shader;
   nir_variable *updated_vars[16][4] = {0};

   r600_create_new_io_vars(shader, nir_var_shader_in, updated_vars);

   struct set *instr_set = r600_vec_instr_set_create();
   bool progress = r600_vectorize_block(&b, nir_start_block(impl), instr_set,
                                        updated_vars);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                            nir_metadata_dominance);
   }

   r600_vec_instr_set_destroy(instr_set);
   return progress;
}

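/* Entry point.  Only vertex shaders are handled, since the pass targets
 * the generic vertex attribute inputs.
 */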
bool
r600_vectorize_vs_inputs(nir_shader *shader)
{
   bool progress = false;

   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= r600_vectorize_io_impl(function->impl);
   }

   return progress;
}