1 /*
2  * Copyright © 2018 Timothy Arceri
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "util/u_dynarray.h"
28 #include "util/u_math.h"
29 #define XXH_INLINE_ALL
30 #include "util/xxhash.h"
31 
/** @file
 *
 * Replaces scalar load_deref operations on vertex-shader generic
 * attribute inputs with vectorized loads.
 */
37 bool
38 r600_vectorize_vs_inputs(nir_shader *shader);
39 
40 static nir_deref_instr *
r600_clone_deref_array(nir_builder * b,nir_deref_instr * dst_tail,const nir_deref_instr * src_head)41 r600_clone_deref_array(nir_builder *b, nir_deref_instr *dst_tail,
42                   const nir_deref_instr *src_head)
43 {
44    const nir_deref_instr *parent = nir_deref_instr_parent(src_head);
45 
46    if (!parent)
47       return dst_tail;
48 
49    assert(src_head->deref_type == nir_deref_type_array);
50 
51    dst_tail = r600_clone_deref_array(b, dst_tail, parent);
52 
53    return nir_build_deref_array(b, dst_tail,
54                                 nir_ssa_for_src(b, src_head->arr.index, 1));
55 }
56 
/* Returns true if @var is an input this pass is able to re-vectorize:
 * a (possibly arrayed) 32-bit scalar/vector generic vertex attribute.
 */
static bool
r600_variable_can_rewrite(nir_variable *var)
{

   /* Skip complex types we don't split in the first place */
   if (!glsl_type_is_vector_or_scalar(glsl_without_array(var->type)))
      return false;


   /* TODO: add 64/16bit support ? */
   if (glsl_get_bit_size(glsl_without_array(var->type)) != 32)
      return false;

   /* We only check VS generic attribute inputs */
   return (var->data.location >= VERT_ATTRIB_GENERIC0 &&
           var->data.location <= VERT_ATTRIB_GENERIC15);
}
74 
75 static bool
r600_instr_can_rewrite(nir_instr * instr)76 r600_instr_can_rewrite(nir_instr *instr)
77 {
78    if (instr->type != nir_instr_type_intrinsic)
79       return false;
80 
81    nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
82 
83    if (intr->num_components > 3)
84       return false;
85 
86    if (intr->intrinsic != nir_intrinsic_load_deref)
87       return false;
88 
89    nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
90    if (!nir_deref_mode_is(deref, nir_var_shader_in))
91       return false;
92 
93    return r600_variable_can_rewrite(nir_deref_instr_get_variable(deref));
94 }
95 
96 static bool
r600_io_access_same_var(const nir_instr * instr1,const nir_instr * instr2)97 r600_io_access_same_var(const nir_instr *instr1, const nir_instr *instr2)
98 {
99    assert(instr1->type == nir_instr_type_intrinsic &&
100           instr2->type == nir_instr_type_intrinsic);
101 
102    nir_intrinsic_instr *intr1 = nir_instr_as_intrinsic(instr1);
103    nir_intrinsic_instr *intr2 = nir_instr_as_intrinsic(instr2);
104 
105    nir_variable *var1 =
106       nir_deref_instr_get_variable(nir_src_as_deref(intr1->src[0]));
107    nir_variable *var2 =
108       nir_deref_instr_get_variable(nir_src_as_deref(intr2->src[0]));
109 
110    /* We don't handle combining vars of different base types, so skip those */
111    if (glsl_get_base_type(var1->type) != glsl_get_base_type(var2->type))
112       return false;
113 
114    if (var1->data.location != var2->data.location)
115       return false;
116 
117    return true;
118 }
119 
120 static struct util_dynarray *
r600_vec_instr_stack_create(void * mem_ctx)121 r600_vec_instr_stack_create(void *mem_ctx)
122 {
123    struct util_dynarray *stack = ralloc(mem_ctx, struct util_dynarray);
124    util_dynarray_init(stack, mem_ctx);
125    return stack;
126 }
127 
/* Push @instr onto the per-variable instruction stack. */
static void
r600_vec_instr_stack_push(struct util_dynarray *stack, nir_instr *instr)
{
   util_dynarray_append(stack, nir_instr *, instr);
}
133 
r600_correct_location(nir_variable * var)134 static unsigned r600_correct_location(nir_variable *var)
135 {
136    return var->data.location - VERT_ATTRIB_GENERIC0;
137 }
138 
/* Replace the scalar/narrow load @intr with a @num_comps-wide load of the
 * merged variable @var, then swizzle out the @old_num_comps components the
 * original load produced and rewrite all of its uses.
 *
 * @comp is the original variable's location_frac, i.e. the first component
 * the old load covered within the merged vector.
 */
static void
r600_create_new_load(nir_builder *b, nir_intrinsic_instr *intr, nir_variable *var,
                unsigned comp, unsigned num_comps, unsigned old_num_comps)
{
   unsigned channels[4];

   b->cursor = nir_before_instr(&intr->instr);

   assert(intr->dest.is_ssa);

   nir_intrinsic_instr *new_intr =
      nir_intrinsic_instr_create(b->shader, intr->intrinsic);
   nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, num_comps,
                     intr->dest.ssa.bit_size, NULL);
   new_intr->num_components = num_comps;

   /* Rebuild the deref chain against the merged variable, reusing the
    * original array indices. */
   nir_deref_instr *deref = nir_build_deref_var(b, var);
   deref = r600_clone_deref_array(b, deref, nir_src_as_deref(intr->src[0]));

   new_intr->src[0] = nir_src_for_ssa(&deref->dest.ssa);

   /* Interpolation intrinsics carry the offset/sample in src[1]; keep it.
    * NOTE(review): only load_deref reaches here per r600_instr_can_rewrite,
    * so this branch looks like dead generality — confirm before removing. */
   if (intr->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intr->intrinsic == nir_intrinsic_interp_deref_at_sample)
      nir_src_copy(&new_intr->src[1], &intr->src[1], &new_intr->instr);

   nir_builder_instr_insert(b, &new_intr->instr);

   /* Select the slice of the wide result the old load corresponded to;
    * var->data.location_frac is the merged var's first component. */
   for (unsigned i = 0; i < old_num_comps; ++i)
      channels[i] = comp - var->data.location_frac + i;
   nir_ssa_def *load = nir_swizzle(b, &new_intr->dest.ssa, channels, old_num_comps);
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

   /* Remove the old load intrinsic */
   nir_instr_remove(&intr->instr);
}
174 
175 
/* Pop @instr off its stack and, when a merged replacement variable exists
 * in @updated_vars, rewrite the load to use it.  Returns true on rewrite.
 */
static bool
r600_vec_instr_stack_pop(nir_builder *b, struct util_dynarray *stack,
                         nir_instr *instr,
                         nir_variable *updated_vars[16][4])
{
   nir_instr *last = util_dynarray_pop(stack, nir_instr *);

   /* Instructions are removed in exact reverse order of insertion, so the
    * top of the stack must be the instruction we were handed. */
   assert(last == instr);
   assert(last->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(last);
   nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));
   unsigned loc = r600_correct_location(var);

   /* The merged variable covering this location/component, if any. */
   nir_variable *new_var;
   new_var = updated_vars[loc][var->data.location_frac];

   unsigned num_comps =
      glsl_get_vector_elements(glsl_without_array(new_var->type));

   unsigned old_num_comps =
         glsl_get_vector_elements(glsl_without_array(var->type));

   /* Don't bother walking the stack if this component can't be vectorised. */
   if (old_num_comps > 3) {
      return false;
   }

   /* No merge happened for this slot: the table still points at the
    * original variable. */
   if (new_var == var) {
      return false;
   }

   r600_create_new_load(b, intr, new_var, var->data.location_frac,
                        num_comps, old_num_comps);
   return true;
}
213 
214 static bool
r600_cmp_func(const void * data1,const void * data2)215 r600_cmp_func(const void *data1, const void *data2)
216 {
217    const struct util_dynarray *arr1 = data1;
218    const struct util_dynarray *arr2 = data2;
219 
220    const nir_instr *instr1 = *(nir_instr **)util_dynarray_begin(arr1);
221    const nir_instr *instr2 = *(nir_instr **)util_dynarray_begin(arr2);
222 
223    return r600_io_access_same_var(instr1, instr2);
224 }
225 
/* Fold the raw bytes of @data into a running XXH32 state seeded by @hash. */
#define HASH(hash, data) XXH32(&(data), sizeof(data), (hash))

/* Hash a candidate load by the variable it reads: its type and location.
 * NOTE(review): this hashes the glsl_type POINTER value, which presumably
 * relies on GLSL types being interned/unique — confirm that invariant.
 */
static uint32_t
r600_hash_instr(const nir_instr *instr)
{
   assert(instr->type == nir_instr_type_intrinsic);

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   nir_variable *var =
      nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));

   uint32_t hash = 0;

   hash = HASH(hash, var->type);
   return HASH(hash, var->data.location);
}
242 
243 static uint32_t
r600_hash_stack(const void * data)244 r600_hash_stack(const void *data)
245 {
246    const struct util_dynarray *stack = data;
247    const nir_instr *first = *(nir_instr **)util_dynarray_begin(stack);
248    return r600_hash_instr(first);
249 }
250 
/* Create the set of per-variable instruction stacks, keyed by
 * r600_hash_stack / r600_cmp_func.
 */
static struct set *
r600_vec_instr_set_create(void)
{
   return _mesa_set_create(NULL, r600_hash_stack, r600_cmp_func);
}
256 
/* Destroy the instruction-stack set; the stacks themselves are ralloc'ed
 * on the set and are freed with it.
 */
static void
r600_vec_instr_set_destroy(struct set *instr_set)
{
   _mesa_set_destroy(instr_set, NULL);
}
262 
263 static void
r600_vec_instr_set_add(struct set * instr_set,nir_instr * instr)264 r600_vec_instr_set_add(struct set *instr_set, nir_instr *instr)
265 {
266    if (!r600_instr_can_rewrite(instr)) {
267       return;
268    }
269 
270    struct util_dynarray *new_stack = r600_vec_instr_stack_create(instr_set);
271    r600_vec_instr_stack_push(new_stack, instr);
272 
273    struct set_entry *entry = _mesa_set_search(instr_set, new_stack);
274 
275    if (entry) {
276       ralloc_free(new_stack);
277       struct util_dynarray *stack = (struct util_dynarray *) entry->key;
278       r600_vec_instr_stack_push(stack, instr);
279       return;
280    }
281 
282    _mesa_set_add(instr_set, new_stack);
283 
284    return;
285 }
286 
/* Pop @instr from its per-variable stack (rewriting it if a merged variable
 * exists) and drop the stack from the set once it empties.  Returns true if
 * the instruction was rewritten.
 */
static bool
r600_vec_instr_set_remove(nir_builder *b, struct set *instr_set, nir_instr *instr,
                          nir_variable *updated_vars[16][4])
{
   if (!r600_instr_can_rewrite(instr)) {
      return false;
   }
   /*
    * It's pretty unfortunate that we have to do this, but it's a side effect
    * of the hash set interfaces. The hash set assumes that we're only
    * interested in storing one equivalent element at a time, and if we try to
    * insert a duplicate element it will remove the original. We could hack up
    * the comparison function to "know" which input is an instruction we
    * passed in and which is an array that's part of the entry, but that
    * wouldn't work because we need to pass an array to _mesa_set_add() in
    * vec_instr_add() above, and _mesa_set_add() will call our comparison
    * function as well.
    */
   struct util_dynarray *temp = r600_vec_instr_stack_create(instr_set);
   r600_vec_instr_stack_push(temp, instr);
   struct set_entry *entry = _mesa_set_search(instr_set, temp);
   ralloc_free(temp);

   if (entry) {
      struct util_dynarray *stack = (struct util_dynarray *) entry->key;
      bool progress = r600_vec_instr_stack_pop(b, stack, instr, updated_vars);

      /* Remove emptied stacks so stale keys don't alias later searches. */
      if (!util_dynarray_num_elements(stack, nir_instr *))
         _mesa_set_remove(instr_set, entry);

      return progress;
   }

   return false;
}
322 
/* Collect candidate loads in @block, recurse into its dominance children,
 * then rewrite the collected loads in reverse order on the way back up.
 * The forward/reverse pairing guarantees each pop matches the stack top
 * (see the assert in r600_vec_instr_stack_pop).  Returns true on progress.
 */
static bool
r600_vectorize_block(nir_builder *b, nir_block *block, struct set *instr_set,
                nir_variable *updated_vars[16][4])
{
   bool progress = false;

   nir_foreach_instr_safe(instr, block) {
      r600_vec_instr_set_add(instr_set, instr);
   }

   for (unsigned i = 0; i < block->num_dom_children; i++) {
      nir_block *child = block->dom_children[i];
      progress |= r600_vectorize_block(b, child, instr_set, updated_vars);
   }

   nir_foreach_instr_reverse_safe(instr, block) {
      progress |= r600_vec_instr_set_remove(b, instr_set, instr, updated_vars);
   }

   return progress;
}
344 
/* Create one merged vector variable covering the component bits in @comps
 * at attribute slot @location, and repoint the affected entries of @vars
 * to it.  @comps must have at least two bits set.
 */
static void
r600_create_new_io_var(nir_shader *shader,
                  nir_variable *vars[16][4],
                  unsigned location, unsigned comps)
{
   unsigned num_comps = util_bitcount(comps);
   assert(num_comps > 1);

   /* Note: u_bit_scan() strips a component of the comps bitfield here */
   unsigned first_comp = u_bit_scan(&comps);

   /* Clone the variable at the lowest merged component and widen its type
    * to span all merged components. */
   nir_variable *var = nir_variable_clone(vars[location][first_comp], shader);
   var->data.location_frac = first_comp;
   var->type = glsl_replace_vector_type(var->type, num_comps);

   nir_shader_add_variable(shader, var);

   vars[location][first_comp] = var;

   /* Point the remaining merged component slots at the new variable.  Some
    * bits in comps cover trailing components of a multi-component var and
    * have no table entry of their own — hence the NULL check. */
   while (comps) {
      const int comp = u_bit_scan(&comps);
      if (vars[location][comp]) {
         vars[location][comp] = var;
      }
   }
}
371 
372 static inline bool
r600_variables_can_merge(const nir_variable * lhs,const nir_variable * rhs)373 r600_variables_can_merge(const nir_variable *lhs, const nir_variable *rhs)
374 {
375    return (glsl_get_base_type(lhs->type) == glsl_get_base_type(rhs->type));
376 }
377 
/* Scan the shader's variables of @mode, fill @vars with the rewritable ones
 * indexed by [generic slot][location_frac], and create a merged vector
 * variable for every slot where two or more compatible variables coexist.
 */
static void
r600_create_new_io_vars(nir_shader *shader, nir_variable_mode mode,
                   nir_variable *vars[16][4])
{
   bool can_rewrite_vars = false;
   nir_foreach_variable_with_modes(var, shader, mode) {
      if (r600_variable_can_rewrite(var)) {
         can_rewrite_vars = true;
         unsigned loc = r600_correct_location(var);
         vars[loc][var->data.location_frac] = var;
      }
   }

   if (!can_rewrite_vars)
      return;

   /* We don't handle combining vars of different type e.g. different array
    * lengths.
    */
   for (unsigned i = 0; i < 16; i++) {
      unsigned comps = 0;

      /* j only runs to component 2: a variable starting at the last
       * component has nothing after it to merge with. */
      for (unsigned j = 0; j < 3; j++) {

         if (!vars[i][j])
            continue;

         for (unsigned k = j + 1; k < 4; k++) {
            if (!vars[i][k])
               continue;

            if (!r600_variables_can_merge(vars[i][j], vars[i][k]))
               continue;

            /* Set comps */
            for (unsigned n = 0; n < glsl_get_components(vars[i][j]->type); ++n)
               comps |= 1 << (vars[i][j]->data.location_frac + n);

            for (unsigned n = 0; n < glsl_get_components(vars[i][k]->type); ++n)
               comps |= 1 << (vars[i][k]->data.location_frac + n);

         }
      }
      /* comps is only non-zero when at least one mergeable pair was found. */
      if (comps)
         r600_create_new_io_var(shader, vars, i, comps);
   }
}
425 
426 static bool
r600_vectorize_io_impl(nir_function_impl * impl)427 r600_vectorize_io_impl(nir_function_impl *impl)
428 {
429    nir_builder b;
430    nir_builder_init(&b, impl);
431 
432    nir_metadata_require(impl, nir_metadata_dominance);
433 
434    nir_shader *shader = impl->function->shader;
435    nir_variable *updated_vars[16][4] = {0};
436 
437    r600_create_new_io_vars(shader, nir_var_shader_in, updated_vars);
438 
439    struct set *instr_set = r600_vec_instr_set_create();
440    bool progress = r600_vectorize_block(&b, nir_start_block(impl), instr_set,
441                                         updated_vars);
442 
443    if (progress) {
444       nir_metadata_preserve(impl, nir_metadata_block_index |
445                                   nir_metadata_dominance);
446    }
447 
448    r600_vec_instr_set_destroy(instr_set);
449    return false;
450 }
451 
452 bool
r600_vectorize_vs_inputs(nir_shader * shader)453 r600_vectorize_vs_inputs(nir_shader *shader)
454 {
455    bool progress = false;
456 
457    if (shader->info.stage != MESA_SHADER_VERTEX)
458       return false;
459 
460    nir_foreach_function(function, shader) {
461       if (function->impl)
462          progress |= r600_vectorize_io_impl(function->impl);
463    }
464 
465    return progress;
466 }
467