1 /*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir.h"
25 #include "nir_builder.h"
26 #include "nir_deref.h"
27 #include "nir_vla.h"
28
29 #include "util/set.h"
30 #include "util/u_math.h"
31
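/* Gather, into a set, every variable that is accessed through a deref chain
 * with a "complex" use (anything nir_deref_instr_has_complex_use reports).
 * Such variables cannot safely be split or shrunk by the passes below.
 */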
32 static struct set *
33 get_complex_used_vars(nir_shader *shader, void *mem_ctx)
34 {
35 struct set *complex_vars = _mesa_pointer_set_create(mem_ctx);
36
37 nir_foreach_function(function, shader) {
38 if (!function->impl)
39 continue;
40
41 nir_foreach_block(block, function->impl) {
42 nir_foreach_instr(instr, block) {
43 if (instr->type != nir_instr_type_deref)
44 continue;
45
46 nir_deref_instr *deref = nir_instr_as_deref(instr);
47
48 /* We only need to consider var derefs because
49 * nir_deref_instr_has_complex_use is recursive.
50 */
51 if (deref->deref_type == nir_deref_type_var &&
52 nir_deref_instr_has_complex_use(deref))
53 _mesa_set_add(complex_vars, deref->var);
54 }
55 }
56 }
57
58 return complex_vars;
59 }
60
61 struct split_var_state {
62 void *mem_ctx;
63
64 nir_shader *shader;
65 nir_function_impl *impl;
66
67 nir_variable *base_var;
68 };
69
70 struct field {
71 struct field *parent;
72
73 const struct glsl_type *type;
74
75 unsigned num_fields;
76 struct field *fields;
77
78 nir_variable *var;
79 };
80
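/* Wrap "type" in the same array dimensions as "array_type".  For example
 * (hypothetical types), wrapping a vec2 in float[4][3] yields vec2[4][3].
 */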
81 static const struct glsl_type *
82 wrap_type_in_array(const struct glsl_type *type,
83 const struct glsl_type *array_type)
84 {
85 if (!glsl_type_is_array(array_type))
86 return type;
87
88 const struct glsl_type *elem_type =
89 wrap_type_in_array(type, glsl_get_array_element(array_type));
90 assert(glsl_get_explicit_stride(array_type) == 0);
91 return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
92 }
93
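/* Count how many array (or matrix) levels wrap a vector or scalar type.
 * Returns -1 if the innermost type is not a vector or scalar.
 */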
94 static int
95 num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
96 {
97 int num_levels = 0;
98 while (true) {
99 if (glsl_type_is_array_or_matrix(type)) {
100 num_levels++;
101 type = glsl_get_array_element(type);
102 } else if (glsl_type_is_vector_or_scalar(type)) {
103 return num_levels;
104 } else {
105 /* Not an array of vectors */
106 return -1;
107 }
108 }
109 }
110
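/* Recursively build the field tree for a (possibly array-of-) struct type.
 * Leaf fields get a fresh variable whose type is the leaf type re-wrapped in
 * all of the enclosing array dimensions.
 */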
111 static void
112 init_field_for_type(struct field *field, struct field *parent,
113 const struct glsl_type *type,
114 const char *name,
115 struct split_var_state *state)
116 {
117 *field = (struct field) {
118 .parent = parent,
119 .type = type,
120 };
121
122 const struct glsl_type *struct_type = glsl_without_array(type);
123 if (glsl_type_is_struct_or_ifc(struct_type)) {
124 field->num_fields = glsl_get_length(struct_type);
125 field->fields = ralloc_array(state->mem_ctx, struct field,
126 field->num_fields);
127 for (unsigned i = 0; i < field->num_fields; i++) {
128 char *field_name = NULL;
129 if (name) {
130 field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
131 glsl_get_struct_elem_name(struct_type, i));
132 } else {
133 field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
134 glsl_get_type_name(struct_type),
135 glsl_get_struct_elem_name(struct_type, i));
136 }
137 init_field_for_type(&field->fields[i], field,
138 glsl_get_struct_field(struct_type, i),
139 field_name, state);
140 }
141 } else {
142 const struct glsl_type *var_type = type;
143 for (struct field *f = field->parent; f; f = f->parent)
144 var_type = wrap_type_in_array(var_type, f->type);
145
146 nir_variable_mode mode = state->base_var->data.mode;
147 if (mode == nir_var_function_temp) {
148 field->var = nir_local_variable_create(state->impl, var_type, name);
149 } else {
150 field->var = nir_variable_create(state->shader, mode, var_type, name);
151 }
152 }
153 }
154
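/* Pull every splittable struct variable of the given mode off of "vars",
 * build its field tree, and record it in var_field_map.  Returns true if
 * anything was queued for splitting.
 */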
155 static bool
156 split_var_list_structs(nir_shader *shader,
157 nir_function_impl *impl,
158 struct exec_list *vars,
159 nir_variable_mode mode,
160 struct hash_table *var_field_map,
161 struct set **complex_vars,
162 void *mem_ctx)
163 {
164 struct split_var_state state = {
165 .mem_ctx = mem_ctx,
166 .shader = shader,
167 .impl = impl,
168 };
169
170 struct exec_list split_vars;
171 exec_list_make_empty(&split_vars);
172
173 /* To avoid list confusion (we'll be adding things as we split variables),
174 * pull all of the variables we plan to split off of the list
175 */
176 nir_foreach_variable_in_list_safe(var, vars) {
177 if (var->data.mode != mode)
178 continue;
179
180 if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
181 continue;
182
183 if (*complex_vars == NULL)
184 *complex_vars = get_complex_used_vars(shader, mem_ctx);
185
186 /* We can't split a variable that's referenced with a deref that has any
187 * sort of complex usage.
188 */
189 if (_mesa_set_search(*complex_vars, var))
190 continue;
191
192 exec_node_remove(&var->node);
193 exec_list_push_tail(&split_vars, &var->node);
194 }
195
196 nir_foreach_variable_in_list(var, &split_vars) {
197 state.base_var = var;
198
199 struct field *root_field = ralloc(mem_ctx, struct field);
200 init_field_for_type(root_field, NULL, var->type, var->name, &state);
201 _mesa_hash_table_insert(var_field_map, var, root_field);
202 }
203
204 return !exec_list_is_empty(&split_vars);
205 }
206
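/* Rewrite every vector/scalar deref of a split variable to point at the
 * per-field variable recorded in var_field_map, dropping the struct levels
 * from the deref chain.
 */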
207 static void
208 split_struct_derefs_impl(nir_function_impl *impl,
209 struct hash_table *var_field_map,
210 nir_variable_mode modes,
211 void *mem_ctx)
212 {
213 nir_builder b;
214 nir_builder_init(&b, impl);
215
216 nir_foreach_block(block, impl) {
217 nir_foreach_instr_safe(instr, block) {
218 if (instr->type != nir_instr_type_deref)
219 continue;
220
221 nir_deref_instr *deref = nir_instr_as_deref(instr);
222 if (!nir_deref_mode_may_be(deref, modes))
223 continue;
224
225 /* Clean up any dead derefs we find lying around. They may refer to
226 * variables we're planning to split.
227 */
228 if (nir_deref_instr_remove_if_unused(deref))
229 continue;
230
231 if (!glsl_type_is_vector_or_scalar(deref->type))
232 continue;
233
234 nir_variable *base_var = nir_deref_instr_get_variable(deref);
235 /* If we can't chase back to the variable, then we're a complex use.
236 * This should have been detected by get_complex_used_vars() and the
237 * variable should not have been split. However, we have no way of
238 * knowing that here, so we just have to trust it.
239 */
240 if (base_var == NULL)
241 continue;
242
243 struct hash_entry *entry =
244 _mesa_hash_table_search(var_field_map, base_var);
245 if (!entry)
246 continue;
247
248 struct field *root_field = entry->data;
249
250 nir_deref_path path;
251 nir_deref_path_init(&path, deref, mem_ctx);
252
253 struct field *tail_field = root_field;
254 for (unsigned i = 0; path.path[i]; i++) {
255 if (path.path[i]->deref_type != nir_deref_type_struct)
256 continue;
257
258 assert(i > 0);
259 assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
260 assert(path.path[i - 1]->type ==
261 glsl_without_array(tail_field->type));
262
263 tail_field = &tail_field->fields[path.path[i]->strct.index];
264 }
265 nir_variable *split_var = tail_field->var;
266
267 nir_deref_instr *new_deref = NULL;
268 for (unsigned i = 0; path.path[i]; i++) {
269 nir_deref_instr *p = path.path[i];
270 b.cursor = nir_after_instr(&p->instr);
271
272 switch (p->deref_type) {
273 case nir_deref_type_var:
274 assert(new_deref == NULL);
275 new_deref = nir_build_deref_var(&b, split_var);
276 break;
277
278 case nir_deref_type_array:
279 case nir_deref_type_array_wildcard:
280 new_deref = nir_build_deref_follower(&b, new_deref, p);
281 break;
282
283 case nir_deref_type_struct:
284 /* Nothing to do; we're splitting structs */
285 break;
286
287 default:
288 unreachable("Invalid deref type in path");
289 }
290 }
291
292 assert(new_deref->type == deref->type);
293 nir_ssa_def_rewrite_uses(&deref->dest.ssa,
294 nir_src_for_ssa(&new_deref->dest.ssa));
295 nir_deref_instr_remove_if_unused(deref);
296 }
297 }
298 }
299
300 /** A pass for splitting structs into multiple variables
301 *
302 * This pass splits arrays of structs into multiple variables, one for each
303 * (possibly nested) structure member. After this pass completes, no
304 * variables of the given mode will contain a struct type.
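 *
 * A hedged usage sketch (not from this file): callers typically invoke the
 * pass through the NIR_PASS macro, e.g.
 *
 *    NIR_PASS(progress, shader, nir_split_struct_vars, nir_var_function_temp);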
305 */
306 bool
307 nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
308 {
309 void *mem_ctx = ralloc_context(NULL);
310 struct hash_table *var_field_map =
311 _mesa_pointer_hash_table_create(mem_ctx);
312 struct set *complex_vars = NULL;
313
314 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
315
316 bool has_global_splits = false;
317 if (modes & nir_var_shader_temp) {
318 has_global_splits = split_var_list_structs(shader, NULL,
319 &shader->variables,
320 nir_var_shader_temp,
321 var_field_map,
322 &complex_vars,
323 mem_ctx);
324 }
325
326 bool progress = false;
327 nir_foreach_function(function, shader) {
328 if (!function->impl)
329 continue;
330
331 bool has_local_splits = false;
332 if (modes & nir_var_function_temp) {
333 has_local_splits = split_var_list_structs(shader, function->impl,
334 &function->impl->locals,
335 nir_var_function_temp,
336 var_field_map,
337 &complex_vars,
338 mem_ctx);
339 }
340
341 if (has_global_splits || has_local_splits) {
342 split_struct_derefs_impl(function->impl, var_field_map,
343 modes, mem_ctx);
344
345 nir_metadata_preserve(function->impl, nir_metadata_block_index |
346 nir_metadata_dominance);
347 progress = true;
348 } else {
349 nir_metadata_preserve(function->impl, nir_metadata_all);
350 }
351 }
352
353 ralloc_free(mem_ctx);
354
355 return progress;
356 }
357
358 struct array_level_info {
359 unsigned array_len;
360 bool split;
361 };
362
363 struct array_split {
364 /* Only set if this is the tail end of the splitting */
365 nir_variable *var;
366
367 unsigned num_splits;
368 struct array_split *splits;
369 };
370
371 struct array_var_info {
372 nir_variable *base_var;
373
374 const struct glsl_type *split_var_type;
375
376 bool split_var;
377 struct array_split root_split;
378
379 unsigned num_levels;
380 struct array_level_info levels[0];
381 };
382
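/* For every array-of-vectors variable of the given mode, record the length
 * of each array level and optimistically mark every level as splittable;
 * mark_array_usage_impl() later clears the flag for levels that are indexed
 * indirectly.
 */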
383 static bool
384 init_var_list_array_infos(nir_shader *shader,
385 struct exec_list *vars,
386 nir_variable_mode mode,
387 struct hash_table *var_info_map,
388 struct set **complex_vars,
389 void *mem_ctx)
390 {
391 bool has_array = false;
392
393 nir_foreach_variable_in_list(var, vars) {
394 if (var->data.mode != mode)
395 continue;
396
397 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
398 if (num_levels <= 0)
399 continue;
400
401 if (*complex_vars == NULL)
402 *complex_vars = get_complex_used_vars(shader, mem_ctx);
403
404 /* We can't split a variable that's referenced with a deref that has any
405 * sort of complex usage.
406 */
407 if (_mesa_set_search(*complex_vars, var))
408 continue;
409
410 struct array_var_info *info =
411 rzalloc_size(mem_ctx, sizeof(*info) +
412 num_levels * sizeof(info->levels[0]));
413
414 info->base_var = var;
415 info->num_levels = num_levels;
416
417 const struct glsl_type *type = var->type;
418 for (int i = 0; i < num_levels; i++) {
419 info->levels[i].array_len = glsl_get_length(type);
420 type = glsl_get_array_element(type);
421
422 /* All levels start out initially as split */
423 info->levels[i].split = true;
424 }
425
426 _mesa_hash_table_insert(var_info_map, var, info);
427 has_array = true;
428 }
429
430 return has_array;
431 }
432
433 static struct array_var_info *
434 get_array_var_info(nir_variable *var,
435 struct hash_table *var_info_map)
436 {
437 struct hash_entry *entry =
438 _mesa_hash_table_search(var_info_map, var);
439 return entry ? entry->data : NULL;
440 }
441
442 static struct array_var_info *
443 get_array_deref_info(nir_deref_instr *deref,
444 struct hash_table *var_info_map,
445 nir_variable_mode modes)
446 {
447 if (!nir_deref_mode_may_be(deref, modes))
448 return NULL;
449
450 nir_variable *var = nir_deref_instr_get_variable(deref);
451 if (var == NULL)
452 return NULL;
453
454 return get_array_var_info(var, var_info_map);
455 }
456
457 static void
458 mark_array_deref_used(nir_deref_instr *deref,
459 struct hash_table *var_info_map,
460 nir_variable_mode modes,
461 void *mem_ctx)
462 {
463 struct array_var_info *info =
464 get_array_deref_info(deref, var_info_map, modes);
465 if (!info)
466 return;
467
468 nir_deref_path path;
469 nir_deref_path_init(&path, deref, mem_ctx);
470
471 /* Walk the path and look for indirects. If we have an array deref with an
472 * indirect, mark the given level as not being split.
473 */
474 for (unsigned i = 0; i < info->num_levels; i++) {
475 nir_deref_instr *p = path.path[i + 1];
476 if (p->deref_type == nir_deref_type_array &&
477 !nir_src_is_const(p->arr.index))
478 info->levels[i].split = false;
479 }
480 }
481
482 static void
483 mark_array_usage_impl(nir_function_impl *impl,
484 struct hash_table *var_info_map,
485 nir_variable_mode modes,
486 void *mem_ctx)
487 {
488 nir_foreach_block(block, impl) {
489 nir_foreach_instr(instr, block) {
490 if (instr->type != nir_instr_type_intrinsic)
491 continue;
492
493 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
494 switch (intrin->intrinsic) {
495 case nir_intrinsic_copy_deref:
496 mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
497 var_info_map, modes, mem_ctx);
498 /* Fall Through */
499
500 case nir_intrinsic_load_deref:
501 case nir_intrinsic_store_deref:
502 mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
503 var_info_map, modes, mem_ctx);
504 break;
505
506 default:
507 break;
508 }
509 }
510 }
511 }
512
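/* Recursively create the variables for a split array.  Levels that stay
 * unsplit are folded into the type of the leaf variable, and names are built
 * up like "(foo[2][*])" so later derefs remain readable in debug output.
 */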
513 static void
514 create_split_array_vars(struct array_var_info *var_info,
515 unsigned level,
516 struct array_split *split,
517 const char *name,
518 nir_shader *shader,
519 nir_function_impl *impl,
520 void *mem_ctx)
521 {
522 while (level < var_info->num_levels && !var_info->levels[level].split) {
523 name = ralloc_asprintf(mem_ctx, "%s[*]", name);
524 level++;
525 }
526
527 if (level == var_info->num_levels) {
528 /* We add parens to the variable name so it looks like "(foo[2][*])" so
529 * that further derefs will look like "(foo[2][*])[ssa_6]"
530 */
531 name = ralloc_asprintf(mem_ctx, "(%s)", name);
532
533 nir_variable_mode mode = var_info->base_var->data.mode;
534 if (mode == nir_var_function_temp) {
535 split->var = nir_local_variable_create(impl,
536 var_info->split_var_type, name);
537 } else {
538 split->var = nir_variable_create(shader, mode,
539 var_info->split_var_type, name);
540 }
541 } else {
542 assert(var_info->levels[level].split);
543 split->num_splits = var_info->levels[level].array_len;
544 split->splits = rzalloc_array(mem_ctx, struct array_split,
545 split->num_splits);
546 for (unsigned i = 0; i < split->num_splits; i++) {
547 create_split_array_vars(var_info, level + 1, &split->splits[i],
548 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
549 shader, impl, mem_ctx);
550 }
551 }
552 }
553
554 static bool
555 split_var_list_arrays(nir_shader *shader,
556 nir_function_impl *impl,
557 struct exec_list *vars,
558 nir_variable_mode mode,
559 struct hash_table *var_info_map,
560 void *mem_ctx)
561 {
562 struct exec_list split_vars;
563 exec_list_make_empty(&split_vars);
564
565 nir_foreach_variable_in_list_safe(var, vars) {
566 if (var->data.mode != mode)
567 continue;
568
569 struct array_var_info *info = get_array_var_info(var, var_info_map);
570 if (!info)
571 continue;
572
573 bool has_split = false;
574 const struct glsl_type *split_type =
575 glsl_without_array_or_matrix(var->type);
576 for (int i = info->num_levels - 1; i >= 0; i--) {
577 if (info->levels[i].split) {
578 has_split = true;
579 continue;
580 }
581
582 /* If the original type was a matrix type, we'd like to keep that so
583 * we don't convert matrices into arrays.
584 */
585 if (i == info->num_levels - 1 &&
586 glsl_type_is_matrix(glsl_without_array(var->type))) {
587 split_type = glsl_matrix_type(glsl_get_base_type(split_type),
588 glsl_get_components(split_type),
589 info->levels[i].array_len);
590 } else {
591 split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
592 }
593 }
594
595 if (has_split) {
596 info->split_var_type = split_type;
597 /* To avoid list confusion (we'll be adding things as we split
598 * variables), pull all of the variables we plan to split off of the
599 * main variable list.
600 */
601 exec_node_remove(&var->node);
602 exec_list_push_tail(&split_vars, &var->node);
603 } else {
604 assert(split_type == glsl_get_bare_type(var->type));
605 /* If we're not modifying this variable, delete the info so we skip
606 * it faster in later passes.
607 */
608 _mesa_hash_table_remove_key(var_info_map, var);
609 }
610 }
611
612 nir_foreach_variable_in_list(var, &split_vars) {
613 struct array_var_info *info = get_array_var_info(var, var_info_map);
614 create_split_array_vars(info, 0, &info->root_split, var->name,
615 shader, impl, mem_ctx);
616 }
617
618 return !exec_list_is_empty(&split_vars);
619 }
620
621 static bool
622 deref_has_split_wildcard(nir_deref_path *path,
623 struct array_var_info *info)
624 {
625 if (info == NULL)
626 return false;
627
628 assert(path->path[0]->var == info->base_var);
629 for (unsigned i = 0; i < info->num_levels; i++) {
630 if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
631 info->levels[i].split)
632 return true;
633 }
634
635 return false;
636 }
637
638 static bool
639 array_path_is_out_of_bounds(nir_deref_path *path,
640 struct array_var_info *info)
641 {
642 if (info == NULL)
643 return false;
644
645 assert(path->path[0]->var == info->base_var);
646 for (unsigned i = 0; i < info->num_levels; i++) {
647 nir_deref_instr *p = path->path[i + 1];
648 if (p->deref_type == nir_deref_type_array_wildcard)
649 continue;
650
651 if (nir_src_is_const(p->arr.index) &&
652 nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
653 return true;
654 }
655
656 return false;
657 }
658
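/* Expand a copy_deref whose source or destination contains wildcards over
 * split array levels into one copy per element, recursing level by level
 * until no split wildcard remains.
 */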
659 static void
660 emit_split_copies(nir_builder *b,
661 struct array_var_info *dst_info, nir_deref_path *dst_path,
662 unsigned dst_level, nir_deref_instr *dst,
663 struct array_var_info *src_info, nir_deref_path *src_path,
664 unsigned src_level, nir_deref_instr *src)
665 {
666 nir_deref_instr *dst_p, *src_p;
667
668 while ((dst_p = dst_path->path[dst_level + 1])) {
669 if (dst_p->deref_type == nir_deref_type_array_wildcard)
670 break;
671
672 dst = nir_build_deref_follower(b, dst, dst_p);
673 dst_level++;
674 }
675
676 while ((src_p = src_path->path[src_level + 1])) {
677 if (src_p->deref_type == nir_deref_type_array_wildcard)
678 break;
679
680 src = nir_build_deref_follower(b, src, src_p);
681 src_level++;
682 }
683
684 if (src_p == NULL || dst_p == NULL) {
685 assert(src_p == NULL && dst_p == NULL);
686 nir_copy_deref(b, dst, src);
687 } else {
688 assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
689 src_p->deref_type == nir_deref_type_array_wildcard);
690
691 if ((dst_info && dst_info->levels[dst_level].split) ||
692 (src_info && src_info->levels[src_level].split)) {
693 /* There are no indirects at this level on either the source or the
694 * destination, so we are lowering it.
695 */
696 assert(glsl_get_length(dst_path->path[dst_level]->type) ==
697 glsl_get_length(src_path->path[src_level]->type));
698 unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
699 for (unsigned i = 0; i < len; i++) {
700 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
701 nir_build_deref_array_imm(b, dst, i),
702 src_info, src_path, src_level + 1,
703 nir_build_deref_array_imm(b, src, i));
704 }
705 } else {
706 /* Neither side is being split so we just keep going */
707 emit_split_copies(b, dst_info, dst_path, dst_level + 1,
708 nir_build_deref_array_wildcard(b, dst),
709 src_info, src_path, src_level + 1,
710 nir_build_deref_array_wildcard(b, src));
711 }
712 }
713 }
714
715 static void
716 split_array_copies_impl(nir_function_impl *impl,
717 struct hash_table *var_info_map,
718 nir_variable_mode modes,
719 void *mem_ctx)
720 {
721 nir_builder b;
722 nir_builder_init(&b, impl);
723
724 nir_foreach_block(block, impl) {
725 nir_foreach_instr_safe(instr, block) {
726 if (instr->type != nir_instr_type_intrinsic)
727 continue;
728
729 nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
730 if (copy->intrinsic != nir_intrinsic_copy_deref)
731 continue;
732
733 nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
734 nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
735
736 struct array_var_info *dst_info =
737 get_array_deref_info(dst_deref, var_info_map, modes);
738 struct array_var_info *src_info =
739 get_array_deref_info(src_deref, var_info_map, modes);
740
741 if (!src_info && !dst_info)
742 continue;
743
744 nir_deref_path dst_path, src_path;
745 nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
746 nir_deref_path_init(&src_path, src_deref, mem_ctx);
747
748 if (!deref_has_split_wildcard(&dst_path, dst_info) &&
749 !deref_has_split_wildcard(&src_path, src_info))
750 continue;
751
752 b.cursor = nir_instr_remove(&copy->instr);
753
754 emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
755 src_info, &src_path, 0, src_path.path[0]);
756 }
757 }
758 }
759
760 static void
761 split_array_access_impl(nir_function_impl *impl,
762 struct hash_table *var_info_map,
763 nir_variable_mode modes,
764 void *mem_ctx)
765 {
766 nir_builder b;
767 nir_builder_init(&b, impl);
768
769 nir_foreach_block(block, impl) {
770 nir_foreach_instr_safe(instr, block) {
771 if (instr->type == nir_instr_type_deref) {
772 /* Clean up any dead derefs we find lying around. They may refer
773 * to variables we're planning to split.
774 */
775 nir_deref_instr *deref = nir_instr_as_deref(instr);
776 if (nir_deref_mode_may_be(deref, modes))
777 nir_deref_instr_remove_if_unused(deref);
778 continue;
779 }
780
781 if (instr->type != nir_instr_type_intrinsic)
782 continue;
783
784 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
785 if (intrin->intrinsic != nir_intrinsic_load_deref &&
786 intrin->intrinsic != nir_intrinsic_store_deref &&
787 intrin->intrinsic != nir_intrinsic_copy_deref)
788 continue;
789
790 const unsigned num_derefs =
791 intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
792
793 for (unsigned d = 0; d < num_derefs; d++) {
794 nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
795
796 struct array_var_info *info =
797 get_array_deref_info(deref, var_info_map, modes);
798 if (!info)
799 continue;
800
801 nir_deref_path path;
802 nir_deref_path_init(&path, deref, mem_ctx);
803
804 b.cursor = nir_before_instr(&intrin->instr);
805
806 if (array_path_is_out_of_bounds(&path, info)) {
807 /* If one of the derefs is out-of-bounds, we just delete the
808 * instruction. If a destination is out of bounds, then it may
809 * have been in-bounds prior to shrinking so we don't want to
810 * accidentally stomp something. However, we've already proven
811 * that it will never be read so it's safe to delete. If a
812 * source is out of bounds then it is loading random garbage.
813 * For loads, we replace their uses with an undef instruction
814 * and for copies we just delete the copy since it was writing
815 * undefined garbage anyway and we may as well leave the random
816 * garbage in the destination alone.
817 */
818 if (intrin->intrinsic == nir_intrinsic_load_deref) {
819 nir_ssa_def *u =
820 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
821 intrin->dest.ssa.bit_size);
822 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
823 nir_src_for_ssa(u));
824 }
825 nir_instr_remove(&intrin->instr);
826 for (unsigned i = 0; i < num_derefs; i++)
827 nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
828 break;
829 }
830
831 struct array_split *split = &info->root_split;
832 for (unsigned i = 0; i < info->num_levels; i++) {
833 if (info->levels[i].split) {
834 nir_deref_instr *p = path.path[i + 1];
835 unsigned index = nir_src_as_uint(p->arr.index);
836 assert(index < info->levels[i].array_len);
837 split = &split->splits[index];
838 }
839 }
840 assert(!split->splits && split->var);
841
842 nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
843 for (unsigned i = 0; i < info->num_levels; i++) {
844 if (!info->levels[i].split) {
845 new_deref = nir_build_deref_follower(&b, new_deref,
846 path.path[i + 1]);
847 }
848 }
849 assert(new_deref->type == deref->type);
850
851 /* Rewrite the deref source to point to the split one */
852 nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
853 nir_src_for_ssa(&new_deref->dest.ssa));
854 nir_deref_instr_remove_if_unused(deref);
855 }
856 }
857 }
858 }
859
860 /** A pass for splitting arrays of vectors into multiple variables
861 *
862 * This pass looks at arrays (possibly multiple levels) of vectors (not
863 * structures or other types) and tries to split them into piles of variables,
864 * one for each array element. The heuristic used is simple: If a given array
865 * level is never used with an indirect, that array level will get split.
866 *
867 * This pass probably could handle structures easily enough, but making a
868 * pass that could see through an array of structures of arrays would be
869 * difficult, so it's best to just run nir_split_struct_vars first.
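 *
 * A hedged usage sketch (not from this file): this pass is normally run
 * after nir_split_struct_vars, e.g.
 *
 *    NIR_PASS(progress, shader, nir_split_struct_vars, nir_var_function_temp);
 *    NIR_PASS(progress, shader, nir_split_array_vars, nir_var_function_temp);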
870 */
871 bool
872 nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
873 {
874 void *mem_ctx = ralloc_context(NULL);
875 struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
876 struct set *complex_vars = NULL;
877
878 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
879
880 bool has_global_array = false;
881 if (modes & nir_var_shader_temp) {
882 has_global_array = init_var_list_array_infos(shader,
883 &shader->variables,
884 nir_var_shader_temp,
885 var_info_map,
886 &complex_vars,
887 mem_ctx);
888 }
889
890 bool has_any_array = false;
891 nir_foreach_function(function, shader) {
892 if (!function->impl)
893 continue;
894
895 bool has_local_array = false;
896 if (modes & nir_var_function_temp) {
897 has_local_array = init_var_list_array_infos(shader,
898 &function->impl->locals,
899 nir_var_function_temp,
900 var_info_map,
901 &complex_vars,
902 mem_ctx);
903 }
904
905 if (has_global_array || has_local_array) {
906 has_any_array = true;
907 mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
908 }
909 }
910
911 /* If we failed to find any arrays of vectors, bail early. */
912 if (!has_any_array) {
913 ralloc_free(mem_ctx);
914 nir_shader_preserve_all_metadata(shader);
915 return false;
916 }
917
918 bool has_global_splits = false;
919 if (modes & nir_var_shader_temp) {
920 has_global_splits = split_var_list_arrays(shader, NULL,
921 &shader->variables,
922 nir_var_shader_temp,
923 var_info_map, mem_ctx);
924 }
925
926 bool progress = false;
927 nir_foreach_function(function, shader) {
928 if (!function->impl)
929 continue;
930
931 bool has_local_splits = false;
932 if (modes & nir_var_function_temp) {
933 has_local_splits = split_var_list_arrays(shader, function->impl,
934 &function->impl->locals,
935 nir_var_function_temp,
936 var_info_map, mem_ctx);
937 }
938
939 if (has_global_splits || has_local_splits) {
940 split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
941 split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
942
943 nir_metadata_preserve(function->impl, nir_metadata_block_index |
944 nir_metadata_dominance);
945 progress = true;
946 } else {
947 nir_metadata_preserve(function->impl, nir_metadata_all);
948 }
949 }
950
951 ralloc_free(mem_ctx);
952
953 return progress;
954 }
955
956 struct array_level_usage {
957 unsigned array_len;
958
959 /* The value UINT_MAX will be used to indicate an indirect */
960 unsigned max_read;
961 unsigned max_written;
962
963 /* True if there is a copy that isn't to/from a shrinkable array */
964 bool has_external_copy;
965 struct set *levels_copied;
966 };
967
968 struct vec_var_usage {
969 /* Convenience set of all components this variable has */
970 nir_component_mask_t all_comps;
971
972 nir_component_mask_t comps_read;
973 nir_component_mask_t comps_written;
974
975 nir_component_mask_t comps_kept;
976
977 /* True if there is a copy that isn't to/from a shrinkable vector */
978 bool has_external_copy;
979 bool has_complex_use;
980 struct set *vars_copied;
981
982 unsigned num_levels;
983 struct array_level_usage levels[0];
984 };
985
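/* Look up (or, when add_usage_entry is set, create) the usage record for a
 * variable.  Only arrays of vectors are tracked; plain vectors are left for
 * SSA-level cleanup, as the comment inside explains.
 */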
986 static struct vec_var_usage *
987 get_vec_var_usage(nir_variable *var,
988 struct hash_table *var_usage_map,
989 bool add_usage_entry, void *mem_ctx)
990 {
991 struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
992 if (entry)
993 return entry->data;
994
995 if (!add_usage_entry)
996 return NULL;
997
998 /* Check to make sure that we are working with an array of vectors. We
999 * don't bother to shrink single vectors because we figure that we can
1000 * clean it up better with SSA than by inserting piles of vecN instructions
1001 * to compact results.
1002 */
1003 int num_levels = num_array_levels_in_array_of_vector_type(var->type);
1004 if (num_levels < 1)
1005 return NULL; /* Not an array of vectors */
1006
1007 struct vec_var_usage *usage =
1008 rzalloc_size(mem_ctx, sizeof(*usage) +
1009 num_levels * sizeof(usage->levels[0]));
1010
1011 usage->num_levels = num_levels;
1012 const struct glsl_type *type = var->type;
1013 for (unsigned i = 0; i < num_levels; i++) {
1014 usage->levels[i].array_len = glsl_get_length(type);
1015 type = glsl_get_array_element(type);
1016 }
1017 assert(glsl_type_is_vector_or_scalar(type));
1018
1019 usage->all_comps = (1 << glsl_get_components(type)) - 1;
1020
1021 _mesa_hash_table_insert(var_usage_map, var, usage);
1022
1023 return usage;
1024 }
1025
1026 static struct vec_var_usage *
1027 get_vec_deref_usage(nir_deref_instr *deref,
1028 struct hash_table *var_usage_map,
1029 nir_variable_mode modes,
1030 bool add_usage_entry, void *mem_ctx)
1031 {
1032 if (!nir_deref_mode_may_be(deref, modes))
1033 return NULL;
1034
1035 nir_variable *var = nir_deref_instr_get_variable(deref);
1036 if (var == NULL)
1037 return NULL;
1038
1039 return get_vec_var_usage(nir_deref_instr_get_variable(deref),
1040 var_usage_map, add_usage_entry, mem_ctx);
1041 }
1042
1043 static void
1044 mark_deref_if_complex(nir_deref_instr *deref,
1045 struct hash_table *var_usage_map,
1046 nir_variable_mode modes,
1047 void *mem_ctx)
1048 {
1049 /* Only bother with var derefs because nir_deref_instr_has_complex_use is
1050 * recursive.
1051 */
1052 if (deref->deref_type != nir_deref_type_var)
1053 return;
1054
1055 if (!(deref->var->data.mode & modes))
1056 return;
1057
1058 if (!nir_deref_instr_has_complex_use(deref))
1059 return;
1060
1061 struct vec_var_usage *usage =
1062 get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx);
1063 if (!usage)
1064 return;
1065
1066 usage->has_complex_use = true;
1067 }
1068
1069 static void
1070 mark_deref_used(nir_deref_instr *deref,
1071 nir_component_mask_t comps_read,
1072 nir_component_mask_t comps_written,
1073 nir_deref_instr *copy_deref,
1074 struct hash_table *var_usage_map,
1075 nir_variable_mode modes,
1076 void *mem_ctx)
1077 {
1078 if (!nir_deref_mode_may_be(deref, modes))
1079 return;
1080
1081 nir_variable *var = nir_deref_instr_get_variable(deref);
1082 if (var == NULL)
1083 return;
1084
1085 struct vec_var_usage *usage =
1086 get_vec_var_usage(var, var_usage_map, true, mem_ctx);
1087 if (!usage)
1088 return;
1089
1090 usage->comps_read |= comps_read & usage->all_comps;
1091 usage->comps_written |= comps_written & usage->all_comps;
1092
1093 struct vec_var_usage *copy_usage = NULL;
1094 if (copy_deref) {
1095 copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
1096 true, mem_ctx);
1097 if (copy_usage) {
1098 if (usage->vars_copied == NULL) {
1099 usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
1100 }
1101 _mesa_set_add(usage->vars_copied, copy_usage);
1102 } else {
1103 usage->has_external_copy = true;
1104 }
1105 }
1106
1107 nir_deref_path path;
1108 nir_deref_path_init(&path, deref, mem_ctx);
1109
1110 nir_deref_path copy_path;
1111 if (copy_usage)
1112 nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
1113
1114 unsigned copy_i = 0;
1115 for (unsigned i = 0; i < usage->num_levels; i++) {
1116 struct array_level_usage *level = &usage->levels[i];
1117 nir_deref_instr *deref = path.path[i + 1];
1118 assert(deref->deref_type == nir_deref_type_array ||
1119 deref->deref_type == nir_deref_type_array_wildcard);
1120
1121 unsigned max_used;
1122 if (deref->deref_type == nir_deref_type_array) {
1123 max_used = nir_src_is_const(deref->arr.index) ?
1124 nir_src_as_uint(deref->arr.index) : UINT_MAX;
1125 } else {
1126 /* For wildcards, we read or wrote the whole thing. */
1127 assert(deref->deref_type == nir_deref_type_array_wildcard);
1128 max_used = level->array_len - 1;
1129
1130 if (copy_usage) {
1131 /* Match each wildcard level with the level on copy_usage */
1132 for (; copy_path.path[copy_i + 1]; copy_i++) {
1133 if (copy_path.path[copy_i + 1]->deref_type ==
1134 nir_deref_type_array_wildcard)
1135 break;
1136 }
1137 struct array_level_usage *copy_level =
1138 &copy_usage->levels[copy_i++];
1139
1140 if (level->levels_copied == NULL) {
1141 level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1142 }
1143 _mesa_set_add(level->levels_copied, copy_level);
1144 } else {
1145 /* We have a wildcard and it comes from a variable we aren't
1146 * tracking; flag it and we'll know to not shorten this array.
1147 */
1148 level->has_external_copy = true;
1149 }
1150 }
1151
1152 if (comps_written)
1153 level->max_written = MAX2(level->max_written, max_used);
1154 if (comps_read)
1155 level->max_read = MAX2(level->max_read, max_used);
1156 }
1157 }
1158
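/* Returns true if "src" is the value produced by a load_deref from the same
 * deref as "deref_src".
 */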
1159 static bool
1160 src_is_load_deref(nir_src src, nir_src deref_src)
1161 {
1162 nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1163 if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1164 return false;
1165
1166 assert(load->src[0].is_ssa);
1167
1168 return load->src[0].ssa == deref_src.ssa;
1169 }
1170
1171 /* Returns all non-self-referential components of a store instruction. A
1172 * component is self-referential if it comes from the same component of a load
1173 * instruction on the same deref. If the only data in a particular component
1174 * of a variable came directly from that component then it's undefined. The
1175 * only way to get defined data into a component of a variable is for it to
1176 * get written there by something outside or from a different component.
1177 *
1178 * This is a fairly common pattern in shaders that come from either GLSL IR or
1179 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1180 * load-vec-store.
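 *
 * A hedged illustration of the pattern (pseudocode, not from a real shader):
 *
 *    vec4 tmp = load_deref(var);
 *    tmp.x = new_value;
 *    store_deref(var, tmp, writemask = xyzw);
 *
 * Only component x carries outside data here; y, z and w are
 * self-referential and are discounted by this function.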
1181 */
1182 static nir_component_mask_t
1183 get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1184 {
1185 nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1186
1187 assert(store->src[1].is_ssa);
1188 nir_instr *src_instr = store->src[1].ssa->parent_instr;
1189 if (src_instr->type != nir_instr_type_alu)
1190 return comps;
1191
1192 nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1193
1194 if (src_alu->op == nir_op_mov) {
1195 /* If it's just a swizzle of a load from the same deref, discount any
1196 * channels that don't move in the swizzle.
1197 */
1198 if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1199 for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1200 if (src_alu->src[0].swizzle[i] == i)
1201 comps &= ~(1u << i);
1202 }
1203 }
1204 } else if (nir_op_is_vec(src_alu->op)) {
1205 /* If it's a vec, discount any channels that are just loads from the
1206 * same deref put in the same spot.
1207 */
1208 for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1209 if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1210 src_alu->src[i].swizzle[0] == i)
1211 comps &= ~(1u << i);
1212 }
1213 }
1214
1215 return comps;
1216 }
1217
1218 static void
1219 find_used_components_impl(nir_function_impl *impl,
1220 struct hash_table *var_usage_map,
1221 nir_variable_mode modes,
1222 void *mem_ctx)
1223 {
1224 nir_foreach_block(block, impl) {
1225 nir_foreach_instr(instr, block) {
1226 if (instr->type == nir_instr_type_deref) {
1227 mark_deref_if_complex(nir_instr_as_deref(instr),
1228 var_usage_map, modes, mem_ctx);
1229 }
1230
1231 if (instr->type != nir_instr_type_intrinsic)
1232 continue;
1233
1234 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1235 switch (intrin->intrinsic) {
1236 case nir_intrinsic_load_deref:
1237 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1238 nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1239 NULL, var_usage_map, modes, mem_ctx);
1240 break;
1241
1242 case nir_intrinsic_store_deref:
1243 mark_deref_used(nir_src_as_deref(intrin->src[0]),
1244 0, get_non_self_referential_store_comps(intrin),
1245 NULL, var_usage_map, modes, mem_ctx);
1246 break;
1247
1248 case nir_intrinsic_copy_deref: {
1249 /* Just mark everything used for copies. */
1250 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1251 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1252 mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1253 mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1254 break;
1255 }
1256
1257 default:
1258 break;
1259 }
1260 }
1261 }
1262 }
1263
1264 static bool
1265 shrink_vec_var_list(struct exec_list *vars,
1266 nir_variable_mode mode,
1267 struct hash_table *var_usage_map)
1268 {
1269 /* Initialize the components kept field of each variable. This is the
1270 * AND of the components written and components read. If a component is
1271 * written but never read, it's dead. If it is read but never written,
1272 * then all values read are undefined garbage and we may as well not read
1273 * them.
1274 *
1275 * The same logic applies to the array length. We make the array length
1276 * the minimum required length between read and write and plan to
1277 * discard any OOB access. The one exception here is indirect writes
1278 * because we don't know where they will land and we can't shrink an array
1279 * with indirect writes because previously in-bounds writes may become
1280 * out-of-bounds and have undefined behavior.
1281 *
1282 * Also, if we have a copy to/from something we can't shrink, we need
1283 * to leave components and array_len of any wildcards alone.
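 *
 * Worked example (hypothetical): a vec4 that is written through .xyz but
 * only read through .yzw keeps just .yz, and an array written at indices
 * 0..7 but read only at 0..3 can shrink to length 4 (absent indirect
 * writes or external copies).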
1284 */
1285 nir_foreach_variable_in_list(var, vars) {
1286 if (var->data.mode != mode)
1287 continue;
1288
1289 struct vec_var_usage *usage =
1290 get_vec_var_usage(var, var_usage_map, false, NULL);
1291 if (!usage)
1292 continue;
1293
1294 assert(usage->comps_kept == 0);
1295 if (usage->has_external_copy || usage->has_complex_use)
1296 usage->comps_kept = usage->all_comps;
1297 else
1298 usage->comps_kept = usage->comps_read & usage->comps_written;
1299
1300 for (unsigned i = 0; i < usage->num_levels; i++) {
1301 struct array_level_usage *level = &usage->levels[i];
1302 assert(level->array_len > 0);
1303
1304 if (level->max_written == UINT_MAX || level->has_external_copy ||
1305 usage->has_complex_use)
1306 continue; /* Can't shrink */
1307
1308 unsigned max_used = MIN2(level->max_read, level->max_written);
1309 level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1310 }
1311 }
1312
1313 /* In order for variable copies to work, we have to have the same data type
1314 * on the source and the destination. In order to satisfy this, we run a
1315 * little fixed-point algorithm to transitively ensure that we get enough
1316 * components and array elements for this to hold for all copies.
1317 */
1318 bool fp_progress;
1319 do {
1320 fp_progress = false;
1321 nir_foreach_variable_in_list(var, vars) {
1322 if (var->data.mode != mode)
1323 continue;
1324
1325 struct vec_var_usage *var_usage =
1326 get_vec_var_usage(var, var_usage_map, false, NULL);
1327 if (!var_usage || !var_usage->vars_copied)
1328 continue;
1329
1330 set_foreach(var_usage->vars_copied, copy_entry) {
1331 struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1332 if (copy_usage->comps_kept != var_usage->comps_kept) {
1333 nir_component_mask_t comps_kept =
1334 (var_usage->comps_kept | copy_usage->comps_kept);
1335 var_usage->comps_kept = comps_kept;
1336 copy_usage->comps_kept = comps_kept;
1337 fp_progress = true;
1338 }
1339 }
1340
1341 for (unsigned i = 0; i < var_usage->num_levels; i++) {
1342 struct array_level_usage *var_level = &var_usage->levels[i];
1343 if (!var_level->levels_copied)
1344 continue;
1345
1346 set_foreach(var_level->levels_copied, copy_entry) {
1347 struct array_level_usage *copy_level = (void *)copy_entry->key;
1348 if (var_level->array_len != copy_level->array_len) {
1349 unsigned array_len =
1350 MAX2(var_level->array_len, copy_level->array_len);
1351 var_level->array_len = array_len;
1352 copy_level->array_len = array_len;
1353 fp_progress = true;
1354 }
1355 }
1356 }
1357 }
1358 } while (fp_progress);
1359
1360 bool vars_shrunk = false;
1361 nir_foreach_variable_in_list_safe(var, vars) {
1362 if (var->data.mode != mode)
1363 continue;
1364
1365 struct vec_var_usage *usage =
1366 get_vec_var_usage(var, var_usage_map, false, NULL);
1367 if (!usage)
1368 continue;
1369
1370 bool shrunk = false;
1371 const struct glsl_type *vec_type = var->type;
1372 for (unsigned i = 0; i < usage->num_levels; i++) {
1373 /* If we've reduced the array to zero elements at some level, just
1374 * set comps_kept to 0 and delete the variable.
1375 */
1376 if (usage->levels[i].array_len == 0) {
1377 usage->comps_kept = 0;
1378 break;
1379 }
1380
1381 assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1382 if (usage->levels[i].array_len < glsl_get_length(vec_type))
1383 shrunk = true;
1384 vec_type = glsl_get_array_element(vec_type);
1385 }
1386 assert(glsl_type_is_vector_or_scalar(vec_type));
1387
1388 assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1389 if (usage->comps_kept != usage->all_comps)
1390 shrunk = true;
1391
1392 if (usage->comps_kept == 0) {
1393 /* This variable is dead, remove it */
1394 vars_shrunk = true;
1395 exec_node_remove(&var->node);
1396 continue;
1397 }
1398
1399 if (!shrunk) {
1400 /* This variable doesn't need to be shrunk. Remove it from the
1401 * hash table so later steps will ignore it.
1402 */
1403 _mesa_hash_table_remove_key(var_usage_map, var);
1404 continue;
1405 }
1406
1407 /* Build the new var type */
1408 unsigned new_num_comps = util_bitcount(usage->comps_kept);
1409 const struct glsl_type *new_type =
1410 glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1411 for (int i = usage->num_levels - 1; i >= 0; i--) {
1412 assert(usage->levels[i].array_len > 0);
1413 /* If the original type was a matrix type, we'd like to keep that so
1414 * we don't convert matrices into arrays.
1415 */
1416 if (i == usage->num_levels - 1 &&
1417 glsl_type_is_matrix(glsl_without_array(var->type)) &&
1418 new_num_comps > 1 && usage->levels[i].array_len > 1) {
1419 new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1420 new_num_comps,
1421 usage->levels[i].array_len);
1422 } else {
1423 new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1424 }
1425 }
1426 var->type = new_type;
1427
1428 vars_shrunk = true;
1429 }
1430
1431 return vars_shrunk;
1432 }
1433
1434 static bool
1435 vec_deref_is_oob(nir_deref_instr *deref,
1436 struct vec_var_usage *usage)
1437 {
1438 nir_deref_path path;
1439 nir_deref_path_init(&path, deref, NULL);
1440
1441 bool oob = false;
1442 for (unsigned i = 0; i < usage->num_levels; i++) {
1443 nir_deref_instr *p = path.path[i + 1];
1444 if (p->deref_type == nir_deref_type_array_wildcard)
1445 continue;
1446
1447 if (nir_src_is_const(p->arr.index) &&
1448 nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1449 oob = true;
1450 break;
1451 }
1452 }
1453
1454 nir_deref_path_finish(&path);
1455
1456 return oob;
1457 }
1458
1459 static bool
1460 vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1461 struct hash_table *var_usage_map,
1462 nir_variable_mode modes)
1463 {
1464 struct vec_var_usage *usage =
1465 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1466 if (!usage)
1467 return false;
1468
1469 return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1470 }
1471
1472 static void
1473 shrink_vec_var_access_impl(nir_function_impl *impl,
1474 struct hash_table *var_usage_map,
1475 nir_variable_mode modes)
1476 {
1477 nir_builder b;
1478 nir_builder_init(&b, impl);
1479
1480 nir_foreach_block(block, impl) {
1481 nir_foreach_instr_safe(instr, block) {
1482 switch (instr->type) {
1483 case nir_instr_type_deref: {
1484 nir_deref_instr *deref = nir_instr_as_deref(instr);
1485 if (!nir_deref_mode_may_be(deref, modes))
1486 break;
1487
1488 /* Clean up any dead derefs we find lying around. They may refer
1489 * to variables we've deleted.
1490 */
1491 if (nir_deref_instr_remove_if_unused(deref))
1492 break;
1493
1494 /* Update the type in the deref to keep the types consistent as
1495 * you walk down the chain. We don't need to check if this is one
1496 * of the derefs we're shrinking because this is a no-op if it
1497 * isn't. The worst that could happen is that we accidentally fix
1498 * an invalid deref.
1499 */
1500 if (deref->deref_type == nir_deref_type_var) {
1501 deref->type = deref->var->type;
1502 } else if (deref->deref_type == nir_deref_type_array ||
1503 deref->deref_type == nir_deref_type_array_wildcard) {
1504 nir_deref_instr *parent = nir_deref_instr_parent(deref);
1505 assert(glsl_type_is_array(parent->type) ||
1506 glsl_type_is_matrix(parent->type));
1507 deref->type = glsl_get_array_element(parent->type);
1508 }
1509 break;
1510 }
1511
1512 case nir_instr_type_intrinsic: {
1513 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1514
1515 /* If we have a copy whose source or destination has been deleted
1516 * because we determined the variable was dead, then we just
1517 * delete the copy instruction. If the source variable was dead
1518 * then it was writing undefined garbage anyway and if it's the
1519 * destination variable that's dead then the write isn't needed.
1520 */
1521 if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1522 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1523 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1524 if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1525 vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1526 nir_instr_remove(&intrin->instr);
1527 nir_deref_instr_remove_if_unused(dst);
1528 nir_deref_instr_remove_if_unused(src);
1529 }
1530 continue;
1531 }
1532
1533 if (intrin->intrinsic != nir_intrinsic_load_deref &&
1534 intrin->intrinsic != nir_intrinsic_store_deref)
1535 continue;
1536
1537 nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1538 if (!nir_deref_mode_may_be(deref, modes))
1539 continue;
1540
1541 struct vec_var_usage *usage =
1542 get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1543 if (!usage)
1544 continue;
1545
1546 if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1547 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1548 nir_ssa_def *u =
1549 nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1550 intrin->dest.ssa.bit_size);
1551 nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1552 nir_src_for_ssa(u));
1553 }
1554 nir_instr_remove(&intrin->instr);
1555 nir_deref_instr_remove_if_unused(deref);
1556 continue;
1557 }
1558
1559 /* If we're not dropping any components, there's no need to
1560 * compact vectors.
1561 */
1562 if (usage->comps_kept == usage->all_comps)
1563 continue;
1564
1565 if (intrin->intrinsic == nir_intrinsic_load_deref) {
1566 b.cursor = nir_after_instr(&intrin->instr);
1567
1568 nir_ssa_def *undef =
1569 nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1570 nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1571 unsigned c = 0;
1572 for (unsigned i = 0; i < intrin->num_components; i++) {
1573 if (usage->comps_kept & (1u << i))
1574 vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1575 else
1576 vec_srcs[i] = undef;
1577 }
1578 nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1579
1580 nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1581 nir_src_for_ssa(vec),
1582 vec->parent_instr);
1583
1584 /* The SSA def is now only used by the swizzle. It's safe to
1585 * shrink the number of components.
1586 */
1587 assert(list_length(&intrin->dest.ssa.uses) == c);
1588 intrin->num_components = c;
1589 intrin->dest.ssa.num_components = c;
1590 } else {
1591 nir_component_mask_t write_mask =
1592 nir_intrinsic_write_mask(intrin);
1593
1594 unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1595 nir_component_mask_t new_write_mask = 0;
1596 unsigned c = 0;
1597 for (unsigned i = 0; i < intrin->num_components; i++) {
1598 if (usage->comps_kept & (1u << i)) {
1599 swizzle[c] = i;
1600 if (write_mask & (1u << i))
1601 new_write_mask |= 1u << c;
1602 c++;
1603 }
1604 }
1605
1606 b.cursor = nir_before_instr(&intrin->instr);
1607
1608 nir_ssa_def *swizzled =
1609 nir_swizzle(&b, intrin->src[1].ssa, swizzle, c);
1610
1611 /* Rewrite to use the compacted source */
1612 nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1613 nir_src_for_ssa(swizzled));
1614 nir_intrinsic_set_write_mask(intrin, new_write_mask);
1615 intrin->num_components = c;
1616 }
1617 break;
1618 }
1619
1620 default:
1621 break;
1622 }
1623 }
1624 }
1625 }
1626
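/* Returns true if this function_impl could see any variables of the given
 * modes, so callers can skip crawling the IR when there is nothing to do.
 */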
1627 static bool
1628 function_impl_has_vars_with_modes(nir_function_impl *impl,
1629 nir_variable_mode modes)
1630 {
1631 nir_shader *shader = impl->function->shader;
1632
1633 if (modes & ~nir_var_function_temp) {
1634 nir_foreach_variable_with_modes(var, shader,
1635 modes & ~nir_var_function_temp)
1636 return true;
1637 }
1638
1639 if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1640 return true;
1641
1642 return false;
1643 }
1644
1645 /** Attempt to shrink arrays of vectors
1646 *
1647 * This pass looks at variables which contain a vector or an array (possibly
1648 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1649 * or array. If the pass can prove that a component of a vector (or array of
1650 * vectors) is never really used, then that component will be removed.
1651 * Similarly, the pass attempts to shorten arrays based on what elements it
1652 * can prove are never read or never contain valid data.
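 *
 * A hedged usage sketch (not from this file):
 *
 *    NIR_PASS(progress, shader, nir_shrink_vec_array_vars,
 *             nir_var_function_temp | nir_var_shader_temp);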
1653 */
1654 bool
1655 nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1656 {
1657 assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1658
1659 void *mem_ctx = ralloc_context(NULL);
1660
1661 struct hash_table *var_usage_map =
1662 _mesa_pointer_hash_table_create(mem_ctx);
1663
1664 bool has_vars_to_shrink = false;
1665 nir_foreach_function(function, shader) {
1666 if (!function->impl)
1667 continue;
1668
1669 /* Don't even bother crawling the IR if we don't have any variables.
1670 * Given that this pass deletes any unused variables, it's likely that
1671 * we will be in this scenario eventually.
1672 */
1673 if (function_impl_has_vars_with_modes(function->impl, modes)) {
1674 has_vars_to_shrink = true;
1675 find_used_components_impl(function->impl, var_usage_map,
1676 modes, mem_ctx);
1677 }
1678 }
1679 if (!has_vars_to_shrink) {
1680 ralloc_free(mem_ctx);
1681 nir_shader_preserve_all_metadata(shader);
1682 return false;
1683 }
1684
1685 bool globals_shrunk = false;
1686 if (modes & nir_var_shader_temp) {
1687 globals_shrunk = shrink_vec_var_list(&shader->variables,
1688 nir_var_shader_temp,
1689 var_usage_map);
1690 }
1691
1692 bool progress = false;
1693 nir_foreach_function(function, shader) {
1694 if (!function->impl)
1695 continue;
1696
1697 bool locals_shrunk = false;
1698 if (modes & nir_var_function_temp) {
1699 locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1700 nir_var_function_temp,
1701 var_usage_map);
1702 }
1703
1704 if (globals_shrunk || locals_shrunk) {
1705 shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1706
1707 nir_metadata_preserve(function->impl, nir_metadata_block_index |
1708 nir_metadata_dominance);
1709 progress = true;
1710 } else {
1711 nir_metadata_preserve(function->impl, nir_metadata_all);
1712 }
1713 }
1714
1715 ralloc_free(mem_ctx);
1716
1717 return progress;
1718 }
1719