1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 #include "nir_builder.h"
26 
27 #include "util/bitscan.h"
28 
29 /**
30  * Variable-based copy propagation
31  *
32  * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
33  * However, there are cases, especially when dealing with indirects, where SSA
34  * won't help you.  This pass is for those times.  Specifically, it handles
35  * the following things that the rest of NIR can't:
36  *
37  *  1) Copy-propagation on variables that have indirect access.  This includes
38  *     propagating from indirect stores into indirect loads.
39  *
40  *  2) Dead code elimination of store_var and copy_var intrinsics based on
41  *     killed destination values.
42  *
43  *  3) Removal of redundant load_var intrinsics.  We can't trust regular CSE
44  *     to do this because it isn't aware of variable writes that may alias the
45  *     value and make the former load invalid.
46  *
47  * Unfortunately, properly handling all of those cases makes this path rather
48  * complex.  In order to avoid additional complexity, this pass is entirely
49  * block-local.  If we tried to make it global, the data-flow analysis would
50  * rapidly get out of hand.  Fortunately, for anything that is only ever
51  * accessed directly, we get SSA based copy-propagation which is extremely
52  * powerful so this isn't that great a loss.
53  */
54 
55 struct value {
56    bool is_ssa;
57    union {
58       nir_ssa_def *ssa[4];
59       nir_deref_var *deref;
60    };
61 };
62 
63 struct copy_entry {
64    struct list_head link;
65 
66    nir_instr *store_instr[4];
67 
68    unsigned comps_may_be_read;
69    struct value src;
70 
71    nir_deref_var *dst;
72 };
73 
74 struct copy_prop_var_state {
75    nir_shader *shader;
76 
77    void *mem_ctx;
78 
79    struct list_head copies;
80 
81    /* We're going to be allocating and deleting a lot of copy entries so we'll
82     * keep a free list to avoid thrashing malloc too badly.
83     */
84    struct list_head copy_free_list;
85 
86    bool progress;
87 };
88 
89 static struct copy_entry *
copy_entry_create(struct copy_prop_var_state * state,nir_deref_var * dst_deref)90 copy_entry_create(struct copy_prop_var_state *state,
91                   nir_deref_var *dst_deref)
92 {
93    struct copy_entry *entry;
94    if (!list_empty(&state->copy_free_list)) {
95       struct list_head *item = state->copy_free_list.next;
96       list_del(item);
97       entry = LIST_ENTRY(struct copy_entry, item, link);
98       memset(entry, 0, sizeof(*entry));
99    } else {
100       entry = rzalloc(state->mem_ctx, struct copy_entry);
101    }
102 
103    entry->dst = dst_deref;
104    list_add(&entry->link, &state->copies);
105 
106    return entry;
107 }
108 
109 static void
copy_entry_remove(struct copy_prop_var_state * state,struct copy_entry * entry)110 copy_entry_remove(struct copy_prop_var_state *state, struct copy_entry *entry)
111 {
112    list_del(&entry->link);
113    list_add(&entry->link, &state->copy_free_list);
114 }
115 
116 enum deref_compare_result {
117    derefs_equal_bit = (1 << 0),
118    derefs_may_alias_bit = (1 << 1),
119    derefs_a_contains_b_bit = (1 << 2),
120    derefs_b_contains_a_bit = (1 << 3),
121 };
122 
123 /** Returns true if the storage referrenced to by deref completely contains
124  * the storage referenced by sub.
125  *
126  * NOTE: This is fairly general and could be moved to core NIR if someone else
127  * ever needs it.
128  */
129 static enum deref_compare_result
compare_derefs(nir_deref_var * a,nir_deref_var * b)130 compare_derefs(nir_deref_var *a, nir_deref_var *b)
131 {
132    if (a->var != b->var)
133       return 0;
134 
135    /* Start off assuming they fully compare.  We ignore equality for now.  In
136     * the end, we'll determine that by containment.
137     */
138    enum deref_compare_result result = derefs_may_alias_bit |
139                                       derefs_a_contains_b_bit |
140                                       derefs_b_contains_a_bit;
141 
142    nir_deref *a_tail = &a->deref;
143    nir_deref *b_tail = &b->deref;
144    while (a_tail->child && b_tail->child) {
145       a_tail = a_tail->child;
146       b_tail = b_tail->child;
147 
148       assert(a_tail->deref_type == b_tail->deref_type);
149       switch (a_tail->deref_type) {
150       case nir_deref_type_array: {
151          nir_deref_array *a_arr = nir_deref_as_array(a_tail);
152          nir_deref_array *b_arr = nir_deref_as_array(b_tail);
153 
154          if (a_arr->deref_array_type == nir_deref_array_type_direct &&
155              b_arr->deref_array_type == nir_deref_array_type_direct) {
156             /* If they're both direct and have different offsets, they
157              * don't even alias much less anything else.
158              */
159             if (a_arr->base_offset != b_arr->base_offset)
160                return 0;
161          } else if (a_arr->deref_array_type == nir_deref_array_type_wildcard) {
162             if (b_arr->deref_array_type != nir_deref_array_type_wildcard)
163                result &= ~derefs_b_contains_a_bit;
164          } else if (b_arr->deref_array_type == nir_deref_array_type_wildcard) {
165             if (a_arr->deref_array_type != nir_deref_array_type_wildcard)
166                result &= ~derefs_a_contains_b_bit;
167          } else if (a_arr->deref_array_type == nir_deref_array_type_indirect &&
168                     b_arr->deref_array_type == nir_deref_array_type_indirect) {
169             assert(a_arr->indirect.is_ssa && b_arr->indirect.is_ssa);
170             if (a_arr->indirect.ssa == b_arr->indirect.ssa) {
171                /* If they're different constant offsets from the same indirect
172                 * then they don't alias at all.
173                 */
174                if (a_arr->base_offset != b_arr->base_offset)
175                   return 0;
176                /* Otherwise the indirect and base both match */
177             } else {
178                /* If they're have different indirect offsets then we can't
179                 * prove anything about containment.
180                 */
181                result &= ~(derefs_a_contains_b_bit | derefs_b_contains_a_bit);
182             }
183          } else {
184             /* In this case, one is indirect and the other direct so we can't
185              * prove anything about containment.
186              */
187             result &= ~(derefs_a_contains_b_bit | derefs_b_contains_a_bit);
188          }
189          break;
190       }
191 
192       case nir_deref_type_struct: {
193          nir_deref_struct *a_struct = nir_deref_as_struct(a_tail);
194          nir_deref_struct *b_struct = nir_deref_as_struct(b_tail);
195 
196          /* If they're different struct members, they don't even alias */
197          if (a_struct->index != b_struct->index)
198             return 0;
199          break;
200       }
201 
202       default:
203          unreachable("Invalid deref type");
204       }
205    }
206 
207    /* If a is longer than b, then it can't contain b */
208    if (a_tail->child)
209       result &= ~derefs_a_contains_b_bit;
210    if (b_tail->child)
211       result &= ~derefs_b_contains_a_bit;
212 
213    /* If a contains b and b contains a they must be equal. */
214    if ((result & derefs_a_contains_b_bit) && (result & derefs_b_contains_a_bit))
215       result |= derefs_equal_bit;
216 
217    return result;
218 }
219 
220 static void
remove_dead_writes(struct copy_prop_var_state * state,struct copy_entry * entry,unsigned write_mask)221 remove_dead_writes(struct copy_prop_var_state *state,
222                    struct copy_entry *entry, unsigned write_mask)
223 {
224    /* We're overwriting another entry.  Some of it's components may not
225     * have been read yet and, if that's the case, we may be able to delete
226     * some instructions but we have to be careful.
227     */
228    unsigned dead_comps = write_mask & ~entry->comps_may_be_read;
229 
230    for (unsigned mask = dead_comps; mask;) {
231       unsigned i = u_bit_scan(&mask);
232 
233       nir_instr *instr = entry->store_instr[i];
234 
235       /* We may have already deleted it on a previous iteration */
236       if (!instr)
237          continue;
238 
239       /* See if this instr is used anywhere that it's not dead */
240       bool keep = false;
241       for (unsigned j = 0; j < 4; j++) {
242          if (entry->store_instr[j] == instr) {
243             if (dead_comps & (1 << j)) {
244                entry->store_instr[j] = NULL;
245             } else {
246                keep = true;
247             }
248          }
249       }
250 
251       if (!keep) {
252          nir_instr_remove(instr);
253          state->progress = true;
254       }
255    }
256 }
257 
258 static struct copy_entry *
lookup_entry_for_deref(struct copy_prop_var_state * state,nir_deref_var * deref,enum deref_compare_result allowed_comparisons)259 lookup_entry_for_deref(struct copy_prop_var_state *state,
260                        nir_deref_var *deref,
261                        enum deref_compare_result allowed_comparisons)
262 {
263    list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
264       if (compare_derefs(iter->dst, deref) & allowed_comparisons)
265          return iter;
266    }
267 
268    return NULL;
269 }
270 
271 static void
mark_aliased_entries_as_read(struct copy_prop_var_state * state,nir_deref_var * deref,unsigned components)272 mark_aliased_entries_as_read(struct copy_prop_var_state *state,
273                              nir_deref_var *deref, unsigned components)
274 {
275    list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
276       if (compare_derefs(iter->dst, deref) & derefs_may_alias_bit)
277          iter->comps_may_be_read |= components;
278    }
279 }
280 
281 static struct copy_entry *
get_entry_and_kill_aliases(struct copy_prop_var_state * state,nir_deref_var * deref,unsigned write_mask)282 get_entry_and_kill_aliases(struct copy_prop_var_state *state,
283                            nir_deref_var *deref,
284                            unsigned write_mask)
285 {
286    struct copy_entry *entry = NULL;
287    list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
288       if (!iter->src.is_ssa) {
289          /* If this write aliases the source of some entry, get rid of it */
290          if (compare_derefs(iter->src.deref, deref) & derefs_may_alias_bit) {
291             copy_entry_remove(state, iter);
292             continue;
293          }
294       }
295 
296       enum deref_compare_result comp = compare_derefs(iter->dst, deref);
297       /* This is a store operation.  If we completely overwrite some value, we
298        * want to delete any dead writes that may be present.
299        */
300       if (comp & derefs_b_contains_a_bit)
301          remove_dead_writes(state, iter, write_mask);
302 
303       if (comp & derefs_equal_bit) {
304          assert(entry == NULL);
305          entry = iter;
306       } else if (comp & derefs_may_alias_bit) {
307          copy_entry_remove(state, iter);
308       }
309    }
310 
311    if (entry == NULL)
312       entry = copy_entry_create(state, deref);
313 
314    return entry;
315 }
316 
317 static void
apply_barrier_for_modes(struct copy_prop_var_state * state,nir_variable_mode modes)318 apply_barrier_for_modes(struct copy_prop_var_state *state,
319                         nir_variable_mode modes)
320 {
321    list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
322       if ((iter->dst->var->data.mode & modes) ||
323           (!iter->src.is_ssa && (iter->src.deref->var->data.mode & modes)))
324          copy_entry_remove(state, iter);
325    }
326 }
327 
328 static void
store_to_entry(struct copy_prop_var_state * state,struct copy_entry * entry,const struct value * value,unsigned write_mask,nir_instr * store_instr)329 store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
330                const struct value *value, unsigned write_mask,
331                nir_instr *store_instr)
332 {
333    entry->comps_may_be_read &= ~write_mask;
334    if (value->is_ssa) {
335       entry->src.is_ssa = true;
336       /* Only overwrite the written components */
337       for (unsigned i = 0; i < 4; i++) {
338          if (write_mask & (1 << i)) {
339             entry->store_instr[i] = store_instr;
340             entry->src.ssa[i] = value->ssa[i];
341          }
342       }
343    } else {
344       /* Non-ssa stores always write everything */
345       entry->src.is_ssa = false;
346       entry->src.deref = value->deref;
347       for (unsigned i = 0; i < 4; i++)
348          entry->store_instr[i] = store_instr;
349    }
350 }
351 
352 /* Remove an instruction and return a cursor pointing to where it was */
353 static nir_cursor
instr_remove_cursor(nir_instr * instr)354 instr_remove_cursor(nir_instr *instr)
355 {
356    nir_cursor cursor;
357    nir_instr *prev = nir_instr_prev(instr);
358    if (prev) {
359       cursor = nir_after_instr(prev);
360    } else {
361       cursor = nir_before_block(instr->block);
362    }
363    nir_instr_remove(instr);
364    return cursor;
365 }
366 
367 /* Do a "load" from an SSA-based entry return it in "value" as a value with a
368  * single SSA def.  Because an entry could reference up to 4 different SSA
369  * defs, a vecN operation may be inserted to combine them into a single SSA
370  * def before handing it back to the caller.  If the load instruction is no
371  * longer needed, it is removed and nir_instr::block is set to NULL.  (It is
372  * possible, in some cases, for the load to be used in the vecN operation in
373  * which case it isn't deleted.)
374  */
375 static bool
load_from_ssa_entry_value(struct copy_prop_var_state * state,struct copy_entry * entry,nir_builder * b,nir_intrinsic_instr * intrin,struct value * value)376 load_from_ssa_entry_value(struct copy_prop_var_state *state,
377                           struct copy_entry *entry,
378                           nir_builder *b, nir_intrinsic_instr *intrin,
379                           struct value *value)
380 {
381    *value = entry->src;
382    assert(value->is_ssa);
383 
384    const struct glsl_type *type = nir_deref_tail(&entry->dst->deref)->type;
385    unsigned num_components = glsl_get_vector_elements(type);
386 
387    uint8_t available = 0;
388    bool all_same = true;
389    for (unsigned i = 0; i < num_components; i++) {
390       if (value->ssa[i])
391          available |= (1 << i);
392 
393       if (value->ssa[i] != value->ssa[0])
394          all_same = false;
395    }
396 
397    if (all_same) {
398       /* Our work here is done */
399       b->cursor = instr_remove_cursor(&intrin->instr);
400       intrin->instr.block = NULL;
401       return true;
402    }
403 
404    if (available != (1 << num_components) - 1 &&
405        intrin->intrinsic == nir_intrinsic_load_var &&
406        (available & nir_ssa_def_components_read(&intrin->dest.ssa)) == 0) {
407       /* If none of the components read are available as SSA values, then we
408        * should just bail.  Otherwise, we would end up replacing the uses of
409        * the load_var a vecN() that just gathers up its components.
410        */
411       return false;
412    }
413 
414    b->cursor = nir_after_instr(&intrin->instr);
415 
416    nir_ssa_def *load_def =
417       intrin->intrinsic == nir_intrinsic_load_var ? &intrin->dest.ssa : NULL;
418 
419    bool keep_intrin = false;
420    nir_ssa_def *comps[4];
421    for (unsigned i = 0; i < num_components; i++) {
422       if (value->ssa[i]) {
423          comps[i] = nir_channel(b, value->ssa[i], i);
424       } else {
425          /* We don't have anything for this component in our
426           * list.  Just re-use a channel from the load.
427           */
428          if (load_def == NULL)
429             load_def = nir_load_deref_var(b, entry->dst);
430 
431          if (load_def->parent_instr == &intrin->instr)
432             keep_intrin = true;
433 
434          comps[i] = nir_channel(b, load_def, i);
435       }
436    }
437 
438    nir_ssa_def *vec = nir_vec(b, comps, num_components);
439    for (unsigned i = 0; i < num_components; i++)
440       value->ssa[i] = vec;
441 
442    if (!keep_intrin) {
443       /* Removing this instruction should not touch the cursor because we
444        * created the cursor after the intrinsic and have added at least one
445        * instruction (the vec) since then.
446        */
447       assert(b->cursor.instr != &intrin->instr);
448       nir_instr_remove(&intrin->instr);
449       intrin->instr.block = NULL;
450    }
451 
452    return true;
453 }
454 
455 /**
456  * Specialize the wildcards in a deref chain
457  *
458  * This function returns a deref chain identical to \param deref except that
459  * some of its wildcards are replaced with indices from \param specific.  The
460  * process is guided by \param guide which references the same type as \param
461  * specific but has the same wildcard array lengths as \param deref.
462  */
463 static nir_deref_var *
specialize_wildcards(nir_deref_var * deref,nir_deref_var * guide,nir_deref_var * specific,void * mem_ctx)464 specialize_wildcards(nir_deref_var *deref,
465                      nir_deref_var *guide,
466                      nir_deref_var *specific,
467                      void *mem_ctx)
468 {
469    nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);
470 
471    nir_deref *deref_tail = deref->deref.child;
472    nir_deref *guide_tail = &guide->deref;
473    nir_deref *spec_tail = &specific->deref;
474    nir_deref *ret_tail = &ret->deref;
475    while (deref_tail) {
476       switch (deref_tail->deref_type) {
477       case nir_deref_type_array: {
478          nir_deref_array *deref_arr = nir_deref_as_array(deref_tail);
479 
480          nir_deref_array *ret_arr = nir_deref_array_create(ret_tail);
481          ret_arr->deref.type = deref_arr->deref.type;
482          ret_arr->deref_array_type = deref_arr->deref_array_type;
483 
484          switch (deref_arr->deref_array_type) {
485          case nir_deref_array_type_direct:
486             ret_arr->base_offset = deref_arr->base_offset;
487             break;
488          case nir_deref_array_type_indirect:
489             ret_arr->base_offset = deref_arr->base_offset;
490             assert(deref_arr->indirect.is_ssa);
491             ret_arr->indirect = deref_arr->indirect;
492             break;
493          case nir_deref_array_type_wildcard:
494             /* This is where things get tricky.  We have to search through
495              * the entry deref to find its corresponding wildcard and fill
496              * this slot in with the value from the src.
497              */
498             while (guide_tail->child) {
499                guide_tail = guide_tail->child;
500                spec_tail = spec_tail->child;
501 
502                if (guide_tail->deref_type == nir_deref_type_array &&
503                    nir_deref_as_array(guide_tail)->deref_array_type ==
504                    nir_deref_array_type_wildcard)
505                   break;
506             }
507 
508             nir_deref_array *spec_arr = nir_deref_as_array(spec_tail);
509             ret_arr->deref_array_type = spec_arr->deref_array_type;
510             ret_arr->base_offset = spec_arr->base_offset;
511             ret_arr->indirect = spec_arr->indirect;
512          }
513 
514          ret_tail->child = &ret_arr->deref;
515          break;
516       }
517       case nir_deref_type_struct: {
518          nir_deref_struct *deref_struct = nir_deref_as_struct(deref_tail);
519 
520          nir_deref_struct *ret_struct =
521             nir_deref_struct_create(ret_tail, deref_struct->index);
522          ret_struct->deref.type = deref_struct->deref.type;
523 
524          ret_tail->child = &ret_struct->deref;
525          break;
526       }
527       case nir_deref_type_var:
528          unreachable("Invalid deref type");
529       }
530 
531       deref_tail = deref_tail->child;
532       ret_tail = ret_tail->child;
533    }
534 
535    return ret;
536 }
537 
538 /* Do a "load" from an deref-based entry return it in "value" as a value.  The
539  * deref returned in "value" will always be a fresh copy so the caller can
540  * steal it and assign it to the instruction directly without copying it
541  * again.
542  */
543 static bool
load_from_deref_entry_value(struct copy_prop_var_state * state,struct copy_entry * entry,nir_builder * b,nir_intrinsic_instr * intrin,nir_deref_var * src,struct value * value)544 load_from_deref_entry_value(struct copy_prop_var_state *state,
545                             struct copy_entry *entry,
546                             nir_builder *b, nir_intrinsic_instr *intrin,
547                             nir_deref_var *src, struct value *value)
548 {
549    *value = entry->src;
550 
551    /* Walk the deref to get the two tails and also figure out if we need to
552     * specialize any wildcards.
553     */
554    bool need_to_specialize_wildcards = false;
555    nir_deref *entry_tail = &entry->dst->deref;
556    nir_deref *src_tail = &src->deref;
557    while (entry_tail->child && src_tail->child) {
558       assert(src_tail->child->deref_type == entry_tail->child->deref_type);
559       if (src_tail->child->deref_type == nir_deref_type_array) {
560          nir_deref_array *entry_arr = nir_deref_as_array(entry_tail->child);
561          nir_deref_array *src_arr = nir_deref_as_array(src_tail->child);
562 
563          if (src_arr->deref_array_type != nir_deref_array_type_wildcard &&
564              entry_arr->deref_array_type == nir_deref_array_type_wildcard)
565             need_to_specialize_wildcards = true;
566       }
567 
568       entry_tail = entry_tail->child;
569       src_tail = src_tail->child;
570    }
571 
572    /* If the entry deref is longer than the source deref then it refers to a
573     * smaller type and we can't source from it.
574     */
575    assert(entry_tail->child == NULL);
576 
577    if (need_to_specialize_wildcards) {
578       /* The entry has some wildcards that are not in src.  This means we need
579        * to construct a new deref based on the entry but using the wildcards
580        * from the source and guided by the entry dst.  Oof.
581        */
582       value->deref = specialize_wildcards(entry->src.deref, entry->dst, src,
583                                           state->mem_ctx);
584    } else {
585       /* We're going to need to make a copy in case we modify it below */
586       value->deref = nir_deref_var_clone(value->deref, state->mem_ctx);
587    }
588 
589    if (src_tail->child) {
590       /* If our source deref is longer than the entry deref, that's ok because
591        * it just means the entry deref needs to be extended a bit.
592        */
593       nir_deref *value_tail = nir_deref_tail(&value->deref->deref);
594       value_tail->child = nir_deref_clone(src_tail->child, value_tail);
595    }
596 
597    b->cursor = instr_remove_cursor(&intrin->instr);
598 
599    return true;
600 }
601 
602 static bool
try_load_from_entry(struct copy_prop_var_state * state,struct copy_entry * entry,nir_builder * b,nir_intrinsic_instr * intrin,nir_deref_var * src,struct value * value)603 try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
604                     nir_builder *b, nir_intrinsic_instr *intrin,
605                     nir_deref_var *src, struct value *value)
606 {
607    if (entry == NULL)
608       return false;
609 
610    if (entry->src.is_ssa) {
611       return load_from_ssa_entry_value(state, entry, b, intrin, value);
612    } else {
613       return load_from_deref_entry_value(state, entry, b, intrin, src, value);
614    }
615 }
616 
617 static void
copy_prop_vars_block(struct copy_prop_var_state * state,nir_builder * b,nir_block * block)618 copy_prop_vars_block(struct copy_prop_var_state *state,
619                      nir_builder *b, nir_block *block)
620 {
621    /* Start each block with a blank slate */
622    list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link)
623       copy_entry_remove(state, iter);
624 
625    nir_foreach_instr_safe(instr, block) {
626       if (instr->type != nir_instr_type_intrinsic)
627          continue;
628 
629       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
630       switch (intrin->intrinsic) {
631       case nir_intrinsic_barrier:
632       case nir_intrinsic_memory_barrier:
633          /* If we hit a barrier, we need to trash everything that may possibly
634           * be accessible to another thread.  Locals, globals, and things of
635           * the like are safe, however.
636           */
637          apply_barrier_for_modes(state, ~(nir_var_local | nir_var_global |
638                                           nir_var_shader_in | nir_var_uniform));
639          break;
640 
641       case nir_intrinsic_emit_vertex:
642       case nir_intrinsic_emit_vertex_with_counter:
643          apply_barrier_for_modes(state, nir_var_shader_out);
644          break;
645 
646       case nir_intrinsic_load_var: {
647          nir_deref_var *src = intrin->variables[0];
648 
649          uint8_t comps_read = nir_ssa_def_components_read(&intrin->dest.ssa);
650          mark_aliased_entries_as_read(state, src, comps_read);
651 
652          struct copy_entry *src_entry =
653             lookup_entry_for_deref(state, src, derefs_a_contains_b_bit);
654          struct value value;
655          if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
656             if (value.is_ssa) {
657                /* lookup_load has already ensured that we get a single SSA
658                 * value that has all of the channels.  We just have to do the
659                 * rewrite operation.
660                 */
661                if (intrin->instr.block) {
662                   /* The lookup left our instruction in-place.  This means it
663                    * must have used it to vec up a bunch of different sources.
664                    * We need to be careful when rewriting uses so we don't
665                    * rewrite the vecN itself.
666                    */
667                   nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
668                                                  nir_src_for_ssa(value.ssa[0]),
669                                                  value.ssa[0]->parent_instr);
670                } else {
671                   nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
672                                            nir_src_for_ssa(value.ssa[0]));
673                }
674             } else {
675                /* We're turning it into a load of a different variable */
676                ralloc_steal(intrin, value.deref);
677                intrin->variables[0] = value.deref;
678 
679                /* Put it back in again. */
680                nir_builder_instr_insert(b, instr);
681 
682                value.is_ssa = true;
683                for (unsigned i = 0; i < intrin->num_components; i++)
684                   value.ssa[i] = &intrin->dest.ssa;
685             }
686             state->progress = true;
687          } else {
688             value.is_ssa = true;
689             for (unsigned i = 0; i < intrin->num_components; i++)
690                value.ssa[i] = &intrin->dest.ssa;
691          }
692 
693          /* Now that we have a value, we're going to store it back so that we
694           * have the right value next time we come looking for it.  In order
695           * to do this, we need an exact match, not just something that
696           * contains what we're looking for.
697           */
698          struct copy_entry *store_entry =
699             lookup_entry_for_deref(state, src, derefs_equal_bit);
700          if (!store_entry)
701             store_entry = copy_entry_create(state, src);
702 
703          /* Set up a store to this entry with the value of the load.  This way
704           * we can potentially remove subsequent loads.  However, we use a
705           * NULL instruction so we don't try and delete the load on a
706           * subsequent store.
707           */
708          store_to_entry(state, store_entry, &value,
709                         ((1 << intrin->num_components) - 1), NULL);
710          break;
711       }
712 
713       case nir_intrinsic_store_var: {
714          struct value value = {
715             .is_ssa = true
716          };
717 
718          for (unsigned i = 0; i < intrin->num_components; i++)
719             value.ssa[i] = intrin->src[0].ssa;
720 
721          nir_deref_var *dst = intrin->variables[0];
722          unsigned wrmask = nir_intrinsic_write_mask(intrin);
723          struct copy_entry *entry =
724             get_entry_and_kill_aliases(state, dst, wrmask);
725          store_to_entry(state, entry, &value, wrmask, &intrin->instr);
726          break;
727       }
728 
729       case nir_intrinsic_copy_var: {
730          nir_deref_var *dst = intrin->variables[0];
731          nir_deref_var *src = intrin->variables[1];
732 
733          if (compare_derefs(src, dst) & derefs_equal_bit) {
734             /* This is a no-op self-copy.  Get rid of it */
735             nir_instr_remove(instr);
736             continue;
737          }
738 
739          mark_aliased_entries_as_read(state, src, 0xf);
740 
741          struct copy_entry *src_entry =
742             lookup_entry_for_deref(state, src, derefs_a_contains_b_bit);
743          struct value value;
744          if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
745             if (value.is_ssa) {
746                nir_store_deref_var(b, dst, value.ssa[0], 0xf);
747                intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b));
748             } else {
749                /* If this would be a no-op self-copy, don't bother. */
750                if (compare_derefs(value.deref, dst) & derefs_equal_bit)
751                   continue;
752 
753                /* Just turn it into a copy of a different deref */
754                ralloc_steal(intrin, value.deref);
755                intrin->variables[1] = value.deref;
756 
757                /* Put it back in again. */
758                nir_builder_instr_insert(b, instr);
759             }
760 
761             state->progress = true;
762          } else {
763             value = (struct value) {
764                .is_ssa = false,
765                { .deref = src },
766             };
767          }
768 
769          struct copy_entry *dst_entry =
770             get_entry_and_kill_aliases(state, dst, 0xf);
771          store_to_entry(state, dst_entry, &value, 0xf, &intrin->instr);
772          break;
773       }
774 
775       default:
776          break;
777       }
778    }
779 }
780 
781 bool
nir_opt_copy_prop_vars(nir_shader * shader)782 nir_opt_copy_prop_vars(nir_shader *shader)
783 {
784    struct copy_prop_var_state state;
785 
786    state.shader = shader;
787    state.mem_ctx = ralloc_context(NULL);
788    list_inithead(&state.copies);
789    list_inithead(&state.copy_free_list);
790 
791    bool global_progress = false;
792    nir_foreach_function(function, shader) {
793       if (!function->impl)
794          continue;
795 
796       nir_builder b;
797       nir_builder_init(&b, function->impl);
798 
799       state.progress = false;
800       nir_foreach_block(block, function->impl)
801          copy_prop_vars_block(&state, &b, block);
802 
803       if (state.progress) {
804          nir_metadata_preserve(function->impl, nir_metadata_block_index |
805                                                nir_metadata_dominance);
806          global_progress = true;
807       }
808    }
809 
810    ralloc_free(state.mem_ctx);
811 
812    return global_progress;
813 }
814