1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_copy_propagation_elements.cpp
26  *
27  * Replaces usage of recently-copied components of variables with the
28  * previous copy of the variable.
29  *
30  * This should reduce the number of MOV instructions in the generated
31  * programs and help triggering other optimizations that live in GLSL
32  * level.
33  */
34 
35 #include "ir.h"
36 #include "ir_rvalue_visitor.h"
37 #include "ir_basic_block.h"
38 #include "ir_optimization.h"
39 #include "compiler/glsl_types.h"
40 #include "util/hash_table.h"
41 #include "util/set.h"
42 
43 static bool debug = false;
44 
45 namespace {
46 
47 class acp_entry
48 {
49 public:
50    DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
51 
52    /* If set, rhs_full indicates that this ACP entry represents a
53     * whole-variable copy.  The rhs_element[] array will still be filled,
54     * to allow the swizzling from its components in case the variable
55     * was a vector (and to simplify some of the erase() and write_vector()
56     * logic).
57     */
58 
59    ir_variable *rhs_full;
60    ir_variable *rhs_element[4];
61    unsigned rhs_channel[4];
62 
63    /* Set of variables that use the variable associated with this acp_entry as
64     * RHS.  This has the "reverse references" of rhs_full/rhs_element.  It is
65     * used to speed up invalidating those references when the acp_entry
66     * changes.
67     */
68    set *dsts;
69 };
70 
71 class copy_propagation_state {
72 public:
73    DECLARE_RZALLOC_CXX_OPERATORS(copy_propagation_state);
74 
75    static
create(void * mem_ctx)76    copy_propagation_state* create(void *mem_ctx)
77    {
78       return new (mem_ctx) copy_propagation_state(NULL);
79    }
80 
clone()81    copy_propagation_state* clone()
82    {
83       return new (ralloc_parent(this)) copy_propagation_state(this);
84    }
85 
erase_all()86    void erase_all()
87    {
88       /* Individual elements were allocated from a linear allocator, so will
89        * be destroyed when the state is destroyed.
90        */
91       _mesa_hash_table_clear(acp, NULL);
92       fallback = NULL;
93    }
94 
erase(ir_variable * var,unsigned write_mask)95    void erase(ir_variable *var, unsigned write_mask)
96    {
97       acp_entry *entry = pull_acp(var);
98       entry->rhs_full = NULL;
99 
100       for (int i = 0; i < 4; i++) {
101          if (!entry->rhs_element[i])
102             continue;
103          if ((write_mask & (1 << i)) == 0)
104             continue;
105 
106          ir_variable *to_remove = entry->rhs_element[i];
107          entry->rhs_element[i] = NULL;
108          remove_unused_var_from_dsts(entry, var, to_remove);
109       }
110 
111       /* TODO: Check write mask, and possibly not clear everything. */
112 
113       /* For any usage of our variable on the RHS, clear it out. */
114       set_foreach(entry->dsts, set_entry) {
115          ir_variable *dst_var = (ir_variable *)set_entry->key;
116          acp_entry *dst_entry = pull_acp(dst_var);
117          for (int i = 0; i < 4; i++) {
118             if (dst_entry->rhs_element[i] == var)
119                dst_entry->rhs_element[i] = NULL;
120          }
121          if (dst_entry->rhs_full == var)
122             dst_entry->rhs_full = NULL;
123          _mesa_set_remove(entry->dsts, set_entry);
124       }
125    }
126 
read(ir_variable * var)127    acp_entry *read(ir_variable *var)
128    {
129       for (copy_propagation_state *s = this; s != NULL; s = s->fallback) {
130          hash_entry *ht_entry = _mesa_hash_table_search(s->acp, var);
131          if (ht_entry)
132             return (acp_entry *) ht_entry->data;
133       }
134       return NULL;
135    }
136 
write_elements(ir_variable * lhs,ir_variable * rhs,unsigned write_mask,int swizzle[4])137    void write_elements(ir_variable *lhs, ir_variable *rhs, unsigned write_mask, int swizzle[4])
138    {
139       acp_entry *lhs_entry = pull_acp(lhs);
140       lhs_entry->rhs_full = NULL;
141 
142       for (int i = 0; i < 4; i++) {
143          if ((write_mask & (1 << i)) == 0)
144             continue;
145          ir_variable *to_remove = lhs_entry->rhs_element[i];
146          lhs_entry->rhs_element[i] = rhs;
147          lhs_entry->rhs_channel[i] = swizzle[i];
148 
149          remove_unused_var_from_dsts(lhs_entry, lhs, to_remove);
150       }
151 
152       acp_entry *rhs_entry = pull_acp(rhs);
153       _mesa_set_add(rhs_entry->dsts, lhs);
154    }
155 
write_full(ir_variable * lhs,ir_variable * rhs)156    void write_full(ir_variable *lhs, ir_variable *rhs)
157    {
158       acp_entry *lhs_entry = pull_acp(lhs);
159       if (lhs_entry->rhs_full == rhs)
160          return;
161 
162       if (lhs_entry->rhs_full) {
163          remove_from_dsts(lhs_entry->rhs_full, lhs);
164       } else if (lhs->type->is_vector()) {
165          for (int i = 0; i < 4; i++) {
166             if (lhs_entry->rhs_element[i])
167                remove_from_dsts(lhs_entry->rhs_element[i], lhs);
168          }
169       }
170 
171       lhs_entry->rhs_full = rhs;
172       acp_entry *rhs_entry = pull_acp(rhs);
173       _mesa_set_add(rhs_entry->dsts, lhs);
174 
175       if (lhs->type->is_vector()) {
176          for (int i = 0; i < 4; i++) {
177             lhs_entry->rhs_element[i] = rhs;
178             lhs_entry->rhs_channel[i] = i;
179          }
180       }
181    }
182 
remove_unused_var_from_dsts(acp_entry * lhs_entry,ir_variable * lhs,ir_variable * var)183    void remove_unused_var_from_dsts(acp_entry *lhs_entry, ir_variable *lhs, ir_variable *var)
184    {
185       if (!var)
186          return;
187 
188       /* If lhs still uses var, don't remove anything. */
189       for (int j = 0; j < 4; j++) {
190          if (lhs_entry->rhs_element[j] == var)
191             return;
192       }
193 
194       acp_entry *element = pull_acp(var);
195       assert(element);
196       _mesa_set_remove_key(element->dsts, lhs);
197    }
198 
199 private:
copy_propagation_state(copy_propagation_state * fallback)200    explicit copy_propagation_state(copy_propagation_state *fallback)
201    {
202       this->fallback = fallback;
203       /* Use 'this' as context for the table, no explicit destruction
204        * needed later.
205        */
206       acp = _mesa_pointer_hash_table_create(this);
207       lin_ctx = linear_alloc_parent(this, 0);
208    }
209 
pull_acp(ir_variable * var)210    acp_entry *pull_acp(ir_variable *var)
211    {
212       hash_entry *ht_entry = _mesa_hash_table_search(acp, var);
213       if (ht_entry)
214          return (acp_entry *) ht_entry->data;
215 
216       /* If not found, create one and copy data from fallback if available. */
217       acp_entry *entry = new(lin_ctx) acp_entry();
218       _mesa_hash_table_insert(acp, var, entry);
219 
220       bool found = false;
221       for (copy_propagation_state *s = fallback; s != NULL; s = s->fallback) {
222          hash_entry *fallback_ht_entry = _mesa_hash_table_search(s->acp, var);
223          if (fallback_ht_entry) {
224             acp_entry *fallback_entry = (acp_entry *) fallback_ht_entry->data;
225             *entry = *fallback_entry;
226             entry->dsts = _mesa_set_clone(fallback_entry->dsts, this);
227             found = true;
228             break;
229          }
230       }
231 
232       if (!found) {
233          entry->dsts = _mesa_pointer_set_create(this);
234       }
235 
236       return entry;
237    }
238 
239    void
remove_from_dsts(ir_variable * var,ir_variable * to_remove)240    remove_from_dsts(ir_variable *var, ir_variable *to_remove)
241    {
242       acp_entry *entry = pull_acp(var);
243       assert(entry);
244       _mesa_set_remove_key(entry->dsts, to_remove);
245    }
246 
247    /** Available Copy to Propagate table, from variable to the entry
248     *  containing the current sources that can be used. */
249    hash_table *acp;
250 
251    /** When a state is cloned, entries are copied on demand from fallback. */
252    copy_propagation_state *fallback;
253 
254    void *lin_ctx;
255 };
256 
257 class kill_entry : public exec_node
258 {
259 public:
260    /* override operator new from exec_node */
261    DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
262 
kill_entry(ir_variable * var,int write_mask)263    kill_entry(ir_variable *var, int write_mask)
264    {
265       this->var = var;
266       this->write_mask = write_mask;
267    }
268 
269    ir_variable *var;
270    unsigned int write_mask;
271 };
272 
273 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
274 public:
ir_copy_propagation_elements_visitor()275    ir_copy_propagation_elements_visitor()
276    {
277       this->progress = false;
278       this->killed_all = false;
279       this->mem_ctx = ralloc_context(NULL);
280       this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
281       this->shader_mem_ctx = NULL;
282       this->kills = new(mem_ctx) exec_list;
283       this->state = copy_propagation_state::create(mem_ctx);
284    }
~ir_copy_propagation_elements_visitor()285    ~ir_copy_propagation_elements_visitor()
286    {
287       ralloc_free(mem_ctx);
288    }
289 
290    virtual ir_visitor_status visit(ir_dereference_variable *);
291 
292    void handle_loop(ir_loop *, bool keep_acp);
293    virtual ir_visitor_status visit_enter(class ir_loop *);
294    virtual ir_visitor_status visit_enter(class ir_function_signature *);
295    virtual ir_visitor_status visit_leave(class ir_assignment *);
296    virtual ir_visitor_status visit_enter(class ir_call *);
297    virtual ir_visitor_status visit_enter(class ir_if *);
298    virtual ir_visitor_status visit_leave(class ir_swizzle *);
299 
300    void handle_rvalue(ir_rvalue **rvalue);
301 
302    void add_copy(ir_assignment *ir);
303    void kill(kill_entry *k);
304    void handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all);
305 
306    copy_propagation_state *state;
307 
308    /**
309     * List of kill_entry: The variables whose values were killed in this
310     * block.
311     */
312    exec_list *kills;
313 
314    bool progress;
315 
316    bool killed_all;
317 
318    /* Context for our local data structures. */
319    void *mem_ctx;
320    void *lin_ctx;
321    /* Context for allocating new shader nodes. */
322    void *shader_mem_ctx;
323 };
324 
325 } /* unnamed namespace */
326 
327 ir_visitor_status
visit(ir_dereference_variable * ir)328 ir_copy_propagation_elements_visitor::visit(ir_dereference_variable *ir)
329 {
330    if (this->in_assignee)
331       return visit_continue;
332 
333    const acp_entry *entry = state->read(ir->var);
334    if (entry && entry->rhs_full) {
335       ir->var = (ir_variable *) entry->rhs_full;
336       progress = true;
337    }
338 
339    return visit_continue;
340 }
341 
342 ir_visitor_status
visit_enter(ir_function_signature * ir)343 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
344 {
345    /* Treat entry into a function signature as a completely separate
346     * block.  Any instructions at global scope will be shuffled into
347     * main() at link time, so they're irrelevant to us.
348     */
349    exec_list *orig_kills = this->kills;
350    bool orig_killed_all = this->killed_all;
351 
352    this->kills = new(mem_ctx) exec_list;
353    this->killed_all = false;
354 
355    copy_propagation_state *orig_state = state;
356    this->state = copy_propagation_state::create(mem_ctx);
357 
358    visit_list_elements(this, &ir->body);
359 
360    delete this->state;
361    this->state = orig_state;
362 
363    ralloc_free(this->kills);
364    this->kills = orig_kills;
365    this->killed_all = orig_killed_all;
366 
367    return visit_continue_with_parent;
368 }
369 
370 ir_visitor_status
visit_leave(ir_assignment * ir)371 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
372 {
373    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
374    ir_variable *var = ir->lhs->variable_referenced();
375 
376    kill_entry *k;
377 
378    if (lhs && var->type->is_vector())
379       k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
380    else
381       k = new(this->lin_ctx) kill_entry(var, ~0);
382 
383    kill(k);
384 
385    add_copy(ir);
386 
387    return visit_continue;
388 }
389 
390 ir_visitor_status
visit_leave(ir_swizzle *)391 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
392 {
393    /* Don't visit the values of swizzles since they are handled while
394     * visiting the swizzle itself.
395     */
396    return visit_continue;
397 }
398 
399 /**
400  * Replaces dereferences of ACP RHS variables with ACP LHS variables.
401  *
402  * This is where the actual copy propagation occurs.  Note that the
403  * rewriting of ir_dereference means that the ir_dereference instance
404  * must not be shared by multiple IR operations!
405  */
406 void
handle_rvalue(ir_rvalue ** ir)407 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
408 {
409    int swizzle_chan[4];
410    ir_dereference_variable *deref_var;
411    ir_variable *source[4] = {NULL, NULL, NULL, NULL};
412    int source_chan[4] = {0, 0, 0, 0};
413    int chans;
414    bool noop_swizzle = true;
415 
416    if (!*ir)
417       return;
418 
419    ir_swizzle *swizzle = (*ir)->as_swizzle();
420    if (swizzle) {
421       deref_var = swizzle->val->as_dereference_variable();
422       if (!deref_var)
423 	 return;
424 
425       swizzle_chan[0] = swizzle->mask.x;
426       swizzle_chan[1] = swizzle->mask.y;
427       swizzle_chan[2] = swizzle->mask.z;
428       swizzle_chan[3] = swizzle->mask.w;
429       chans = swizzle->type->vector_elements;
430    } else {
431       deref_var = (*ir)->as_dereference_variable();
432       if (!deref_var)
433 	 return;
434 
435       swizzle_chan[0] = 0;
436       swizzle_chan[1] = 1;
437       swizzle_chan[2] = 2;
438       swizzle_chan[3] = 3;
439       chans = deref_var->type->vector_elements;
440    }
441 
442    if (this->in_assignee)
443       return;
444 
445    ir_variable *var = deref_var->var;
446 
447    /* Try to find ACP entries covering swizzle_chan[], hoping they're
448     * the same source variable.
449     */
450 
451    const acp_entry *entry = state->read(var);
452    if (entry) {
453       for (int c = 0; c < chans; c++) {
454          unsigned index = swizzle_chan[c];
455          ir_variable *src = entry->rhs_element[index];
456          if (!src)
457             continue;
458          source[c] = src;
459          source_chan[c] = entry->rhs_channel[index];
460          if (source_chan[c] != swizzle_chan[c])
461             noop_swizzle = false;
462       }
463    }
464 
465    /* Make sure all channels are copying from the same source variable. */
466    if (!source[0])
467       return;
468    for (int c = 1; c < chans; c++) {
469       if (source[c] != source[0])
470 	 return;
471    }
472 
473    if (!shader_mem_ctx)
474       shader_mem_ctx = ralloc_parent(deref_var);
475 
476    /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
477     * of itself, which would just be deleted by opt_noop_swizzle).
478     */
479    if (source[0] == var && noop_swizzle)
480       return;
481 
482    if (debug) {
483       printf("Copy propagation from:\n");
484       (*ir)->print();
485    }
486 
487    deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
488    *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
489 					source_chan[0],
490 					source_chan[1],
491 					source_chan[2],
492 					source_chan[3],
493 					chans);
494    progress = true;
495 
496    if (debug) {
497       printf("to:\n");
498       (*ir)->print();
499       printf("\n");
500    }
501 }
502 
503 
504 ir_visitor_status
visit_enter(ir_call * ir)505 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
506 {
507    /* Do copy propagation on call parameters, but skip any out params */
508    foreach_two_lists(formal_node, &ir->callee->parameters,
509                      actual_node, &ir->actual_parameters) {
510       ir_variable *sig_param = (ir_variable *) formal_node;
511       ir_rvalue *ir = (ir_rvalue *) actual_node;
512       if (sig_param->data.mode != ir_var_function_out
513           && sig_param->data.mode != ir_var_function_inout) {
514          ir->accept(this);
515       }
516    }
517 
518    if (!ir->callee->is_intrinsic()) {
519       state->erase_all();
520       this->killed_all = true;
521    } else {
522       if (ir->return_deref) {
523          kill(new(this->lin_ctx) kill_entry(ir->return_deref->var, ~0));
524       }
525 
526       foreach_two_lists(formal_node, &ir->callee->parameters,
527                         actual_node, &ir->actual_parameters) {
528          ir_variable *sig_param = (ir_variable *) formal_node;
529          if (sig_param->data.mode == ir_var_function_out ||
530              sig_param->data.mode == ir_var_function_inout) {
531             ir_rvalue *ir = (ir_rvalue *) actual_node;
532             ir_variable *var = ir->variable_referenced();
533             kill(new(this->lin_ctx) kill_entry(var, ~0));
534          }
535       }
536    }
537 
538    return visit_continue_with_parent;
539 }
540 
541 void
handle_if_block(exec_list * instructions,exec_list * kills,bool * killed_all)542 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions, exec_list *kills, bool *killed_all)
543 {
544    exec_list *orig_kills = this->kills;
545    bool orig_killed_all = this->killed_all;
546 
547    this->kills = kills;
548    this->killed_all = false;
549 
550    /* Populate the initial acp with a copy of the original */
551    copy_propagation_state *orig_state = state;
552    this->state = orig_state->clone();
553 
554    visit_list_elements(this, instructions);
555 
556    delete this->state;
557    this->state = orig_state;
558 
559    *killed_all = this->killed_all;
560    this->kills = orig_kills;
561    this->killed_all = orig_killed_all;
562 }
563 
564 ir_visitor_status
visit_enter(ir_if * ir)565 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
566 {
567    ir->condition->accept(this);
568 
569    exec_list *new_kills = new(mem_ctx) exec_list;
570    bool then_killed_all = false;
571    bool else_killed_all = false;
572 
573    handle_if_block(&ir->then_instructions, new_kills, &then_killed_all);
574    handle_if_block(&ir->else_instructions, new_kills, &else_killed_all);
575 
576    if (then_killed_all || else_killed_all) {
577       state->erase_all();
578       killed_all = true;
579    } else {
580       foreach_in_list_safe(kill_entry, k, new_kills)
581          kill(k);
582    }
583 
584    ralloc_free(new_kills);
585 
586    /* handle_if_block() already descended into the children. */
587    return visit_continue_with_parent;
588 }
589 
590 void
handle_loop(ir_loop * ir,bool keep_acp)591 ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
592 {
593    exec_list *orig_kills = this->kills;
594    bool orig_killed_all = this->killed_all;
595 
596    this->kills = new(mem_ctx) exec_list;
597    this->killed_all = false;
598 
599    copy_propagation_state *orig_state = state;
600 
601    if (keep_acp) {
602       /* Populate the initial acp with a copy of the original */
603       this->state = orig_state->clone();
604    } else {
605       this->state = copy_propagation_state::create(mem_ctx);
606    }
607 
608    visit_list_elements(this, &ir->body_instructions);
609 
610    delete this->state;
611    this->state = orig_state;
612 
613    if (this->killed_all)
614       this->state->erase_all();
615 
616    exec_list *new_kills = this->kills;
617    this->kills = orig_kills;
618    this->killed_all = this->killed_all || orig_killed_all;
619 
620    foreach_in_list_safe(kill_entry, k, new_kills) {
621       kill(k);
622    }
623 
624    ralloc_free(new_kills);
625 }
626 
627 ir_visitor_status
visit_enter(ir_loop * ir)628 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
629 {
630    handle_loop(ir, false);
631    handle_loop(ir, true);
632 
633    /* already descended into the children. */
634    return visit_continue_with_parent;
635 }
636 
637 /* Remove any entries currently in the ACP for this kill. */
638 void
kill(kill_entry * k)639 ir_copy_propagation_elements_visitor::kill(kill_entry *k)
640 {
641    state->erase(k->var, k->write_mask);
642 
643    /* If we were on a list, remove ourselves before inserting */
644    if (k->next)
645       k->remove();
646 
647    this->kills->push_tail(k);
648 }
649 
650 /**
651  * Adds directly-copied channels between vector variables to the available
652  * copy propagation list.
653  */
654 void
add_copy(ir_assignment * ir)655 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
656 {
657    if (ir->condition)
658       return;
659 
660    {
661       ir_variable *lhs_var = ir->whole_variable_written();
662       ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
663 
664       if (lhs_var != NULL && rhs && rhs->var != NULL && lhs_var != rhs->var) {
665          if (lhs_var->data.mode == ir_var_shader_storage ||
666              lhs_var->data.mode == ir_var_shader_shared ||
667              rhs->var->data.mode == ir_var_shader_storage ||
668              rhs->var->data.mode == ir_var_shader_shared ||
669              lhs_var->data.precise != rhs->var->data.precise) {
670             return;
671          }
672          state->write_full(lhs_var, rhs->var);
673          return;
674       }
675    }
676 
677    int orig_swizzle[4] = {0, 1, 2, 3};
678    int swizzle[4];
679 
680    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
681    if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
682       return;
683 
684    if (lhs->var->data.mode == ir_var_shader_storage ||
685        lhs->var->data.mode == ir_var_shader_shared)
686       return;
687 
688    ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
689    if (!rhs) {
690       ir_swizzle *swiz = ir->rhs->as_swizzle();
691       if (!swiz)
692 	 return;
693 
694       rhs = swiz->val->as_dereference_variable();
695       if (!rhs)
696 	 return;
697 
698       orig_swizzle[0] = swiz->mask.x;
699       orig_swizzle[1] = swiz->mask.y;
700       orig_swizzle[2] = swiz->mask.z;
701       orig_swizzle[3] = swiz->mask.w;
702    }
703 
704    if (rhs->var->data.mode == ir_var_shader_storage ||
705        rhs->var->data.mode == ir_var_shader_shared)
706       return;
707 
708    /* Move the swizzle channels out to the positions they match in the
709     * destination.  We don't want to have to rewrite the swizzle[]
710     * array every time we clear a bit of the write_mask.
711     */
712    int j = 0;
713    for (int i = 0; i < 4; i++) {
714       if (ir->write_mask & (1 << i))
715 	 swizzle[i] = orig_swizzle[j++];
716    }
717 
718    int write_mask = ir->write_mask;
719    if (lhs->var == rhs->var) {
720       /* If this is a copy from the variable to itself, then we need
721        * to be sure not to include the updated channels from this
722        * instruction in the set of new source channels to be
723        * copy-propagated from.
724        */
725       for (int i = 0; i < 4; i++) {
726 	 if (ir->write_mask & (1 << orig_swizzle[i]))
727 	    write_mask &= ~(1 << i);
728       }
729    }
730 
731    if (lhs->var->data.precise != rhs->var->data.precise)
732       return;
733 
734    state->write_elements(lhs->var, rhs->var, write_mask, swizzle);
735 }
736 
737 bool
do_copy_propagation_elements(exec_list * instructions)738 do_copy_propagation_elements(exec_list *instructions)
739 {
740    ir_copy_propagation_elements_visitor v;
741 
742    visit_list_elements(&v, instructions);
743 
744    return v.progress;
745 }
746