1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_copy_propagation_elements.cpp
26  *
27  * Replaces usage of recently-copied components of variables with the
28  * previous copy of the variable.
29  *
30  * This pass can be compared with opt_copy_propagation, which operands
31  * on arbitrary whole-variable copies.  However, in order to handle
32  * the copy propagation of swizzled variables or writemasked writes,
33  * we want to track things on a channel-wise basis.  I found that
34  * trying to mix the swizzled/writemasked support here with the
35  * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
36  * so this is separate despite the ACP handling being somewhat
37  * similar.
38  *
39  * This should reduce the number of MOV instructions in the generated
40  * programs unless copy propagation is also done on the LIR, and may
41  * help anyway by triggering other optimizations that live in the HIR.
42  */
43 
44 #include "ir.h"
45 #include "ir_rvalue_visitor.h"
46 #include "ir_basic_block.h"
47 #include "ir_optimization.h"
48 #include "glsl_types.h"
49 
50 static bool debug = false;
51 
52 namespace {
53 
54 class acp_entry : public exec_node
55 {
56 public:
acp_entry(ir_variable * lhs,ir_variable * rhs,int write_mask,int swizzle[4])57    acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4])
58    {
59       this->lhs = lhs;
60       this->rhs = rhs;
61       this->write_mask = write_mask;
62       memcpy(this->swizzle, swizzle, sizeof(this->swizzle));
63    }
64 
acp_entry(acp_entry * a)65    acp_entry(acp_entry *a)
66    {
67       this->lhs = a->lhs;
68       this->rhs = a->rhs;
69       this->write_mask = a->write_mask;
70       memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle));
71    }
72 
73    ir_variable *lhs;
74    ir_variable *rhs;
75    unsigned int write_mask;
76    int swizzle[4];
77 };
78 
79 
80 class kill_entry : public exec_node
81 {
82 public:
kill_entry(ir_variable * var,int write_mask)83    kill_entry(ir_variable *var, int write_mask)
84    {
85       this->var = var;
86       this->write_mask = write_mask;
87    }
88 
89    ir_variable *var;
90    unsigned int write_mask;
91 };
92 
93 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
94 public:
ir_copy_propagation_elements_visitor()95    ir_copy_propagation_elements_visitor()
96    {
97       this->progress = false;
98       this->killed_all = false;
99       this->mem_ctx = ralloc_context(NULL);
100       this->shader_mem_ctx = NULL;
101       this->acp = new(mem_ctx) exec_list;
102       this->kills = new(mem_ctx) exec_list;
103    }
~ir_copy_propagation_elements_visitor()104    ~ir_copy_propagation_elements_visitor()
105    {
106       ralloc_free(mem_ctx);
107    }
108 
109    virtual ir_visitor_status visit_enter(class ir_loop *);
110    virtual ir_visitor_status visit_enter(class ir_function_signature *);
111    virtual ir_visitor_status visit_leave(class ir_assignment *);
112    virtual ir_visitor_status visit_enter(class ir_call *);
113    virtual ir_visitor_status visit_enter(class ir_if *);
114    virtual ir_visitor_status visit_leave(class ir_swizzle *);
115 
116    void handle_rvalue(ir_rvalue **rvalue);
117 
118    void add_copy(ir_assignment *ir);
119    void kill(kill_entry *k);
120    void handle_if_block(exec_list *instructions);
121 
122    /** List of acp_entry: The available copies to propagate */
123    exec_list *acp;
124    /**
125     * List of kill_entry: The variables whose values were killed in this
126     * block.
127     */
128    exec_list *kills;
129 
130    bool progress;
131 
132    bool killed_all;
133 
134    /* Context for our local data structures. */
135    void *mem_ctx;
136    /* Context for allocating new shader nodes. */
137    void *shader_mem_ctx;
138 };
139 
140 } /* unnamed namespace */
141 
142 ir_visitor_status
visit_enter(ir_function_signature * ir)143 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
144 {
145    /* Treat entry into a function signature as a completely separate
146     * block.  Any instructions at global scope will be shuffled into
147     * main() at link time, so they're irrelevant to us.
148     */
149    exec_list *orig_acp = this->acp;
150    exec_list *orig_kills = this->kills;
151    bool orig_killed_all = this->killed_all;
152 
153    this->acp = new(mem_ctx) exec_list;
154    this->kills = new(mem_ctx) exec_list;
155    this->killed_all = false;
156 
157    visit_list_elements(this, &ir->body);
158 
159    this->kills = orig_kills;
160    this->acp = orig_acp;
161    this->killed_all = orig_killed_all;
162 
163    return visit_continue_with_parent;
164 }
165 
166 ir_visitor_status
visit_leave(ir_assignment * ir)167 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
168 {
169    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
170    ir_variable *var = ir->lhs->variable_referenced();
171 
172    if (var->type->is_scalar() || var->type->is_vector()) {
173       kill_entry *k;
174 
175       if (lhs)
176 	 k = new(mem_ctx) kill_entry(var, ir->write_mask);
177       else
178 	 k = new(mem_ctx) kill_entry(var, ~0);
179 
180       kill(k);
181    }
182 
183    add_copy(ir);
184 
185    return visit_continue;
186 }
187 
188 ir_visitor_status
visit_leave(ir_swizzle * ir)189 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *ir)
190 {
191    /* Don't visit the values of swizzles since they are handled while
192     * visiting the swizzle itself.
193     */
194    return visit_continue;
195 }
196 
197 /**
198  * Replaces dereferences of ACP RHS variables with ACP LHS variables.
199  *
200  * This is where the actual copy propagation occurs.  Note that the
201  * rewriting of ir_dereference means that the ir_dereference instance
202  * must not be shared by multiple IR operations!
203  */
204 void
handle_rvalue(ir_rvalue ** ir)205 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
206 {
207    int swizzle_chan[4];
208    ir_dereference_variable *deref_var;
209    ir_variable *source[4] = {NULL, NULL, NULL, NULL};
210    int source_chan[4];
211    int chans;
212 
213    if (!*ir)
214       return;
215 
216    ir_swizzle *swizzle = (*ir)->as_swizzle();
217    if (swizzle) {
218       deref_var = swizzle->val->as_dereference_variable();
219       if (!deref_var)
220 	 return;
221 
222       swizzle_chan[0] = swizzle->mask.x;
223       swizzle_chan[1] = swizzle->mask.y;
224       swizzle_chan[2] = swizzle->mask.z;
225       swizzle_chan[3] = swizzle->mask.w;
226       chans = swizzle->type->vector_elements;
227    } else {
228       deref_var = (*ir)->as_dereference_variable();
229       if (!deref_var)
230 	 return;
231 
232       swizzle_chan[0] = 0;
233       swizzle_chan[1] = 1;
234       swizzle_chan[2] = 2;
235       swizzle_chan[3] = 3;
236       chans = deref_var->type->vector_elements;
237    }
238 
239    if (this->in_assignee)
240       return;
241 
242    ir_variable *var = deref_var->var;
243 
244    /* Try to find ACP entries covering swizzle_chan[], hoping they're
245     * the same source variable.
246     */
247    foreach_iter(exec_list_iterator, iter, *this->acp) {
248       acp_entry *entry = (acp_entry *)iter.get();
249 
250       if (var == entry->lhs) {
251 	 for (int c = 0; c < chans; c++) {
252 	    if (entry->write_mask & (1 << swizzle_chan[c])) {
253 	       source[c] = entry->rhs;
254 	       source_chan[c] = entry->swizzle[swizzle_chan[c]];
255 	    }
256 	 }
257       }
258    }
259 
260    /* Make sure all channels are copying from the same source variable. */
261    if (!source[0])
262       return;
263    for (int c = 1; c < chans; c++) {
264       if (source[c] != source[0])
265 	 return;
266    }
267 
268    if (!shader_mem_ctx)
269       shader_mem_ctx = ralloc_parent(deref_var);
270 
271    if (debug) {
272       printf("Copy propagation from:\n");
273       (*ir)->print();
274    }
275 
276    deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
277    *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
278 					source_chan[0],
279 					source_chan[1],
280 					source_chan[2],
281 					source_chan[3],
282 					chans);
283 
284    if (debug) {
285       printf("to:\n");
286       (*ir)->print();
287       printf("\n");
288    }
289 }
290 
291 
292 ir_visitor_status
visit_enter(ir_call * ir)293 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
294 {
295    /* Do copy propagation on call parameters, but skip any out params */
296    exec_list_iterator sig_param_iter = ir->callee->parameters.iterator();
297    foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {
298       ir_variable *sig_param = (ir_variable *)sig_param_iter.get();
299       ir_instruction *ir = (ir_instruction *)iter.get();
300       if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) {
301          ir->accept(this);
302       }
303       sig_param_iter.next();
304    }
305 
306    /* Since we're unlinked, we don't (necessarily) know the side effects of
307     * this call.  So kill all copies.
308     */
309    acp->make_empty();
310    this->killed_all = true;
311 
312    return visit_continue_with_parent;
313 }
314 
315 void
handle_if_block(exec_list * instructions)316 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
317 {
318    exec_list *orig_acp = this->acp;
319    exec_list *orig_kills = this->kills;
320    bool orig_killed_all = this->killed_all;
321 
322    this->acp = new(mem_ctx) exec_list;
323    this->kills = new(mem_ctx) exec_list;
324    this->killed_all = false;
325 
326    /* Populate the initial acp with a copy of the original */
327    foreach_iter(exec_list_iterator, iter, *orig_acp) {
328       acp_entry *a = (acp_entry *)iter.get();
329       this->acp->push_tail(new(this->mem_ctx) acp_entry(a));
330    }
331 
332    visit_list_elements(this, instructions);
333 
334    if (this->killed_all) {
335       orig_acp->make_empty();
336    }
337 
338    exec_list *new_kills = this->kills;
339    this->kills = orig_kills;
340    this->acp = orig_acp;
341    this->killed_all = this->killed_all || orig_killed_all;
342 
343    /* Move the new kills into the parent block's list, removing them
344     * from the parent's ACP list in the process.
345     */
346    foreach_list_safe(node, new_kills) {
347       kill_entry *k = (kill_entry *)node;
348       kill(k);
349    }
350 }
351 
352 ir_visitor_status
visit_enter(ir_if * ir)353 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
354 {
355    ir->condition->accept(this);
356 
357    handle_if_block(&ir->then_instructions);
358    handle_if_block(&ir->else_instructions);
359 
360    /* handle_if_block() already descended into the children. */
361    return visit_continue_with_parent;
362 }
363 
364 ir_visitor_status
visit_enter(ir_loop * ir)365 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
366 {
367    exec_list *orig_acp = this->acp;
368    exec_list *orig_kills = this->kills;
369    bool orig_killed_all = this->killed_all;
370 
371    /* FINISHME: For now, the initial acp for loops is totally empty.
372     * We could go through once, then go through again with the acp
373     * cloned minus the killed entries after the first run through.
374     */
375    this->acp = new(mem_ctx) exec_list;
376    this->kills = new(mem_ctx) exec_list;
377    this->killed_all = false;
378 
379    visit_list_elements(this, &ir->body_instructions);
380 
381    if (this->killed_all) {
382       orig_acp->make_empty();
383    }
384 
385    exec_list *new_kills = this->kills;
386    this->kills = orig_kills;
387    this->acp = orig_acp;
388    this->killed_all = this->killed_all || orig_killed_all;
389 
390    foreach_list_safe(node, new_kills) {
391       kill_entry *k = (kill_entry *)node;
392       kill(k);
393    }
394 
395    /* already descended into the children. */
396    return visit_continue_with_parent;
397 }
398 
399 /* Remove any entries currently in the ACP for this kill. */
400 void
kill(kill_entry * k)401 ir_copy_propagation_elements_visitor::kill(kill_entry *k)
402 {
403    foreach_list_safe(node, acp) {
404       acp_entry *entry = (acp_entry *)node;
405 
406       if (entry->lhs == k->var) {
407 	 entry->write_mask = entry->write_mask & ~k->write_mask;
408 	 if (entry->write_mask == 0) {
409 	    entry->remove();
410 	    continue;
411 	 }
412       }
413       if (entry->rhs == k->var) {
414 	 entry->remove();
415       }
416    }
417 
418    /* If we were on a list, remove ourselves before inserting */
419    if (k->next)
420       k->remove();
421 
422    this->kills->push_tail(k);
423 }
424 
425 /**
426  * Adds directly-copied channels between vector variables to the available
427  * copy propagation list.
428  */
429 void
add_copy(ir_assignment * ir)430 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
431 {
432    acp_entry *entry;
433    int orig_swizzle[4] = {0, 1, 2, 3};
434    int swizzle[4];
435 
436    if (ir->condition)
437       return;
438 
439    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
440    if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
441       return;
442 
443    ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
444    if (!rhs) {
445       ir_swizzle *swiz = ir->rhs->as_swizzle();
446       if (!swiz)
447 	 return;
448 
449       rhs = swiz->val->as_dereference_variable();
450       if (!rhs)
451 	 return;
452 
453       orig_swizzle[0] = swiz->mask.x;
454       orig_swizzle[1] = swiz->mask.y;
455       orig_swizzle[2] = swiz->mask.z;
456       orig_swizzle[3] = swiz->mask.w;
457    }
458 
459    /* Move the swizzle channels out to the positions they match in the
460     * destination.  We don't want to have to rewrite the swizzle[]
461     * array every time we clear a bit of the write_mask.
462     */
463    int j = 0;
464    for (int i = 0; i < 4; i++) {
465       if (ir->write_mask & (1 << i))
466 	 swizzle[i] = orig_swizzle[j++];
467    }
468 
469    int write_mask = ir->write_mask;
470    if (lhs->var == rhs->var) {
471       /* If this is a copy from the variable to itself, then we need
472        * to be sure not to include the updated channels from this
473        * instruction in the set of new source channels to be
474        * copy-propagated from.
475        */
476       for (int i = 0; i < 4; i++) {
477 	 if (ir->write_mask & (1 << orig_swizzle[i]))
478 	    write_mask &= ~(1 << i);
479       }
480    }
481 
482    entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask,
483 					swizzle);
484    this->acp->push_tail(entry);
485 }
486 
487 bool
do_copy_propagation_elements(exec_list * instructions)488 do_copy_propagation_elements(exec_list *instructions)
489 {
490    ir_copy_propagation_elements_visitor v;
491 
492    visit_list_elements(&v, instructions);
493 
494    return v.progress;
495 }
496