1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_rebalance_tree.cpp
26  *
27  * Rebalances a reduction expression tree.
28  *
29  * For reduction operations (e.g., x + y + z + w) we generate an expression
30  * tree like
31  *
32  *        +
33  *       / \
34  *      +   w
35  *     / \
36  *    +   z
37  *   / \
38  *  x   y
39  *
40  * which we can rebalance into
41  *
42  *       +
43  *      / \
44  *     /   \
45  *    +     +
46  *   / \   / \
47  *  x   y z   w
48  *
49  * to get a better instruction scheduling.
50  *
51  * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
52  * and Bette L. Warren.
53  *
54  * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
55  * explanation of the of the tree_to_vine() (rightward rotation) and
56  * vine_to_tree() (leftward rotation) algorithms.
57  */
58 
59 #include "ir.h"
60 #include "ir_visitor.h"
61 #include "ir_rvalue_visitor.h"
62 #include "ir_optimization.h"
63 #include "main/macros.h" /* for MAX2 */
64 
65 /* The DSW algorithm generates a degenerate tree (really, a linked list) in
66  * tree_to_vine(). We'd rather not leave a binary expression with only one
67  * operand, so trivial modifications (the ternary operators below) are needed
68  * to ensure that we only rotate around the ir_expression nodes of the tree.
69  */
70 static unsigned
tree_to_vine(ir_expression * root)71 tree_to_vine(ir_expression *root)
72 {
73    unsigned size = 0;
74    ir_rvalue *vine_tail = root;
75    ir_rvalue *remainder = root->operands[1];
76 
77    while (remainder != NULL) {
78       ir_expression *remainder_temp = remainder->as_expression();
79       ir_expression *remainder_left = remainder_temp ?
80          remainder_temp->operands[0]->as_expression() : NULL;
81 
82       if (remainder_left == NULL) {
83          /* move vine_tail down one */
84          vine_tail = remainder;
85          remainder = remainder->as_expression() ?
86             ((ir_expression *)remainder)->operands[1] : NULL;
87          size++;
88       } else {
89          /* rotate */
90          ir_expression *tempptr = remainder_left;
91          ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
92          tempptr->operands[1] = remainder;
93          remainder = tempptr;
94          ((ir_expression *)vine_tail)->operands[1] = tempptr;
95       }
96    }
97 
98    return size;
99 }
100 
101 static void
compression(ir_expression * root,unsigned count)102 compression(ir_expression *root, unsigned count)
103 {
104    ir_expression *scanner = root;
105 
106    for (unsigned i = 0; i < count; i++) {
107       ir_expression *child = (ir_expression *)scanner->operands[1];
108       scanner->operands[1] = child->operands[1];
109       scanner = (ir_expression *)scanner->operands[1];
110       child->operands[1] = scanner->operands[0];
111       scanner->operands[0] = child;
112    }
113 }
114 
115 static void
vine_to_tree(ir_expression * root,unsigned size)116 vine_to_tree(ir_expression *root, unsigned size)
117 {
118    int n = size - 1;
119    for (int m = n / 2; m > 0; m = n / 2) {
120       compression(root, m);
121       n -= m + 1;
122    }
123 }
124 
125 namespace {
126 
127 class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
128 public:
ir_rebalance_visitor()129    ir_rebalance_visitor()
130    {
131       progress = false;
132    }
133 
134    virtual ir_visitor_status visit_enter(ir_assignment *ir);
135 
136    void handle_rvalue(ir_rvalue **rvalue);
137 
138    bool progress;
139 };
140 
141 struct is_reduction_data {
142    ir_expression_operation operation;
143    const glsl_type *type;
144    unsigned num_expr;
145    bool is_reduction;
146    bool contains_constant;
147 };
148 
149 } /* anonymous namespace */
150 
151 ir_visitor_status
visit_enter(ir_assignment * ir)152 ir_rebalance_visitor::visit_enter(ir_assignment *ir)
153 {
154    ir_variable *var = ir->lhs->variable_referenced();
155    if (var->data.invariant || var->data.precise) {
156       /* If we're assigning to an invariant variable, just bail.  Tree
157        * rebalancing (reassociation) isn't precision-safe.
158        */
159       return visit_continue_with_parent;
160    } else {
161       return visit_continue;
162    }
163 }
164 
165 static bool
is_reduction_operation(ir_expression_operation operation)166 is_reduction_operation(ir_expression_operation operation)
167 {
168    switch (operation) {
169    case ir_binop_add:
170    case ir_binop_mul:
171    case ir_binop_bit_and:
172    case ir_binop_bit_xor:
173    case ir_binop_bit_or:
174    case ir_binop_logic_and:
175    case ir_binop_logic_xor:
176    case ir_binop_logic_or:
177    case ir_binop_min:
178    case ir_binop_max:
179       return true;
180    default:
181       return false;
182    }
183 }
184 
185 /* Note that this function does not attempt to recognize that reduction trees
186  * are already balanced.
187  *
188  * We return false from this function for a number of reasons other than an
189  * expression tree not being a mathematical reduction. Namely,
190  *
191  *    - if the tree contains multiple constants that we may be able to combine.
192  *    - if the tree contains matrices:
193  *       - they might contain vec4's with many constant components that we can
194  *         simplify after splitting.
195  *       - applying the matrix chain ordering optimization is more than just
196  *         balancing an expression tree.
197  *    - if the tree contains operations on multiple types.
198  *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
199  *      would trick the visiting pass.
200  */
201 static void
is_reduction(ir_instruction * ir,void * data)202 is_reduction(ir_instruction *ir, void *data)
203 {
204    struct is_reduction_data *ird = (struct is_reduction_data *)data;
205    if (!ird->is_reduction)
206       return;
207 
208    /* We don't want to balance a tree that contains multiple constants, since
209     * we'll be able to constant fold them if they're not in separate subtrees.
210     */
211    if (ir->as_constant()) {
212       if (ird->contains_constant) {
213          ird->is_reduction = false;
214       }
215       ird->contains_constant = true;
216       return;
217    }
218 
219    /* Array/record dereferences have subtrees that are not part of the expr
220     * tree we're balancing. Skip trees containing them.
221     */
222    if (ir->ir_type == ir_type_dereference_array ||
223        ir->ir_type == ir_type_dereference_record) {
224       ird->is_reduction = false;
225       return;
226    }
227 
228    ir_expression *expr = ir->as_expression();
229    if (!expr)
230       return;
231 
232    /* Non-constant matrices might still contain constant vec4 that we can
233     * constant fold once split up. Handling matrices will need some more
234     * work.
235     */
236    if (expr->type->is_matrix() ||
237        expr->operands[0]->type->is_matrix() ||
238        (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
239       ird->is_reduction = false;
240       return;
241    }
242 
243    if (ird->type != NULL && ird->type != expr->type) {
244       ird->is_reduction = false;
245       return;
246    }
247    ird->type = expr->type;
248 
249    ird->num_expr++;
250    if (is_reduction_operation(expr->operation)) {
251       if (ird->operation != 0 && ird->operation != expr->operation)
252          ird->is_reduction = false;
253       ird->operation = expr->operation;
254    } else {
255       ird->is_reduction = false;
256    }
257 }
258 
259 static ir_rvalue *
handle_expression(ir_expression * expr)260 handle_expression(ir_expression *expr)
261 {
262    struct is_reduction_data ird;
263    ird.operation = (ir_expression_operation)0;
264    ird.type = NULL;
265    ird.num_expr = 0;
266    ird.is_reduction = true;
267    ird.contains_constant = false;
268 
269    visit_tree(expr, is_reduction, (void *)&ird);
270 
271    if (ird.is_reduction && ird.num_expr > 2) {
272       ir_constant z = ir_constant(0.0f);
273       ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
274 
275       unsigned size = tree_to_vine(&pseudo_root);
276       vine_to_tree(&pseudo_root, size);
277 
278       expr = (ir_expression *)pseudo_root.operands[1];
279    }
280    return expr;
281 }
282 
283 static void
update_types(ir_instruction * ir,void *)284 update_types(ir_instruction *ir, void *)
285 {
286    ir_expression *expr = ir->as_expression();
287    if (!expr)
288       return;
289 
290    const glsl_type *const new_type =
291       glsl_type::get_instance(expr->type->base_type,
292                               MAX2(expr->operands[0]->type->vector_elements,
293                                    expr->operands[1]->type->vector_elements),
294                               1);
295    assert(new_type != glsl_type::error_type);
296    expr->type = new_type;
297 }
298 
299 void
handle_rvalue(ir_rvalue ** rvalue)300 ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
301 {
302    if (!*rvalue)
303       return;
304 
305    ir_expression *expr = (*rvalue)->as_expression();
306    if (!expr || !is_reduction_operation(expr->operation))
307       return;
308 
309    ir_rvalue *new_rvalue = handle_expression(expr);
310 
311    /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
312     * or some other set of cases) new_rvalue will point to the same root as
313     * before.
314     *
315     * Similarly, if the tree rooted at *rvalue was a reduction and was already
316     * balanced, the algorithm will rearrange the tree but will ultimately
317     * return an identical tree, so this check will handle that as well and
318     * will not set progress = true.
319     */
320    if (new_rvalue == *rvalue)
321       return;
322 
323    visit_tree(new_rvalue, NULL, NULL, update_types);
324 
325    *rvalue = new_rvalue;
326    this->progress = true;
327 }
328 
329 bool
do_rebalance_tree(exec_list * instructions)330 do_rebalance_tree(exec_list *instructions)
331 {
332    ir_rebalance_visitor v;
333 
334    v.run(instructions);
335 
336    return v.progress;
337 }
338