1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_minmax.cpp
26  *
27  * Drop operands from an expression tree of only min/max operations if they
28  * can be proven to not contribute to the final result.
29  *
30  * The algorithm is similar to alpha-beta pruning on a minmax search.
31  */
32 
33 #include "ir.h"
34 #include "ir_visitor.h"
35 #include "ir_rvalue_visitor.h"
36 #include "ir_optimization.h"
37 #include "ir_builder.h"
38 #include "program/prog_instruction.h"
39 #include "compiler/glsl_types.h"
40 #include "main/macros.h"
41 
42 using namespace ir_builder;
43 
44 namespace {
45 
46 enum compare_components_result {
47    LESS,
48    LESS_OR_EQUAL,
49    EQUAL,
50    GREATER_OR_EQUAL,
51    GREATER,
52    MIXED
53 };
54 
55 class minmax_range {
56 public:
minmax_range(ir_constant * low=NULL,ir_constant * high=NULL)57    minmax_range(ir_constant *low = NULL, ir_constant *high = NULL)
58    {
59       this->low = low;
60       this->high = high;
61    }
62 
63    /* low is the lower limit of the range, high is the higher limit. NULL on
64     * low means negative infinity (unlimited) and on high positive infinity
65     * (unlimited). Because of the two interpretations of the value NULL,
66     * arbitrary comparison between ir_constants is impossible.
67     */
68    ir_constant *low;
69    ir_constant *high;
70 };
71 
72 class ir_minmax_visitor : public ir_rvalue_enter_visitor {
73 public:
ir_minmax_visitor()74    ir_minmax_visitor()
75       : progress(false)
76    {
77    }
78 
79    ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange);
80 
81    void handle_rvalue(ir_rvalue **rvalue);
82 
83    bool progress;
84 };
85 
86 /*
87  * Returns LESS if all vector components of `a' are strictly lower than of `b',
88  * GREATER if all vector components of `a' are strictly greater than of `b',
89  * MIXED if some vector components of `a' are strictly lower than of `b' while
90  * others are strictly greater, or EQUAL otherwise.
91  */
92 static enum compare_components_result
compare_components(ir_constant * a,ir_constant * b)93 compare_components(ir_constant *a, ir_constant *b)
94 {
95    assert(a != NULL);
96    assert(b != NULL);
97 
98    assert(a->type->base_type == b->type->base_type);
99 
100    unsigned a_inc = a->type->is_scalar() ? 0 : 1;
101    unsigned b_inc = b->type->is_scalar() ? 0 : 1;
102    unsigned components = MAX2(a->type->components(), b->type->components());
103 
104    bool foundless = false;
105    bool foundgreater = false;
106    bool foundequal = false;
107 
108    for (unsigned i = 0, c0 = 0, c1 = 0;
109         i < components;
110         c0 += a_inc, c1 += b_inc, ++i) {
111       switch (a->type->base_type) {
112       case GLSL_TYPE_UINT:
113          if (a->value.u[c0] < b->value.u[c1])
114             foundless = true;
115          else if (a->value.u[c0] > b->value.u[c1])
116             foundgreater = true;
117          else
118             foundequal = true;
119          break;
120       case GLSL_TYPE_INT:
121          if (a->value.i[c0] < b->value.i[c1])
122             foundless = true;
123          else if (a->value.i[c0] > b->value.i[c1])
124             foundgreater = true;
125          else
126             foundequal = true;
127          break;
128       case GLSL_TYPE_FLOAT:
129          if (a->value.f[c0] < b->value.f[c1])
130             foundless = true;
131          else if (a->value.f[c0] > b->value.f[c1])
132             foundgreater = true;
133          else
134             foundequal = true;
135          break;
136       case GLSL_TYPE_DOUBLE:
137          if (a->value.d[c0] < b->value.d[c1])
138             foundless = true;
139          else if (a->value.d[c0] > b->value.d[c1])
140             foundgreater = true;
141          else
142             foundequal = true;
143          break;
144       default:
145          unreachable("not reached");
146       }
147    }
148 
149    if (foundless && foundgreater) {
150       /* Some components are strictly lower, others are strictly greater */
151       return MIXED;
152    }
153 
154    if (foundequal) {
155        /* It is not mixed, but it is not strictly lower or greater */
156       if (foundless)
157          return LESS_OR_EQUAL;
158       if (foundgreater)
159          return GREATER_OR_EQUAL;
160       return EQUAL;
161    }
162 
163    /* All components are strictly lower or strictly greater */
164    return foundless ? LESS : GREATER;
165 }
166 
167 static ir_constant *
combine_constant(bool ismin,ir_constant * a,ir_constant * b)168 combine_constant(bool ismin, ir_constant *a, ir_constant *b)
169 {
170    void *mem_ctx = ralloc_parent(a);
171    ir_constant *c = a->clone(mem_ctx, NULL);
172    for (unsigned i = 0; i < c->type->components(); i++) {
173       switch (c->type->base_type) {
174       case GLSL_TYPE_UINT:
175          if ((ismin && b->value.u[i] < c->value.u[i]) ||
176              (!ismin && b->value.u[i] > c->value.u[i]))
177             c->value.u[i] = b->value.u[i];
178          break;
179       case GLSL_TYPE_INT:
180          if ((ismin && b->value.i[i] < c->value.i[i]) ||
181              (!ismin && b->value.i[i] > c->value.i[i]))
182             c->value.i[i] = b->value.i[i];
183          break;
184       case GLSL_TYPE_FLOAT:
185          if ((ismin && b->value.f[i] < c->value.f[i]) ||
186              (!ismin && b->value.f[i] > c->value.f[i]))
187             c->value.f[i] = b->value.f[i];
188          break;
189       case GLSL_TYPE_DOUBLE:
190          if ((ismin && b->value.d[i] < c->value.d[i]) ||
191              (!ismin && b->value.d[i] > c->value.d[i]))
192             c->value.d[i] = b->value.d[i];
193          break;
194       default:
195          assert(!"not reached");
196       }
197    }
198    return c;
199 }
200 
201 static ir_constant *
smaller_constant(ir_constant * a,ir_constant * b)202 smaller_constant(ir_constant *a, ir_constant *b)
203 {
204    assert(a != NULL);
205    assert(b != NULL);
206 
207    enum compare_components_result ret = compare_components(a, b);
208    if (ret == MIXED)
209       return combine_constant(true, a, b);
210    else if (ret < EQUAL)
211       return a;
212    else
213       return b;
214 }
215 
216 static ir_constant *
larger_constant(ir_constant * a,ir_constant * b)217 larger_constant(ir_constant *a, ir_constant *b)
218 {
219    assert(a != NULL);
220    assert(b != NULL);
221 
222    enum compare_components_result ret = compare_components(a, b);
223    if (ret == MIXED)
224       return combine_constant(false, a, b);
225    else if (ret < EQUAL)
226       return b;
227    else
228       return a;
229 }
230 
231 /* Combines two ranges by doing an element-wise min() / max() depending on the
232  * operation.
233  */
234 static minmax_range
combine_range(minmax_range r0,minmax_range r1,bool ismin)235 combine_range(minmax_range r0, minmax_range r1, bool ismin)
236 {
237    minmax_range ret;
238 
239    if (!r0.low) {
240       ret.low = ismin ? r0.low : r1.low;
241    } else if (!r1.low) {
242       ret.low = ismin ? r1.low : r0.low;
243    } else {
244       ret.low = ismin ? smaller_constant(r0.low, r1.low) :
245          larger_constant(r0.low, r1.low);
246    }
247 
248    if (!r0.high) {
249       ret.high = ismin ? r1.high : r0.high;
250    } else if (!r1.high) {
251       ret.high = ismin ? r0.high : r1.high;
252    } else {
253       ret.high = ismin ? smaller_constant(r0.high, r1.high) :
254          larger_constant(r0.high, r1.high);
255    }
256 
257    return ret;
258 }
259 
260 /* Returns a range so that lower limit is the larger of the two lower limits,
261  * and higher limit is the smaller of the two higher limits.
262  */
263 static minmax_range
range_intersection(minmax_range r0,minmax_range r1)264 range_intersection(minmax_range r0, minmax_range r1)
265 {
266    minmax_range ret;
267 
268    if (!r0.low)
269       ret.low = r1.low;
270    else if (!r1.low)
271       ret.low = r0.low;
272    else
273       ret.low = larger_constant(r0.low, r1.low);
274 
275    if (!r0.high)
276       ret.high = r1.high;
277    else if (!r1.high)
278       ret.high = r0.high;
279    else
280       ret.high = smaller_constant(r0.high, r1.high);
281 
282    return ret;
283 }
284 
285 static minmax_range
get_range(ir_rvalue * rval)286 get_range(ir_rvalue *rval)
287 {
288    ir_expression *expr = rval->as_expression();
289    if (expr && (expr->operation == ir_binop_min ||
290                 expr->operation == ir_binop_max)) {
291       minmax_range r0 = get_range(expr->operands[0]);
292       minmax_range r1 = get_range(expr->operands[1]);
293       return combine_range(r0, r1, expr->operation == ir_binop_min);
294    }
295 
296    ir_constant *c = rval->as_constant();
297    if (c) {
298       return minmax_range(c, c);
299    }
300 
301    return minmax_range();
302 }
303 
304 /**
305  * Prunes a min/max expression considering the base range of the parent
306  * min/max expression.
307  *
308  * @param baserange the range that the parents of this min/max expression
309  * in the min/max tree will clamp its value to.
310  */
311 ir_rvalue *
prune_expression(ir_expression * expr,minmax_range baserange)312 ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
313 {
314    assert(expr->operation == ir_binop_min ||
315           expr->operation == ir_binop_max);
316 
317    bool ismin = expr->operation == ir_binop_min;
318    minmax_range limits[2];
319 
320    /* Recurse to get the ranges for each of the subtrees of this
321     * expression. We need to do this as a separate step because we need to
322     * know the ranges of each of the subtrees before we prune either one.
323     * Consider something like this:
324     *
325     *        max
326     *     /       \
327     *    max     max
328     *   /   \   /   \
329     *  3    a   b    2
330     *
331     * We would like to prune away the max on the bottom-right, but to do so
332     * we need to know the range of the expression on the left beforehand,
333     * and there's no guarantee that we will visit either subtree in a
334     * particular order.
335     */
336    for (unsigned i = 0; i < 2; ++i)
337       limits[i] = get_range(expr->operands[i]);
338 
339    for (unsigned i = 0; i < 2; ++i) {
340       bool is_redundant = false;
341 
342       enum compare_components_result cr = LESS;
343       if (ismin) {
344          /* If this operand will always be greater than the other one, it's
345           * redundant.
346           */
347          if (limits[i].low && limits[1 - i].high) {
348                cr = compare_components(limits[i].low, limits[1 - i].high);
349             if (cr >= EQUAL && cr != MIXED)
350                is_redundant = true;
351          }
352          /* If this operand is always greater than baserange, then even if
353           * it's smaller than the other one it'll get clamped, so it's
354           * redundant.
355           */
356          if (!is_redundant && limits[i].low && baserange.high) {
357             cr = compare_components(limits[i].low, baserange.high);
358             if (cr > EQUAL && cr != MIXED)
359                is_redundant = true;
360          }
361       } else {
362          /* If this operand will always be lower than the other one, it's
363           * redundant.
364           */
365          if (limits[i].high && limits[1 - i].low) {
366             cr = compare_components(limits[i].high, limits[1 - i].low);
367             if (cr <= EQUAL)
368                is_redundant = true;
369          }
370          /* If this operand is always lower than baserange, then even if
371           * it's greater than the other one it'll get clamped, so it's
372           * redundant.
373           */
374          if (!is_redundant && limits[i].high && baserange.low) {
375             cr = compare_components(limits[i].high, baserange.low);
376             if (cr < EQUAL)
377                is_redundant = true;
378          }
379       }
380 
381       if (is_redundant) {
382          progress = true;
383 
384          /* Recurse if necessary. */
385          ir_expression *op_expr = expr->operands[1 - i]->as_expression();
386          if (op_expr && (op_expr->operation == ir_binop_min ||
387                          op_expr->operation == ir_binop_max)) {
388             return prune_expression(op_expr, baserange);
389          }
390 
391          return expr->operands[1 - i];
392       } else if (cr == MIXED) {
393          /* If we have mixed vector operands, we can try to resolve the minmax
394           * expression by doing a component-wise minmax:
395           *
396           *             min                          min
397           *           /    \                       /    \
398           *         min     a       ===>        [1,1]    a
399           *       /    \
400           *    [1,3]   [3,1]
401           *
402           */
403          ir_constant *a = expr->operands[0]->as_constant();
404          ir_constant *b = expr->operands[1]->as_constant();
405          if (a && b)
406             return combine_constant(ismin, a, b);
407       }
408    }
409 
410    /* Now recurse to operands giving them the proper baserange. The baserange
411     * to pass is the intersection of our baserange and the other operand's
412     * limit with one of the ranges unlimited. If we can't compute a valid
413     * intersection, we use the current baserange.
414     */
415    for (unsigned i = 0; i < 2; ++i) {
416       ir_expression *op_expr = expr->operands[i]->as_expression();
417       if (op_expr && (op_expr->operation == ir_binop_min ||
418                       op_expr->operation == ir_binop_max)) {
419          /* We can only compute a new baserange for this operand if we managed
420           * to compute a valid range for the other operand.
421           */
422          if (ismin)
423             limits[1 - i].low = NULL;
424          else
425             limits[1 - i].high = NULL;
426          minmax_range base = range_intersection(limits[1 - i], baserange);
427          expr->operands[i] = prune_expression(op_expr, base);
428       }
429    }
430 
431    /* If we got here we could not discard any of the operands of the minmax
432     * expression, but we can still try to resolve the expression if both
433     * operands are constant. We do this after the loop above, to make sure
434     * that if our operands are minmax expressions we have tried to prune them
435     * first (hopefully reducing them to constants).
436     */
437    ir_constant *a = expr->operands[0]->as_constant();
438    ir_constant *b = expr->operands[1]->as_constant();
439    if (a && b)
440       return combine_constant(ismin, a, b);
441 
442    return expr;
443 }
444 
445 static ir_rvalue *
swizzle_if_required(ir_expression * expr,ir_rvalue * rval)446 swizzle_if_required(ir_expression *expr, ir_rvalue *rval)
447 {
448    if (expr->type->is_vector() && rval->type->is_scalar()) {
449       return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements);
450    } else {
451       return rval;
452    }
453 }
454 
455 void
handle_rvalue(ir_rvalue ** rvalue)456 ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue)
457 {
458    if (!*rvalue)
459       return;
460 
461    ir_expression *expr = (*rvalue)->as_expression();
462    if (!expr || (expr->operation != ir_binop_min &&
463                  expr->operation != ir_binop_max))
464       return;
465 
466    ir_rvalue *new_rvalue = prune_expression(expr, minmax_range());
467    if (new_rvalue == *rvalue)
468       return;
469 
470    /* If the expression type is a vector and the optimization leaves a scalar
471     * as the result, we need to turn it into a vector.
472     */
473    *rvalue = swizzle_if_required(expr, new_rvalue);
474 
475    progress = true;
476 }
477 
478 }
479 
480 bool
do_minmax_prune(exec_list * instructions)481 do_minmax_prune(exec_list *instructions)
482 {
483    ir_minmax_visitor v;
484 
485    visit_list_elements(&v, instructions);
486 
487    return v.progress;
488 }
489