1 /*
2  * Copyright © 2010 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /**
25  * \file opt_algebraic.cpp
26  *
27  * Takes advantage of association, commutivity, and other algebraic
28  * properties to simplify expressions.
29  */
30 
31 #include "ir.h"
32 #include "ir_visitor.h"
33 #include "ir_rvalue_visitor.h"
34 #include "ir_optimization.h"
35 #include "ir_builder.h"
36 #include "compiler/glsl_types.h"
37 
38 using namespace ir_builder;
39 
40 namespace {
41 
42 /**
43  * Visitor class for replacing expressions with ir_constant values.
44  */
45 
46 class ir_algebraic_visitor : public ir_rvalue_visitor {
47 public:
ir_algebraic_visitor(bool native_integers,const struct gl_shader_compiler_options * options)48    ir_algebraic_visitor(bool native_integers,
49                         const struct gl_shader_compiler_options *options)
50       : options(options)
51    {
52       this->progress = false;
53       this->mem_ctx = NULL;
54       this->native_integers = native_integers;
55    }
56 
~ir_algebraic_visitor()57    virtual ~ir_algebraic_visitor()
58    {
59    }
60 
61    virtual ir_visitor_status visit_enter(ir_assignment *ir);
62 
63    ir_rvalue *handle_expression(ir_expression *ir);
64    void handle_rvalue(ir_rvalue **rvalue);
65    bool reassociate_constant(ir_expression *ir1,
66 			     int const_index,
67 			     ir_constant *constant,
68 			     ir_expression *ir2);
69    void reassociate_operands(ir_expression *ir1,
70 			     int op1,
71 			     ir_expression *ir2,
72 			     int op2);
73    ir_rvalue *swizzle_if_required(ir_expression *expr,
74 				  ir_rvalue *operand);
75 
76    const struct gl_shader_compiler_options *options;
77    void *mem_ctx;
78 
79    bool native_integers;
80    bool progress;
81 };
82 
83 } /* unnamed namespace */
84 
85 ir_visitor_status
visit_enter(ir_assignment * ir)86 ir_algebraic_visitor::visit_enter(ir_assignment *ir)
87 {
88    ir_variable *var = ir->lhs->variable_referenced();
89    if (var->data.invariant || var->data.precise) {
90       /* If we're assigning to an invariant or precise variable, just bail.
91        * Most of the algebraic optimizations aren't precision-safe.
92        *
93        * FINISHME: Find out which optimizations are precision-safe and enable
94        * then only for invariant or precise trees.
95        */
96       return visit_continue_with_parent;
97    } else {
98       return visit_continue;
99    }
100 }
101 
102 static inline bool
is_vec_zero(ir_constant * ir)103 is_vec_zero(ir_constant *ir)
104 {
105    return (ir == NULL) ? false : ir->is_zero();
106 }
107 
108 static inline bool
is_vec_one(ir_constant * ir)109 is_vec_one(ir_constant *ir)
110 {
111    return (ir == NULL) ? false : ir->is_one();
112 }
113 
114 static inline bool
is_vec_two(ir_constant * ir)115 is_vec_two(ir_constant *ir)
116 {
117    return (ir == NULL) ? false : ir->is_value(2.0, 2);
118 }
119 
120 static inline bool
is_vec_four(ir_constant * ir)121 is_vec_four(ir_constant *ir)
122 {
123    return (ir == NULL) ? false : ir->is_value(4.0, 4);
124 }
125 
126 static inline bool
is_vec_negative_one(ir_constant * ir)127 is_vec_negative_one(ir_constant *ir)
128 {
129    return (ir == NULL) ? false : ir->is_negative_one();
130 }
131 
132 static inline bool
is_valid_vec_const(ir_constant * ir)133 is_valid_vec_const(ir_constant *ir)
134 {
135    if (ir == NULL)
136       return false;
137 
138    if (!ir->type->is_scalar() && !ir->type->is_vector())
139       return false;
140 
141    return true;
142 }
143 
144 static inline bool
is_less_than_one(ir_constant * ir)145 is_less_than_one(ir_constant *ir)
146 {
147    assert(ir->type->is_float());
148 
149    if (!is_valid_vec_const(ir))
150       return false;
151 
152    unsigned component = 0;
153    for (int c = 0; c < ir->type->vector_elements; c++) {
154       if (ir->get_float_component(c) < 1.0f)
155          component++;
156    }
157 
158    return (component == ir->type->vector_elements);
159 }
160 
161 static inline bool
is_greater_than_zero(ir_constant * ir)162 is_greater_than_zero(ir_constant *ir)
163 {
164    assert(ir->type->is_float());
165 
166    if (!is_valid_vec_const(ir))
167       return false;
168 
169    unsigned component = 0;
170    for (int c = 0; c < ir->type->vector_elements; c++) {
171       if (ir->get_float_component(c) > 0.0f)
172          component++;
173    }
174 
175    return (component == ir->type->vector_elements);
176 }
177 
178 static void
update_type(ir_expression * ir)179 update_type(ir_expression *ir)
180 {
181    if (ir->operands[0]->type->is_vector())
182       ir->type = ir->operands[0]->type;
183    else
184       ir->type = ir->operands[1]->type;
185 }
186 
187 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
188 static ir_expression *
try_replace_with_dot(ir_expression * expr0,ir_expression * expr1,void * mem_ctx)189 try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
190 {
191    if (expr0 && expr0->operation == ir_binop_add &&
192        expr0->type->is_float() &&
193        expr1 && expr1->operation == ir_binop_add &&
194        expr1->type->is_float()) {
195       ir_swizzle *x = expr0->operands[0]->as_swizzle();
196       ir_swizzle *y = expr0->operands[1]->as_swizzle();
197       ir_swizzle *z = expr1->operands[0]->as_swizzle();
198       ir_swizzle *w = expr1->operands[1]->as_swizzle();
199 
200       if (!x || x->mask.num_components != 1 ||
201           !y || y->mask.num_components != 1 ||
202           !z || z->mask.num_components != 1 ||
203           !w || w->mask.num_components != 1) {
204          return NULL;
205       }
206 
207       bool swiz_seen[4] = {false, false, false, false};
208       swiz_seen[x->mask.x] = true;
209       swiz_seen[y->mask.x] = true;
210       swiz_seen[z->mask.x] = true;
211       swiz_seen[w->mask.x] = true;
212 
213       if (!swiz_seen[0] || !swiz_seen[1] ||
214           !swiz_seen[2] || !swiz_seen[3]) {
215          return NULL;
216       }
217 
218       if (x->val->equals(y->val) &&
219           x->val->equals(z->val) &&
220           x->val->equals(w->val)) {
221          return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
222       }
223    }
224    return NULL;
225 }
226 
227 void
reassociate_operands(ir_expression * ir1,int op1,ir_expression * ir2,int op2)228 ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
229 					   int op1,
230 					   ir_expression *ir2,
231 					   int op2)
232 {
233    ir_rvalue *temp = ir2->operands[op2];
234    ir2->operands[op2] = ir1->operands[op1];
235    ir1->operands[op1] = temp;
236 
237    /* Update the type of ir2.  The type of ir1 won't have changed --
238     * base types matched, and at least one of the operands of the 2
239     * binops is still a vector if any of them were.
240     */
241    update_type(ir2);
242 
243    this->progress = true;
244 }
245 
246 /**
247  * Reassociates a constant down a tree of adds or multiplies.
248  *
249  * Consider (2 * (a * (b * 0.5))).  We want to end up with a * b.
250  */
251 bool
reassociate_constant(ir_expression * ir1,int const_index,ir_constant * constant,ir_expression * ir2)252 ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
253 					   ir_constant *constant,
254 					   ir_expression *ir2)
255 {
256    if (!ir2 || ir1->operation != ir2->operation)
257       return false;
258 
259    /* Don't want to even think about matrices. */
260    if (ir1->operands[0]->type->is_matrix() ||
261        ir1->operands[1]->type->is_matrix() ||
262        ir2->operands[0]->type->is_matrix() ||
263        ir2->operands[1]->type->is_matrix())
264       return false;
265 
266    void *mem_ctx = ralloc_parent(ir2);
267 
268    ir_constant *ir2_const[2];
269    ir2_const[0] = ir2->operands[0]->constant_expression_value(mem_ctx);
270    ir2_const[1] = ir2->operands[1]->constant_expression_value(mem_ctx);
271 
272    if (ir2_const[0] && ir2_const[1])
273       return false;
274 
275    if (ir2_const[0]) {
276       reassociate_operands(ir1, const_index, ir2, 1);
277       return true;
278    } else if (ir2_const[1]) {
279       reassociate_operands(ir1, const_index, ir2, 0);
280       return true;
281    }
282 
283    if (reassociate_constant(ir1, const_index, constant,
284 			    ir2->operands[0]->as_expression())) {
285       update_type(ir2);
286       return true;
287    }
288 
289    if (reassociate_constant(ir1, const_index, constant,
290 			    ir2->operands[1]->as_expression())) {
291       update_type(ir2);
292       return true;
293    }
294 
295    return false;
296 }
297 
298 /* When eliminating an expression and just returning one of its operands,
299  * we may need to swizzle that operand out to a vector if the expression was
300  * vector type.
301  */
302 ir_rvalue *
swizzle_if_required(ir_expression * expr,ir_rvalue * operand)303 ir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
304 					  ir_rvalue *operand)
305 {
306    if (expr->type->is_vector() && operand->type->is_scalar()) {
307       return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0,
308 				     expr->type->vector_elements);
309    } else
310       return operand;
311 }
312 
313 ir_rvalue *
handle_expression(ir_expression * ir)314 ir_algebraic_visitor::handle_expression(ir_expression *ir)
315 {
316    ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
317    ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
318 
319    if (ir->operation == ir_binop_mul &&
320        ir->operands[0]->type->is_matrix() &&
321        ir->operands[1]->type->is_vector()) {
322       ir_expression *matrix_mul = ir->operands[0]->as_expression();
323 
324       if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
325          matrix_mul->operands[0]->type->is_matrix() &&
326          matrix_mul->operands[1]->type->is_matrix()) {
327 
328          return mul(matrix_mul->operands[0],
329                     mul(matrix_mul->operands[1], ir->operands[1]));
330       }
331    }
332 
333    assert(ir->num_operands <= 4);
334    for (unsigned i = 0; i < ir->num_operands; i++) {
335       if (ir->operands[i]->type->is_matrix())
336 	 return ir;
337 
338       op_const[i] =
339          ir->operands[i]->constant_expression_value(ralloc_parent(ir));
340       op_expr[i] = ir->operands[i]->as_expression();
341    }
342 
343    if (this->mem_ctx == NULL)
344       this->mem_ctx = ralloc_parent(ir);
345 
346    switch (ir->operation) {
347    case ir_unop_bit_not:
348       if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
349          return op_expr[0]->operands[0];
350       break;
351 
352    case ir_unop_abs:
353       if (op_expr[0] == NULL)
354 	 break;
355 
356       switch (op_expr[0]->operation) {
357       case ir_unop_abs:
358       case ir_unop_neg:
359          return abs(op_expr[0]->operands[0]);
360       default:
361          break;
362       }
363       break;
364 
365    case ir_unop_neg:
366       if (op_expr[0] == NULL)
367 	 break;
368 
369       if (op_expr[0]->operation == ir_unop_neg) {
370          return op_expr[0]->operands[0];
371       }
372       break;
373 
374    case ir_unop_exp:
375       if (op_expr[0] == NULL)
376 	 break;
377 
378       if (op_expr[0]->operation == ir_unop_log) {
379          return op_expr[0]->operands[0];
380       }
381       break;
382 
383    case ir_unop_log:
384       if (op_expr[0] == NULL)
385 	 break;
386 
387       if (op_expr[0]->operation == ir_unop_exp) {
388          return op_expr[0]->operands[0];
389       }
390       break;
391 
392    case ir_unop_exp2:
393       if (op_expr[0] == NULL)
394 	 break;
395 
396       if (op_expr[0]->operation == ir_unop_log2) {
397          return op_expr[0]->operands[0];
398       }
399 
400       if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) {
401          for (int log2_pos = 0; log2_pos < 2; log2_pos++) {
402             ir_expression *log2_expr =
403                op_expr[0]->operands[log2_pos]->as_expression();
404 
405             if (log2_expr && log2_expr->operation == ir_unop_log2) {
406                return new(mem_ctx) ir_expression(ir_binop_pow,
407                                                  ir->type,
408                                                  log2_expr->operands[0],
409                                                  op_expr[0]->operands[1 - log2_pos]);
410             }
411          }
412       }
413       break;
414 
415    case ir_unop_log2:
416       if (op_expr[0] == NULL)
417 	 break;
418 
419       if (op_expr[0]->operation == ir_unop_exp2) {
420          return op_expr[0]->operands[0];
421       }
422       break;
423 
424    case ir_unop_f2i:
425    case ir_unop_f2u:
426       if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
427          return new(mem_ctx) ir_expression(ir->operation,
428                                            ir->type,
429                                            op_expr[0]->operands[0]);
430       }
431       break;
432 
433    case ir_unop_logic_not: {
434       enum ir_expression_operation new_op = ir_unop_logic_not;
435 
436       if (op_expr[0] == NULL)
437 	 break;
438 
439       switch (op_expr[0]->operation) {
440       case ir_binop_less:    new_op = ir_binop_gequal;  break;
441       case ir_binop_gequal:  new_op = ir_binop_less;    break;
442       case ir_binop_equal:   new_op = ir_binop_nequal;  break;
443       case ir_binop_nequal:  new_op = ir_binop_equal;   break;
444       case ir_binop_all_equal:   new_op = ir_binop_any_nequal;  break;
445       case ir_binop_any_nequal:  new_op = ir_binop_all_equal;   break;
446 
447       default:
448 	 /* The default case handler is here to silence a warning from GCC.
449 	  */
450 	 break;
451       }
452 
453       if (new_op != ir_unop_logic_not) {
454 	 return new(mem_ctx) ir_expression(new_op,
455 					   ir->type,
456 					   op_expr[0]->operands[0],
457 					   op_expr[0]->operands[1]);
458       }
459 
460       break;
461    }
462 
463    case ir_unop_saturate:
464       if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
465          ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
466          ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();
467 
468          if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
469              b2f_1 && b2f_1->operation == ir_unop_b2f) {
470             return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
471          }
472       }
473       break;
474 
475       /* This macro CANNOT use the do { } while(true) mechanism because
476        * then the breaks apply to the loop instead of the switch!
477        */
478 #define HANDLE_PACK_UNPACK_INVERSE(inverse_operation)                   \
479       {                                                                 \
480          ir_expression *const op = ir->operands[0]->as_expression();    \
481          if (op == NULL)                                                \
482             break;                                                      \
483          if (op->operation == (inverse_operation))                      \
484             return op->operands[0];                                     \
485          break;                                                         \
486       }
487 
488    case ir_unop_unpack_uint_2x32:
489       HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_uint_2x32);
490    case ir_unop_pack_uint_2x32:
491       HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_uint_2x32);
492    case ir_unop_unpack_int_2x32:
493       HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_int_2x32);
494    case ir_unop_pack_int_2x32:
495       HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_int_2x32);
496    case ir_unop_unpack_double_2x32:
497       HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_double_2x32);
498    case ir_unop_pack_double_2x32:
499       HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_double_2x32);
500 
501 #undef HANDLE_PACK_UNPACK_INVERSE
502 
503    case ir_binop_add:
504       if (is_vec_zero(op_const[0]))
505 	 return ir->operands[1];
506       if (is_vec_zero(op_const[1]))
507 	 return ir->operands[0];
508 
509       /* Reassociate addition of constants so that we can do constant
510        * folding.
511        */
512       if (op_const[0] && !op_const[1])
513 	 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
514       if (op_const[1] && !op_const[0])
515 	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
516 
517       /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
518       if (options->OptimizeForAOS) {
519          ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
520                                                     mem_ctx);
521          if (expr)
522             return expr;
523       }
524 
525       /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
526        *
527        * (-x + y) * a + x
528        * (x * -a) + (y * a) + x
529        * x + (x * -a) + (y * a)
530        * x * (1 - a) + y * a
531        * lrp(x, y, a)
532        */
533       for (int mul_pos = 0; mul_pos < 2; mul_pos++) {
534          ir_expression *mul = op_expr[mul_pos];
535 
536          if (!mul || mul->operation != ir_binop_mul)
537             continue;
538 
539          /* Multiply found on one of the operands. Now check for an
540           * inner addition operation.
541           */
542          for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) {
543             ir_expression *inner_add =
544                mul->operands[inner_add_pos]->as_expression();
545 
546             if (!inner_add || inner_add->operation != ir_binop_add)
547                continue;
548 
549             /* Inner addition found on one of the operands. Now check for
550              * one of the operands of the inner addition to be the negative
551              * of x_operand.
552              */
553             for (int neg_pos = 0; neg_pos < 2; neg_pos++) {
554                ir_expression *neg =
555                   inner_add->operands[neg_pos]->as_expression();
556 
557                if (!neg || neg->operation != ir_unop_neg)
558                   continue;
559 
560                ir_rvalue *x_operand = ir->operands[1 - mul_pos];
561 
562                if (!neg->operands[0]->equals(x_operand))
563                   continue;
564 
565                ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
566                ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
567 
568                if (x_operand->type != y_operand->type ||
569                    x_operand->type != a_operand->type)
570                   continue;
571 
572                return lrp(x_operand, y_operand, a_operand);
573             }
574          }
575       }
576 
577       break;
578 
579    case ir_binop_sub:
580       if (is_vec_zero(op_const[0]))
581 	 return neg(ir->operands[1]);
582       if (is_vec_zero(op_const[1]))
583 	 return ir->operands[0];
584       break;
585 
586    case ir_binop_mul:
587       if (is_vec_one(op_const[0]))
588 	 return ir->operands[1];
589       if (is_vec_one(op_const[1]))
590 	 return ir->operands[0];
591 
592       if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
593 	 return ir_constant::zero(ir, ir->type);
594 
595       if (is_vec_negative_one(op_const[0]))
596          return neg(ir->operands[1]);
597       if (is_vec_negative_one(op_const[1]))
598          return neg(ir->operands[0]);
599 
600       if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
601           op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
602          return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
603       }
604 
605       /* Reassociate multiplication of constants so that we can do
606        * constant folding.
607        */
608       if (op_const[0] && !op_const[1])
609 	 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
610       if (op_const[1] && !op_const[0])
611 	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
612 
613       /* Optimizes
614        *
615        *    (mul (floor (add (abs x) 0.5) (sign x)))
616        *
617        * into
618        *
619        *    (trunc (add x (mul (sign x) 0.5)))
620        */
621       for (int i = 0; i < 2; i++) {
622          ir_expression *sign_expr = ir->operands[i]->as_expression();
623          ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
624 
625          if (!sign_expr || sign_expr->operation != ir_unop_sign ||
626              !floor_expr || floor_expr->operation != ir_unop_floor)
627             continue;
628 
629          ir_expression *add_expr = floor_expr->operands[0]->as_expression();
630          if (!add_expr || add_expr->operation != ir_binop_add)
631             continue;
632 
633          for (int j = 0; j < 2; j++) {
634             ir_expression *abs_expr = add_expr->operands[j]->as_expression();
635             if (!abs_expr || abs_expr->operation != ir_unop_abs)
636                continue;
637 
638             ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
639             if (!point_five || !point_five->is_value(0.5, 0))
640                continue;
641 
642             if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
643                return trunc(add(abs_expr->operands[0],
644                                 mul(sign_expr, point_five)));
645             }
646          }
647       }
648       break;
649 
650    case ir_binop_div:
651       if (is_vec_one(op_const[0]) && (
652                 ir->type->is_float() || ir->type->is_double())) {
653 	 return new(mem_ctx) ir_expression(ir_unop_rcp,
654 					   ir->operands[1]->type,
655 					   ir->operands[1],
656 					   NULL);
657       }
658       if (is_vec_one(op_const[1]))
659 	 return ir->operands[0];
660       break;
661 
662    case ir_binop_dot:
663       if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
664 	 return ir_constant::zero(mem_ctx, ir->type);
665 
666       for (int i = 0; i < 2; i++) {
667          if (!op_const[i])
668             continue;
669 
670          unsigned components[4] = { 0 }, count = 0;
671 
672          for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
673             if (op_const[i]->is_zero())
674                continue;
675 
676             components[count] = c;
677             count++;
678          }
679 
680          /* No channels had zero values; bail. */
681          if (count >= op_const[i]->type->vector_elements)
682             break;
683 
684          ir_expression_operation op = count == 1 ?
685             ir_binop_mul : ir_binop_dot;
686 
687          /* Swizzle both operands to remove the channels that were zero. */
688          return new(mem_ctx)
689             ir_expression(op, ir->type,
690                           new(mem_ctx) ir_swizzle(ir->operands[0],
691                                                   components, count),
692                           new(mem_ctx) ir_swizzle(ir->operands[1],
693                                                   components, count));
694       }
695       break;
696 
697    case ir_binop_less:
698    case ir_binop_gequal:
699    case ir_binop_equal:
700    case ir_binop_nequal:
701       for (int add_pos = 0; add_pos < 2; add_pos++) {
702          ir_expression *add = op_expr[add_pos];
703 
704          if (!add || add->operation != ir_binop_add)
705             continue;
706 
707          ir_constant *zero = op_const[1 - add_pos];
708          if (!is_vec_zero(zero))
709             continue;
710 
711          /* Depending of the zero position we want to optimize
712           * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y)
713           */
714          if (add_pos == 1) {
715             return new(mem_ctx) ir_expression(ir->operation,
716                                               neg(add->operands[0]),
717                                               add->operands[1]);
718          } else {
719             return new(mem_ctx) ir_expression(ir->operation,
720                                               add->operands[0],
721                                               neg(add->operands[1]));
722          }
723       }
724       break;
725 
726    case ir_binop_all_equal:
727    case ir_binop_any_nequal:
728       if (ir->operands[0]->type->is_scalar() &&
729           ir->operands[1]->type->is_scalar())
730          return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
731                                            ? ir_binop_equal : ir_binop_nequal,
732                                            ir->operands[0],
733                                            ir->operands[1]);
734       break;
735 
736    case ir_binop_rshift:
737    case ir_binop_lshift:
738       /* 0 >> x == 0 */
739       if (is_vec_zero(op_const[0]))
740          return ir->operands[0];
741       /* x >> 0 == x */
742       if (is_vec_zero(op_const[1]))
743          return ir->operands[0];
744       break;
745 
746    case ir_binop_logic_and:
747       if (is_vec_one(op_const[0])) {
748 	 return ir->operands[1];
749       } else if (is_vec_one(op_const[1])) {
750 	 return ir->operands[0];
751       } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
752 	 return ir_constant::zero(mem_ctx, ir->type);
753       } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
754                  op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
755          /* De Morgan's Law:
756           *    (not A) and (not B) === not (A or B)
757           */
758          return logic_not(logic_or(op_expr[0]->operands[0],
759                                    op_expr[1]->operands[0]));
760       } else if (ir->operands[0]->equals(ir->operands[1])) {
761          /* (a && a) == a */
762          return ir->operands[0];
763       }
764       break;
765 
766    case ir_binop_logic_xor:
767       if (is_vec_zero(op_const[0])) {
768 	 return ir->operands[1];
769       } else if (is_vec_zero(op_const[1])) {
770 	 return ir->operands[0];
771       } else if (is_vec_one(op_const[0])) {
772 	 return logic_not(ir->operands[1]);
773       } else if (is_vec_one(op_const[1])) {
774 	 return logic_not(ir->operands[0]);
775       } else if (ir->operands[0]->equals(ir->operands[1])) {
776          /* (a ^^ a) == false */
777 	 return ir_constant::zero(mem_ctx, ir->type);
778       }
779       break;
780 
781    case ir_binop_logic_or:
782       if (is_vec_zero(op_const[0])) {
783 	 return ir->operands[1];
784       } else if (is_vec_zero(op_const[1])) {
785 	 return ir->operands[0];
786       } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) {
787 	 ir_constant_data data;
788 
789 	 for (unsigned i = 0; i < 16; i++)
790 	    data.b[i] = true;
791 
792 	 return new(mem_ctx) ir_constant(ir->type, &data);
793       } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
794                  op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
795          /* De Morgan's Law:
796           *    (not A) or (not B) === not (A and B)
797           */
798          return logic_not(logic_and(op_expr[0]->operands[0],
799                                     op_expr[1]->operands[0]));
800       } else if (ir->operands[0]->equals(ir->operands[1])) {
801          /* (a || a) == a */
802          return ir->operands[0];
803       }
804       break;
805 
806    case ir_binop_pow:
807       /* 1^x == 1 */
808       if (is_vec_one(op_const[0]))
809          return op_const[0];
810 
811       /* x^1 == x */
812       if (is_vec_one(op_const[1]))
813          return ir->operands[0];
814 
815       /* pow(2,x) == exp2(x) */
816       if (is_vec_two(op_const[0]))
817          return expr(ir_unop_exp2, ir->operands[1]);
818 
819       if (is_vec_two(op_const[1])) {
820          ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
821                                               ir_var_temporary);
822          base_ir->insert_before(x);
823          base_ir->insert_before(assign(x, ir->operands[0]));
824          return mul(x, x);
825       }
826 
827       if (is_vec_four(op_const[1])) {
828          ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
829                                               ir_var_temporary);
830          base_ir->insert_before(x);
831          base_ir->insert_before(assign(x, ir->operands[0]));
832 
833          ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type,
834                                                     "squared",
835                                                     ir_var_temporary);
836          base_ir->insert_before(squared);
837          base_ir->insert_before(assign(squared, mul(x, x)));
838          return mul(squared, squared);
839       }
840 
841       break;
842 
843    case ir_binop_min:
844    case ir_binop_max:
845       if (!ir->type->is_float() || options->EmitNoSat)
846          break;
847 
848       /* Replace min(max) operations and its commutative combinations with
849        * a saturate operation
850        */
851       for (int op = 0; op < 2; op++) {
852          ir_expression *inner_expr = op_expr[op];
853          ir_constant *outer_const = op_const[1 - op];
854          ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
855             ir_binop_min : ir_binop_max;
856 
857          if (!inner_expr || !outer_const || (inner_expr->operation != op_cond))
858             continue;
859 
860          /* One of these has to be a constant */
861          if (!inner_expr->operands[0]->as_constant() &&
862              !inner_expr->operands[1]->as_constant())
863             break;
864 
865          /* Found a min(max) combination. Now try to see if its operands
866           * meet our conditions that we can do just a single saturate operation
867           */
868          for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
869             ir_rvalue *x = inner_expr->operands[minmax_op];
870             ir_rvalue *y = inner_expr->operands[1 - minmax_op];
871 
872             ir_constant *inner_const = y->as_constant();
873             if (!inner_const)
874                continue;
875 
876             /* min(max(x, 0.0), 1.0) is sat(x) */
877             if (ir->operation == ir_binop_min &&
878                 inner_const->is_zero() &&
879                 outer_const->is_one())
880                return saturate(x);
881 
882             /* max(min(x, 1.0), 0.0) is sat(x) */
883             if (ir->operation == ir_binop_max &&
884                 inner_const->is_one() &&
885                 outer_const->is_zero())
886                return saturate(x);
887 
888             /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */
889             if (ir->operation == ir_binop_min &&
890                 inner_const->is_zero() &&
891                 is_less_than_one(outer_const))
892                return saturate(expr(ir_binop_min, x, outer_const));
893 
894             /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */
895             if (ir->operation == ir_binop_max &&
896                 is_less_than_one(inner_const) &&
897                 outer_const->is_zero())
898                return saturate(expr(ir_binop_min, x, inner_const));
899 
900             /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */
901             if (ir->operation == ir_binop_max &&
902                 inner_const->is_one() &&
903                 is_greater_than_zero(outer_const))
904                return saturate(expr(ir_binop_max, x, outer_const));
905 
906             /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */
907             if (ir->operation == ir_binop_min &&
908                 is_greater_than_zero(inner_const) &&
909                 outer_const->is_one())
910                return saturate(expr(ir_binop_max, x, inner_const));
911          }
912       }
913 
914       break;
915 
916    case ir_unop_rcp:
917       if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
918 	 return op_expr[0]->operands[0];
919 
920       if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
921                          op_expr[0]->operation == ir_unop_exp)) {
922          return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
923                                            neg(op_expr[0]->operands[0]));
924       }
925 
926       /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
927        * its IR level, so we can always apply this transformation.
928        */
929       if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq)
930          return sqrt(op_expr[0]->operands[0]);
931 
932       /* As far as we know, all backends are OK with rsq. */
933       if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
934 	 return rsq(op_expr[0]->operands[0]);
935       }
936 
937       break;
938 
939    case ir_triop_fma:
940       /* Operands are op0 * op1 + op2. */
941       if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
942          return ir->operands[2];
943       } else if (is_vec_zero(op_const[2])) {
944          return mul(ir->operands[0], ir->operands[1]);
945       } else if (is_vec_one(op_const[0])) {
946          return add(ir->operands[1], ir->operands[2]);
947       } else if (is_vec_one(op_const[1])) {
948          return add(ir->operands[0], ir->operands[2]);
949       }
950       break;
951 
952    case ir_triop_lrp:
953       /* Operands are (x, y, a). */
954       if (is_vec_zero(op_const[2])) {
955          return ir->operands[0];
956       } else if (is_vec_one(op_const[2])) {
957          return ir->operands[1];
958       } else if (ir->operands[0]->equals(ir->operands[1])) {
959          return ir->operands[0];
960       } else if (is_vec_zero(op_const[0])) {
961          return mul(ir->operands[1], ir->operands[2]);
962       } else if (is_vec_zero(op_const[1])) {
963          unsigned op2_components = ir->operands[2]->type->vector_elements;
964          ir_constant *one;
965 
966          switch (ir->type->base_type) {
967          case GLSL_TYPE_FLOAT:
968             one = new(mem_ctx) ir_constant(1.0f, op2_components);
969             break;
970          case GLSL_TYPE_DOUBLE:
971             one = new(mem_ctx) ir_constant(1.0, op2_components);
972             break;
973          default:
974             one = NULL;
975             unreachable("unexpected type");
976          }
977 
978          return mul(ir->operands[0], add(one, neg(ir->operands[2])));
979       }
980       break;
981 
982    case ir_triop_csel:
983       if (is_vec_one(op_const[0]))
984 	 return ir->operands[1];
985       if (is_vec_zero(op_const[0]))
986 	 return ir->operands[2];
987       break;
988 
989    /* Remove interpolateAt* instructions for demoted inputs. They are
990     * assigned a constant expression to facilitate this.
991     */
992    case ir_unop_interpolate_at_centroid:
993    case ir_binop_interpolate_at_offset:
994    case ir_binop_interpolate_at_sample:
995       if (op_const[0])
996          return ir->operands[0];
997       break;
998 
999    default:
1000       break;
1001    }
1002 
1003    return ir;
1004 }
1005 
1006 void
handle_rvalue(ir_rvalue ** rvalue)1007 ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
1008 {
1009    if (!*rvalue)
1010       return;
1011 
1012    ir_expression *expr = (*rvalue)->as_expression();
1013    if (!expr || expr->operation == ir_quadop_vector)
1014       return;
1015 
1016    ir_rvalue *new_rvalue = handle_expression(expr);
1017    if (new_rvalue == *rvalue)
1018       return;
1019 
1020    /* If the expr used to be some vec OP scalar returning a vector, and the
1021     * optimization gave us back a scalar, we still need to turn it into a
1022     * vector.
1023     */
1024    *rvalue = swizzle_if_required(expr, new_rvalue);
1025 
1026    this->progress = true;
1027 }
1028 
1029 bool
do_algebraic(exec_list * instructions,bool native_integers,const struct gl_shader_compiler_options * options)1030 do_algebraic(exec_list *instructions, bool native_integers,
1031              const struct gl_shader_compiler_options *options)
1032 {
1033    ir_algebraic_visitor v(native_integers, options);
1034 
1035    visit_list_elements(&v, instructions);
1036 
1037    return v.progress;
1038 }
1039