1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir.h"
25 
26 /**
27  * \file nir_opt_move_comparisons.c
28  *
29  * This pass moves ALU comparison operations just before their first use.
30  *
31  * It only moves instructions within a single basic block; cross-block
32  * movement is left to global code motion.
33  *
34  * Many GPUs generate condition codes for comparisons, and use predication
35  * for conditional selects and control flow.  In a sequence such as:
36  *
37  *     vec1 32 ssa_1 = flt a b
38  *     <some other operations>
39  *     vec1 32 ssa_2 = bcsel ssa_1 c d
40  *
41  * the backend would likely do the comparison, producing condition codes,
42  * then save those to a boolean value.  The intervening operations might
43  * trash the condition codes.  Then, in order to do the bcsel, it would
44  * need to re-populate the condition code register based on the boolean.
45  *
46  * By moving the comparison just before the bcsel, the condition codes could
47  * be used directly.  This eliminates the need to reload them from the boolean
48  * (generally eliminating an instruction).  It may also eliminate the need to
49  * create a boolean value altogether (unless it's used elsewhere), which could
50  * lower register pressure.
51  */
52 
53 static bool
is_comparison(nir_op op)54 is_comparison(nir_op op)
55 {
56    switch (op) {
57    case nir_op_flt:
58    case nir_op_fge:
59    case nir_op_feq:
60    case nir_op_fne:
61    case nir_op_ilt:
62    case nir_op_ult:
63    case nir_op_ige:
64    case nir_op_uge:
65    case nir_op_ieq:
66    case nir_op_ine:
67    case nir_op_i2b:
68    case nir_op_f2b:
69    case nir_op_inot:
70    case nir_op_fnot:
71       return true;
72    default:
73       return false;
74    }
75 }
76 
77 static bool
move_comparison_source(nir_src * src,nir_block * block,nir_instr * before)78 move_comparison_source(nir_src *src, nir_block *block, nir_instr *before)
79 {
80    if (!src->is_ssa)
81       return false;
82 
83    nir_instr *src_instr = src->ssa->parent_instr;
84 
85    if (src_instr->block == block &&
86        src_instr->type == nir_instr_type_alu &&
87        is_comparison(nir_instr_as_alu(src_instr)->op)) {
88 
89       exec_node_remove(&src_instr->node);
90 
91       if (before)
92          exec_node_insert_node_before(&before->node, &src_instr->node);
93       else
94          exec_list_push_tail(&block->instr_list, &src_instr->node);
95 
96       return true;
97    }
98 
99    return false;
100 }
101 
102 static bool
move_comparison_source_cb(nir_src * src,void * data)103 move_comparison_source_cb(nir_src *src, void *data)
104 {
105    bool *progress = data;
106 
107    nir_instr *instr = src->parent_instr;
108    if (move_comparison_source(src, instr->block, instr))
109       *progress = true;
110 
111    return true; /* nir_foreach_src should keep going */
112 }
113 
114 static bool
move_comparisons(nir_block * block)115 move_comparisons(nir_block *block)
116 {
117    bool progress = false;
118 
119    /* We use a simple approach: walk instructions backwards.
120     *
121     * If the instruction's source is a comparison from the same block,
122     * simply move it here.  This may break SSA if it's used earlier in
123     * the block as well.  However, as we walk backwards, we'll find the
124     * earlier use and move it again, further up.  It eventually ends up
125     * dominating all uses again, restoring SSA form.
126     *
127     * Before walking instructions, we consider the if-condition at the
128     * end of the block, if one exists.  It's effectively a use at the
129     * bottom of the block.
130     */
131    nir_if *iff = nir_block_get_following_if(block);
132    if (iff) {
133       progress |= move_comparison_source(&iff->condition, block, NULL);
134    }
135 
136    nir_foreach_instr_reverse(instr, block) {
137       /* The sources of phi instructions happen after the predecessor block
138        * but before this block.  (Yes, that's between blocks).  This means
139        * that we don't need to move them in order for them to be correct.
140        * We could move them to encourage comparisons that are used in a phi to
141        * the end of the block, doing so correctly would make the pass
142        * substantially more complicated and wouldn't gain us anything since
143        * the phi can't use a flag value anyway.
144        */
145       if (instr->type == nir_instr_type_phi) {
146          /* We're going backwards so everything else is a phi too */
147          break;
148       } else if (instr->type == nir_instr_type_alu) {
149          /* Walk ALU instruction sources backwards so that bcsel's boolean
150           * condition is processed last.
151           */
152          nir_alu_instr *alu = nir_instr_as_alu(instr);
153          for (int i = nir_op_infos[alu->op].num_inputs - 1; i >= 0; i--) {
154             progress |= move_comparison_source(&alu->src[i].src,
155                                                block, instr);
156          }
157       } else {
158          nir_foreach_src(instr, move_comparison_source_cb, &progress);
159       }
160    }
161 
162    return progress;
163 }
164 
165 bool
nir_opt_move_comparisons(nir_shader * shader)166 nir_opt_move_comparisons(nir_shader *shader)
167 {
168    bool progress = false;
169 
170    nir_foreach_function(func, shader) {
171       if (!func->impl)
172          continue;
173 
174       nir_foreach_block(block, func->impl) {
175          if (move_comparisons(block)) {
176             nir_metadata_preserve(func->impl, nir_metadata_block_index |
177                                               nir_metadata_dominance |
178                                               nir_metadata_live_ssa_defs);
179             progress = true;
180          }
181       }
182    }
183 
184    return progress;
185 }
186