1 /*
2  * Copyright © 2014 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "brw_vec4.h"
25 #include "brw_vec4_live_variables.h"
26 #include "brw_cfg.h"
27 
28 /** @file brw_vec4_dead_code_eliminate.cpp
29  *
30  * Dataflow-aware dead code elimination.
31  *
32  * Walks the instruction list from the bottom, removing instructions that
33  * have results that both aren't used in later blocks and haven't been read
34  * yet in the tail end of this block.
35  */
36 
37 using namespace brw;
38 
39 bool
dead_code_eliminate()40 vec4_visitor::dead_code_eliminate()
41 {
42    bool progress = false;
43 
44    const vec4_live_variables &live_vars = live_analysis.require();
45    int num_vars = live_vars.num_vars;
46    BITSET_WORD *live = rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(num_vars));
47    BITSET_WORD *flag_live = rzalloc_array(NULL, BITSET_WORD, 1);
48 
49    foreach_block_reverse_safe(block, cfg) {
50       memcpy(live, live_vars.block_data[block->num].liveout,
51              sizeof(BITSET_WORD) * BITSET_WORDS(num_vars));
52       memcpy(flag_live, live_vars.block_data[block->num].flag_liveout,
53              sizeof(BITSET_WORD));
54 
55       foreach_inst_in_block_reverse_safe(vec4_instruction, inst, block) {
56          if ((inst->dst.file == VGRF && !inst->has_side_effects()) ||
57              (inst->dst.is_null() && inst->writes_flag())){
58             bool result_live[4] = { false };
59             if (inst->dst.file == VGRF) {
60                for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
61                   for (int c = 0; c < 4; c++) {
62                      const unsigned v = var_from_reg(alloc, inst->dst, c, i);
63                      result_live[c] |= BITSET_TEST(live, v);
64                   }
65                }
66             } else {
67                for (unsigned c = 0; c < 4; c++)
68                   result_live[c] = BITSET_TEST(flag_live, c);
69             }
70 
71             /* If the instruction can't do writemasking, then it's all or
72              * nothing.
73              */
74             if (!inst->can_do_writemask(devinfo)) {
75                bool result = result_live[0] | result_live[1] |
76                              result_live[2] | result_live[3];
77                result_live[0] = result;
78                result_live[1] = result;
79                result_live[2] = result;
80                result_live[3] = result;
81             }
82 
83             if (inst->writes_flag()) {
84                /* Independently calculate the usage of the flag components and
85                 * the destination value components.
86                 */
87                uint8_t flag_mask = inst->dst.writemask;
88                uint8_t dest_mask = inst->dst.writemask;
89 
90                for (int c = 0; c < 4; c++) {
91                   if (!result_live[c] && dest_mask & (1 << c))
92                      dest_mask &= ~(1 << c);
93 
94                   if (!BITSET_TEST(flag_live, c))
95                      flag_mask &= ~(1 << c);
96                }
97 
98                if (inst->dst.writemask != (flag_mask | dest_mask)) {
99                   progress = true;
100                   inst->dst.writemask = flag_mask | dest_mask;
101                }
102 
103                /* If none of the destination components are read, replace the
104                 * destination register with the NULL register.
105                 */
106                if (dest_mask == 0) {
107                   progress = true;
108                   inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
109                }
110             } else {
111                for (int c = 0; c < 4; c++) {
112                   if (!result_live[c] && inst->dst.writemask & (1 << c)) {
113                      inst->dst.writemask &= ~(1 << c);
114                      progress = true;
115 
116                      if (inst->dst.writemask == 0) {
117                         if (inst->writes_accumulator) {
118                            inst->dst = dst_reg(retype(brw_null_reg(), inst->dst.type));
119                         } else {
120                            inst->opcode = BRW_OPCODE_NOP;
121                            break;
122                         }
123                      }
124                   }
125                }
126             }
127          }
128 
129          if (inst->dst.is_null() && inst->writes_flag()) {
130             bool combined_live = false;
131             for (unsigned c = 0; c < 4; c++)
132                combined_live |= BITSET_TEST(flag_live, c);
133 
134             if (!combined_live) {
135                inst->opcode = BRW_OPCODE_NOP;
136                progress = true;
137             }
138          }
139 
140          if (inst->dst.file == VGRF && !inst->predicate &&
141              !inst->is_align1_partial_write()) {
142             for (unsigned i = 0; i < DIV_ROUND_UP(inst->size_written, 16); i++) {
143                for (int c = 0; c < 4; c++) {
144                   if (inst->dst.writemask & (1 << c)) {
145                      const unsigned v = var_from_reg(alloc, inst->dst, c, i);
146                      BITSET_CLEAR(live, v);
147                   }
148                }
149             }
150          }
151 
152          if (inst->writes_flag() && !inst->predicate && inst->exec_size == 8) {
153             for (unsigned c = 0; c < 4; c++)
154                BITSET_CLEAR(flag_live, c);
155          }
156 
157          if (inst->opcode == BRW_OPCODE_NOP) {
158             inst->remove(block);
159             continue;
160          }
161 
162          for (int i = 0; i < 3; i++) {
163             if (inst->src[i].file == VGRF) {
164                for (unsigned j = 0; j < DIV_ROUND_UP(inst->size_read(i), 16); j++) {
165                   for (int c = 0; c < 4; c++) {
166                      const unsigned v = var_from_reg(alloc, inst->src[i], c, j);
167                      BITSET_SET(live, v);
168                   }
169                }
170             }
171          }
172 
173          for (unsigned c = 0; c < 4; c++) {
174             if (inst->reads_flag(c)) {
175                BITSET_SET(flag_live, c);
176             }
177          }
178       }
179    }
180 
181    ralloc_free(live);
182    ralloc_free(flag_live);
183 
184    if (progress)
185       invalidate_analysis(DEPENDENCY_INSTRUCTIONS);
186 
187    return progress;
188 }
189