1 /*
2  * Copyright © 2012 Intel Corporation
3  * Copyright © 2016 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #define MAX_INSTRUCTION (1 << 30)
26 
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "vc4_context.h"
30 #include "vc4_qir.h"
31 
32 struct partial_update_state {
33         struct qinst *insts[4];
34         uint8_t channels;
35 };
36 
37 static uint32_t
int_hash(const void * key)38 int_hash(const void *key)
39 {
40         return _mesa_hash_data(key, sizeof(int));
41 }
42 
43 static bool
int_compare(const void * key1,const void * key2)44 int_compare(const void *key1, const void *key2)
45 {
46         return *(const int *)key1 == *(const int *)key2;
47 }
48 
49 static int
qir_reg_to_var(struct qreg reg)50 qir_reg_to_var(struct qreg reg)
51 {
52         if (reg.file == QFILE_TEMP)
53                 return reg.index;
54 
55         return -1;
56 }
57 
58 static void
qir_setup_use(struct vc4_compile * c,struct qblock * block,int ip,struct qreg src)59 qir_setup_use(struct vc4_compile *c, struct qblock *block, int ip,
60               struct qreg src)
61 {
62         int var = qir_reg_to_var(src);
63         if (var == -1)
64                 return;
65 
66         c->temp_start[var] = MIN2(c->temp_start[var], ip);
67         c->temp_end[var] = MAX2(c->temp_end[var], ip);
68 
69         /* The use[] bitset marks when the block makes
70          * use of a variable without having completely
71          * defined that variable within the block.
72          */
73         if (!BITSET_TEST(block->def, var))
74                 BITSET_SET(block->use, var);
75 }
76 
77 static struct partial_update_state *
get_partial_update_state(struct hash_table * partial_update_ht,struct qinst * inst)78 get_partial_update_state(struct hash_table *partial_update_ht,
79                          struct qinst *inst)
80 {
81         struct hash_entry *entry =
82                 _mesa_hash_table_search(partial_update_ht,
83                                         &inst->dst.index);
84         if (entry)
85                 return entry->data;
86 
87         struct partial_update_state *state =
88                 rzalloc(partial_update_ht, struct partial_update_state);
89 
90         _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
91 
92         return state;
93 }
94 
95 static void
qir_setup_def(struct vc4_compile * c,struct qblock * block,int ip,struct hash_table * partial_update_ht,struct qinst * inst)96 qir_setup_def(struct vc4_compile *c, struct qblock *block, int ip,
97               struct hash_table *partial_update_ht, struct qinst *inst)
98 {
99         /* The def[] bitset marks when an initialization in a
100          * block completely screens off previous updates of
101          * that variable.
102          */
103         int var = qir_reg_to_var(inst->dst);
104         if (var == -1)
105                 return;
106 
107         c->temp_start[var] = MIN2(c->temp_start[var], ip);
108         c->temp_end[var] = MAX2(c->temp_end[var], ip);
109 
110         /* If we've already tracked this as a def, or already used it within
111          * the block, there's nothing to do.
112          */
113         if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
114                 return;
115 
116         /* Easy, common case: unconditional full register update.
117          *
118          * We treat conditioning on the exec mask as the same as not being
119          * conditional.  This makes sure that if the register gets set on
120          * either side of an if, it is treated as being screened off before
121          * the if.  Otherwise, if there was no intervening def, its live
122          * interval doesn't extend back to the start of he program, and if too
123          * many registers did that we'd fail to register allocate.
124          */
125         if ((inst->cond == QPU_COND_ALWAYS ||
126              inst->cond_is_exec_mask) && !inst->dst.pack) {
127                 BITSET_SET(block->def, var);
128                 return;
129         }
130 
131         /* Finally, look at the condition code and packing and mark it as a
132          * def.  We need to make sure that we understand sequences
133          * instructions like:
134          *
135          *     mov.zs t0, t1
136          *     mov.zc t0, t2
137          *
138          * or:
139          *
140          *     mmov t0.8a, t1
141          *     mmov t0.8b, t2
142          *     mmov t0.8c, t3
143          *     mmov t0.8d, t4
144          *
145          * as defining the temp within the block, because otherwise dst's live
146          * range will get extended up the control flow to the top of the
147          * program.
148          */
149         struct partial_update_state *state =
150                 get_partial_update_state(partial_update_ht, inst);
151         uint8_t mask = qir_channels_written(inst);
152 
153         if (inst->cond == QPU_COND_ALWAYS) {
154                 state->channels |= mask;
155         } else {
156                 for (int i = 0; i < 4; i++) {
157                         if (!(mask & (1 << i)))
158                                 continue;
159 
160                         if (state->insts[i] &&
161                             state->insts[i]->cond ==
162                             qpu_cond_complement(inst->cond))
163                                 state->channels |= 1 << i;
164                         else
165                                 state->insts[i] = inst;
166                 }
167         }
168 
169         if (state->channels == 0xf)
170                 BITSET_SET(block->def, var);
171 }
172 
173 static void
sf_state_clear(struct hash_table * partial_update_ht)174 sf_state_clear(struct hash_table *partial_update_ht)
175 {
176         struct hash_entry *entry;
177 
178         hash_table_foreach(partial_update_ht, entry) {
179                 struct partial_update_state *state = entry->data;
180 
181                 for (int i = 0; i < 4; i++) {
182                         if (state->insts[i] && state->insts[i]->cond)
183                                 state->insts[i] = NULL;
184                 }
185         }
186 }
187 
188 /* Sets up the def/use arrays for when variables are used-before-defined or
189  * defined-before-used in the block.
190  *
191  * Also initializes the temp_start/temp_end to cover just the instruction IPs
192  * where the variable is used, which will be extended later in
193  * qir_compute_start_end().
194  */
195 static void
qir_setup_def_use(struct vc4_compile * c)196 qir_setup_def_use(struct vc4_compile *c)
197 {
198         struct hash_table *partial_update_ht =
199                 _mesa_hash_table_create(c, int_hash, int_compare);
200         int ip = 0;
201 
202         qir_for_each_block(block, c) {
203                 block->start_ip = ip;
204 
205                 _mesa_hash_table_clear(partial_update_ht, NULL);
206 
207                 qir_for_each_inst(inst, block) {
208                         for (int i = 0; i < qir_get_nsrc(inst); i++)
209                                 qir_setup_use(c, block, ip, inst->src[i]);
210 
211                         qir_setup_def(c, block, ip, partial_update_ht, inst);
212 
213                         if (inst->sf)
214                                 sf_state_clear(partial_update_ht);
215 
216                         switch (inst->op) {
217                         case QOP_FRAG_Z:
218                         case QOP_FRAG_W:
219                                 /* The payload registers have values
220                                  * implicitly loaded at the start of the
221                                  * program.
222                                  */
223                                 if (inst->dst.file == QFILE_TEMP)
224                                         c->temp_start[inst->dst.index] = 0;
225                                 break;
226                         default:
227                                 break;
228                         }
229                         ip++;
230                 }
231                 block->end_ip = ip;
232         }
233 
234         _mesa_hash_table_destroy(partial_update_ht, NULL);
235 }
236 
237 static bool
qir_live_variables_dataflow(struct vc4_compile * c,int bitset_words)238 qir_live_variables_dataflow(struct vc4_compile *c, int bitset_words)
239 {
240         bool cont = false;
241 
242         qir_for_each_block_rev(block, c) {
243                 /* Update live_out: Any successor using the variable
244                  * on entrance needs us to have the variable live on
245                  * exit.
246                  */
247                 qir_for_each_successor(succ, block) {
248                         for (int i = 0; i < bitset_words; i++) {
249                                 BITSET_WORD new_live_out = (succ->live_in[i] &
250                                                             ~block->live_out[i]);
251                                 if (new_live_out) {
252                                         block->live_out[i] |= new_live_out;
253                                         cont = true;
254                                 }
255                         }
256                 }
257 
258                 /* Update live_in */
259                 for (int i = 0; i < bitset_words; i++) {
260                         BITSET_WORD new_live_in = (block->use[i] |
261                                                    (block->live_out[i] &
262                                                     ~block->def[i]));
263                         if (new_live_in & ~block->live_in[i]) {
264                                 block->live_in[i] |= new_live_in;
265                                 cont = true;
266                         }
267                 }
268         }
269 
270         return cont;
271 }
272 
273 /**
274  * Extend the start/end ranges for each variable to account for the
275  * new information calculated from control flow.
276  */
277 static void
qir_compute_start_end(struct vc4_compile * c,int num_vars)278 qir_compute_start_end(struct vc4_compile *c, int num_vars)
279 {
280         qir_for_each_block(block, c) {
281                 for (int i = 0; i < num_vars; i++) {
282                         if (BITSET_TEST(block->live_in, i)) {
283                                 c->temp_start[i] = MIN2(c->temp_start[i],
284                                                         block->start_ip);
285                                 c->temp_end[i] = MAX2(c->temp_end[i],
286                                                       block->start_ip);
287                         }
288 
289                         if (BITSET_TEST(block->live_out, i)) {
290                                 c->temp_start[i] = MIN2(c->temp_start[i],
291                                                         block->end_ip);
292                                 c->temp_end[i] = MAX2(c->temp_end[i],
293                                                       block->end_ip);
294                         }
295                 }
296         }
297 }
298 
299 void
qir_calculate_live_intervals(struct vc4_compile * c)300 qir_calculate_live_intervals(struct vc4_compile *c)
301 {
302         int bitset_words = BITSET_WORDS(c->num_temps);
303 
304         /* If we called this function more than once, then we should be
305          * freeing the previous arrays.
306          */
307         assert(!c->temp_start);
308 
309         c->temp_start = rzalloc_array(c, int, c->num_temps);
310         c->temp_end = rzalloc_array(c, int, c->num_temps);
311 
312         for (int i = 0; i < c->num_temps; i++) {
313                 c->temp_start[i] = MAX_INSTRUCTION;
314                 c->temp_end[i] = -1;
315         }
316 
317         qir_for_each_block(block, c) {
318                 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
319                 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
320                 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
321                 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
322         }
323 
324         qir_setup_def_use(c);
325 
326         while (qir_live_variables_dataflow(c, bitset_words))
327                 ;
328 
329         qir_compute_start_end(c, c->num_temps);
330 
331         if (vc4_debug & VC4_DEBUG_SHADERDB) {
332                 int last_ip = 0;
333                 for (int i = 0; i < c->num_temps; i++)
334                         last_ip = MAX2(last_ip, c->temp_end[i]);
335 
336                 int reg_pressure = 0;
337                 int max_reg_pressure = 0;
338                 for (int i = 0; i < last_ip; i++) {
339                         for (int j = 0; j < c->num_temps; j++) {
340                                 if (c->temp_start[j] == i)
341                                         reg_pressure++;
342                                 if (c->temp_end[j] == i)
343                                         reg_pressure--;
344                         }
345                         max_reg_pressure = MAX2(max_reg_pressure, reg_pressure);
346                 }
347 
348                 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d max temps\n",
349                         qir_get_stage_name(c->stage),
350                         c->program_id, c->variant_id,
351                         max_reg_pressure);
352         }
353 }
354