1 /*
2  * Copyright © 2012 Intel Corporation
3  * Copyright © 2016 Broadcom
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 
25 #define MAX_INSTRUCTION (1 << 30)
26 
27 #include "util/ralloc.h"
28 #include "util/register_allocate.h"
29 #include "v3d_compiler.h"
30 
31 struct partial_update_state {
32         struct qinst *insts[4];
33         uint8_t channels;
34 };
35 
36 static uint32_t
int_hash(const void * key)37 int_hash(const void *key)
38 {
39         return _mesa_hash_data(key, sizeof(int));
40 }
41 
42 static bool
int_compare(const void * key1,const void * key2)43 int_compare(const void *key1, const void *key2)
44 {
45         return *(const int *)key1 == *(const int *)key2;
46 }
47 
48 static int
vir_reg_to_var(struct qreg reg)49 vir_reg_to_var(struct qreg reg)
50 {
51         if (reg.file == QFILE_TEMP)
52                 return reg.index;
53 
54         return -1;
55 }
56 
57 static void
vir_setup_use(struct v3d_compile * c,struct qblock * block,int ip,struct qreg src)58 vir_setup_use(struct v3d_compile *c, struct qblock *block, int ip,
59               struct qreg src)
60 {
61         int var = vir_reg_to_var(src);
62         if (var == -1)
63                 return;
64 
65         c->temp_start[var] = MIN2(c->temp_start[var], ip);
66         c->temp_end[var] = MAX2(c->temp_end[var], ip);
67 
68         /* The use[] bitset marks when the block makes
69          * use of a variable without having completely
70          * defined that variable within the block.
71          */
72         if (!BITSET_TEST(block->def, var))
73                 BITSET_SET(block->use, var);
74 }
75 
76 static struct partial_update_state *
get_partial_update_state(struct hash_table * partial_update_ht,struct qinst * inst)77 get_partial_update_state(struct hash_table *partial_update_ht,
78                          struct qinst *inst)
79 {
80         struct hash_entry *entry =
81                 _mesa_hash_table_search(partial_update_ht,
82                                         &inst->dst.index);
83         if (entry)
84                 return entry->data;
85 
86         struct partial_update_state *state =
87                 rzalloc(partial_update_ht, struct partial_update_state);
88 
89         _mesa_hash_table_insert(partial_update_ht, &inst->dst.index, state);
90 
91         return state;
92 }
93 
94 static void
vir_setup_def(struct v3d_compile * c,struct qblock * block,int ip,struct hash_table * partial_update_ht,struct qinst * inst)95 vir_setup_def(struct v3d_compile *c, struct qblock *block, int ip,
96               struct hash_table *partial_update_ht, struct qinst *inst)
97 {
98         if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU)
99                 return;
100 
101         /* The def[] bitset marks when an initialization in a
102          * block completely screens off previous updates of
103          * that variable.
104          */
105         int var = vir_reg_to_var(inst->dst);
106         if (var == -1)
107                 return;
108 
109         c->temp_start[var] = MIN2(c->temp_start[var], ip);
110         c->temp_end[var] = MAX2(c->temp_end[var], ip);
111 
112         /* If we've already tracked this as a def, or already used it within
113          * the block, there's nothing to do.
114          */
115         if (BITSET_TEST(block->use, var) || BITSET_TEST(block->def, var))
116                 return;
117 
118         /* Easy, common case: unconditional full register update.
119          *
120          * We treat conditioning on the exec mask as the same as not being
121          * conditional.  This makes sure that if the register gets set on
122          * either side of an if, it is treated as being screened off before
123          * the if.  Otherwise, if there was no intervening def, its live
124          * interval doesn't extend back to the start of he program, and if too
125          * many registers did that we'd fail to register allocate.
126          */
127         if (((inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
128               inst->qpu.flags.mc == V3D_QPU_COND_NONE) ||
129              inst->cond_is_exec_mask) &&
130             inst->qpu.alu.add.output_pack == V3D_QPU_PACK_NONE &&
131             inst->qpu.alu.mul.output_pack == V3D_QPU_PACK_NONE) {
132                 BITSET_SET(block->def, var);
133                 return;
134         }
135 
136         /* Finally, look at the condition code and packing and mark it as a
137          * def.  We need to make sure that we understand sequences
138          * instructions like:
139          *
140          *     mov.zs t0, t1
141          *     mov.zc t0, t2
142          *
143          * or:
144          *
145          *     mmov t0.8a, t1
146          *     mmov t0.8b, t2
147          *     mmov t0.8c, t3
148          *     mmov t0.8d, t4
149          *
150          * as defining the temp within the block, because otherwise dst's live
151          * range will get extended up the control flow to the top of the
152          * program.
153          */
154         struct partial_update_state *state =
155                 get_partial_update_state(partial_update_ht, inst);
156         uint8_t mask = 0xf; /* XXX vir_channels_written(inst); */
157 
158         if (inst->qpu.flags.ac == V3D_QPU_COND_NONE &&
159             inst->qpu.flags.mc == V3D_QPU_COND_NONE) {
160                 state->channels |= mask;
161         } else {
162                 for (int i = 0; i < 4; i++) {
163                         if (!(mask & (1 << i)))
164                                 continue;
165 
166                         /* XXXif (state->insts[i] &&
167                             state->insts[i]->cond ==
168                             qpu_cond_complement(inst->cond))
169                                 state->channels |= 1 << i;
170                         else
171                         */
172                                 state->insts[i] = inst;
173                 }
174         }
175 
176         if (state->channels == 0xf)
177                 BITSET_SET(block->def, var);
178 }
179 
180 static void
sf_state_clear(struct hash_table * partial_update_ht)181 sf_state_clear(struct hash_table *partial_update_ht)
182 {
183         struct hash_entry *entry;
184 
185         hash_table_foreach(partial_update_ht, entry) {
186                 struct partial_update_state *state = entry->data;
187 
188                 for (int i = 0; i < 4; i++) {
189                         if (state->insts[i] &&
190                             (state->insts[i]->qpu.flags.ac != V3D_QPU_COND_NONE ||
191                              state->insts[i]->qpu.flags.mc != V3D_QPU_COND_NONE))
192                                 state->insts[i] = NULL;
193                 }
194         }
195 }
196 
197 /* Sets up the def/use arrays for when variables are used-before-defined or
198  * defined-before-used in the block.
199  *
200  * Also initializes the temp_start/temp_end to cover just the instruction IPs
201  * where the variable is used, which will be extended later in
202  * vir_compute_start_end().
203  */
204 static void
vir_setup_def_use(struct v3d_compile * c)205 vir_setup_def_use(struct v3d_compile *c)
206 {
207         struct hash_table *partial_update_ht =
208                 _mesa_hash_table_create(c, int_hash, int_compare);
209         int ip = 0;
210 
211         vir_for_each_block(block, c) {
212                 block->start_ip = ip;
213 
214                 _mesa_hash_table_clear(partial_update_ht, NULL);
215 
216                 vir_for_each_inst(inst, block) {
217                         for (int i = 0; i < vir_get_nsrc(inst); i++)
218                                 vir_setup_use(c, block, ip, inst->src[i]);
219 
220                         vir_setup_def(c, block, ip, partial_update_ht, inst);
221 
222                         if (false /* XXX inst->uf */)
223                                 sf_state_clear(partial_update_ht);
224 
225                         /* Payload registers: r0/1/2 contain W, centroid W,
226                          * and Z at program start.  Register allocation will
227                          * force their nodes to R0/1/2.
228                          */
229                         if (inst->src[0].file == QFILE_REG) {
230                                 switch (inst->src[0].index) {
231                                 case 0:
232                                 case 1:
233                                 case 2:
234                                         c->temp_start[inst->dst.index] = 0;
235                                         break;
236                                 }
237                         }
238 
239                         ip++;
240                 }
241                 block->end_ip = ip;
242         }
243 
244         _mesa_hash_table_destroy(partial_update_ht, NULL);
245 }
246 
247 static bool
vir_live_variables_dataflow(struct v3d_compile * c,int bitset_words)248 vir_live_variables_dataflow(struct v3d_compile *c, int bitset_words)
249 {
250         bool cont = false;
251 
252         vir_for_each_block_rev(block, c) {
253                 /* Update live_out: Any successor using the variable
254                  * on entrance needs us to have the variable live on
255                  * exit.
256                  */
257                 vir_for_each_successor(succ, block) {
258                         for (int i = 0; i < bitset_words; i++) {
259                                 BITSET_WORD new_live_out = (succ->live_in[i] &
260                                                             ~block->live_out[i]);
261                                 if (new_live_out) {
262                                         block->live_out[i] |= new_live_out;
263                                         cont = true;
264                                 }
265                         }
266                 }
267 
268                 /* Update live_in */
269                 for (int i = 0; i < bitset_words; i++) {
270                         BITSET_WORD new_live_in = (block->use[i] |
271                                                    (block->live_out[i] &
272                                                     ~block->def[i]));
273                         if (new_live_in & ~block->live_in[i]) {
274                                 block->live_in[i] |= new_live_in;
275                                 cont = true;
276                         }
277                 }
278         }
279 
280         return cont;
281 }
282 
283 /**
284  * Extend the start/end ranges for each variable to account for the
285  * new information calculated from control flow.
286  */
287 static void
vir_compute_start_end(struct v3d_compile * c,int num_vars)288 vir_compute_start_end(struct v3d_compile *c, int num_vars)
289 {
290         vir_for_each_block(block, c) {
291                 for (int i = 0; i < num_vars; i++) {
292                         if (BITSET_TEST(block->live_in, i)) {
293                                 c->temp_start[i] = MIN2(c->temp_start[i],
294                                                         block->start_ip);
295                                 c->temp_end[i] = MAX2(c->temp_end[i],
296                                                       block->start_ip);
297                         }
298 
299                         if (BITSET_TEST(block->live_out, i)) {
300                                 c->temp_start[i] = MIN2(c->temp_start[i],
301                                                         block->end_ip);
302                                 c->temp_end[i] = MAX2(c->temp_end[i],
303                                                       block->end_ip);
304                         }
305                 }
306         }
307 }
308 
309 void
vir_calculate_live_intervals(struct v3d_compile * c)310 vir_calculate_live_intervals(struct v3d_compile *c)
311 {
312         int bitset_words = BITSET_WORDS(c->num_temps);
313 
314         /* If we called this function more than once, then we should be
315          * freeing the previous arrays.
316          */
317         assert(!c->temp_start);
318 
319         c->temp_start = rzalloc_array(c, int, c->num_temps);
320         c->temp_end = rzalloc_array(c, int, c->num_temps);
321 
322         for (int i = 0; i < c->num_temps; i++) {
323                 c->temp_start[i] = MAX_INSTRUCTION;
324                 c->temp_end[i] = -1;
325         }
326 
327         vir_for_each_block(block, c) {
328                 block->def = rzalloc_array(c, BITSET_WORD, bitset_words);
329                 block->use = rzalloc_array(c, BITSET_WORD, bitset_words);
330                 block->live_in = rzalloc_array(c, BITSET_WORD, bitset_words);
331                 block->live_out = rzalloc_array(c, BITSET_WORD, bitset_words);
332         }
333 
334         vir_setup_def_use(c);
335 
336         while (vir_live_variables_dataflow(c, bitset_words))
337                 ;
338 
339         vir_compute_start_end(c, c->num_temps);
340 }
341