1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include "util/ralloc.h"
26 #include "util/register_allocate.h"
27 #include "util/u_debug.h"
28 
29 #include "ppir.h"
30 #include "lima_context.h"
31 
32 #define PPIR_FULL_REG_NUM  6
33 
34 #define PPIR_VEC1_REG_NUM       (PPIR_FULL_REG_NUM * 4) /* x, y, z, w */
35 #define PPIR_VEC2_REG_NUM       (PPIR_FULL_REG_NUM * 3) /* xy, yz, zw */
36 #define PPIR_VEC3_REG_NUM       (PPIR_FULL_REG_NUM * 2) /* xyz, yzw */
37 #define PPIR_VEC4_REG_NUM       PPIR_FULL_REG_NUM       /* xyzw */
38 #define PPIR_HEAD_VEC1_REG_NUM  PPIR_FULL_REG_NUM       /* x */
39 #define PPIR_HEAD_VEC2_REG_NUM  PPIR_FULL_REG_NUM       /* xy */
40 #define PPIR_HEAD_VEC3_REG_NUM  PPIR_FULL_REG_NUM       /* xyz */
41 #define PPIR_HEAD_VEC4_REG_NUM  PPIR_FULL_REG_NUM       /* xyzw */
42 
43 #define PPIR_VEC1_REG_BASE       0
44 #define PPIR_VEC2_REG_BASE       (PPIR_VEC1_REG_BASE + PPIR_VEC1_REG_NUM)
45 #define PPIR_VEC3_REG_BASE       (PPIR_VEC2_REG_BASE + PPIR_VEC2_REG_NUM)
46 #define PPIR_VEC4_REG_BASE       (PPIR_VEC3_REG_BASE + PPIR_VEC3_REG_NUM)
47 #define PPIR_HEAD_VEC1_REG_BASE  (PPIR_VEC4_REG_BASE + PPIR_VEC4_REG_NUM)
48 #define PPIR_HEAD_VEC2_REG_BASE  (PPIR_HEAD_VEC1_REG_BASE + PPIR_HEAD_VEC1_REG_NUM)
49 #define PPIR_HEAD_VEC3_REG_BASE  (PPIR_HEAD_VEC2_REG_BASE + PPIR_HEAD_VEC2_REG_NUM)
50 #define PPIR_HEAD_VEC4_REG_BASE  (PPIR_HEAD_VEC3_REG_BASE + PPIR_HEAD_VEC3_REG_NUM)
51 #define PPIR_REG_COUNT           (PPIR_HEAD_VEC4_REG_BASE + PPIR_HEAD_VEC4_REG_NUM)
52 
53 enum ppir_ra_reg_class {
54    ppir_ra_reg_class_vec1,
55    ppir_ra_reg_class_vec2,
56    ppir_ra_reg_class_vec3,
57    ppir_ra_reg_class_vec4,
58 
59    /* 4 reg class for load/store instr regs:
60     * load/store instr has no swizzle field, so the (virtual) register
61     * must be allocated at the beginning of a (physical) register,
62     */
63    ppir_ra_reg_class_head_vec1,
64    ppir_ra_reg_class_head_vec2,
65    ppir_ra_reg_class_head_vec3,
66    ppir_ra_reg_class_head_vec4,
67 
68    ppir_ra_reg_class_num,
69 };
70 
71 static const int ppir_ra_reg_base[ppir_ra_reg_class_num + 1] = {
72    [ppir_ra_reg_class_vec1]       = PPIR_VEC1_REG_BASE,
73    [ppir_ra_reg_class_vec2]       = PPIR_VEC2_REG_BASE,
74    [ppir_ra_reg_class_vec3]       = PPIR_VEC3_REG_BASE,
75    [ppir_ra_reg_class_vec4]       = PPIR_VEC4_REG_BASE,
76    [ppir_ra_reg_class_head_vec1]  = PPIR_HEAD_VEC1_REG_BASE,
77    [ppir_ra_reg_class_head_vec2]  = PPIR_HEAD_VEC2_REG_BASE,
78    [ppir_ra_reg_class_head_vec3]  = PPIR_HEAD_VEC3_REG_BASE,
79    [ppir_ra_reg_class_head_vec4]  = PPIR_HEAD_VEC4_REG_BASE,
80    [ppir_ra_reg_class_num]        = PPIR_REG_COUNT,
81 };
82 
83 static unsigned int *
84 ppir_ra_reg_q_values[ppir_ra_reg_class_num] = {
85    (unsigned int []) {1, 2, 3, 4, 1, 2, 3, 4},
86    (unsigned int []) {2, 3, 3, 3, 1, 2, 3, 3},
87    (unsigned int []) {2, 2, 2, 2, 1, 2, 2, 2},
88    (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
89    (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
90    (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
91    (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
92    (unsigned int []) {1, 1, 1, 1, 1, 1, 1, 1},
93 };
94 
ppir_regalloc_init(void * mem_ctx)95 struct ra_regs *ppir_regalloc_init(void *mem_ctx)
96 {
97    struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
98    if (!ret)
99       return NULL;
100 
101    /* (x, y, z, w) (xy, yz, zw) (xyz, yzw) (xyzw) (x) (xy) (xyz) (xyzw) */
102    static const int class_reg_num[ppir_ra_reg_class_num] = {
103       4, 3, 2, 1, 1, 1, 1, 1,
104    };
105    /* base reg (x, y, z, w) confliction with other regs */
106    for (int h = 0; h < 4; h++) {
107       int base_reg_mask = 1 << h;
108       for (int i = 1; i < ppir_ra_reg_class_num; i++) {
109          int class_reg_base_mask = (1 << ((i % 4) + 1)) - 1;
110          for (int j = 0; j < class_reg_num[i]; j++) {
111             if (base_reg_mask & (class_reg_base_mask << j)) {
112                for (int k = 0; k < PPIR_FULL_REG_NUM; k++) {
113                   ra_add_reg_conflict(ret, k * 4 + h,
114                      ppir_ra_reg_base[i] + k * class_reg_num[i] + j);
115                }
116             }
117          }
118       }
119    }
120    /* build all other confliction by the base reg confliction */
121    for (int i = 0; i < PPIR_VEC1_REG_NUM; i++)
122       ra_make_reg_conflicts_transitive(ret, i);
123 
124    for (int i = 0; i < ppir_ra_reg_class_num; i++)
125       ra_alloc_reg_class(ret);
126 
127    int reg_index = 0;
128    for (int i = 0; i < ppir_ra_reg_class_num; i++) {
129       while (reg_index < ppir_ra_reg_base[i + 1])
130          ra_class_add_reg(ret, i, reg_index++);
131    }
132 
133    ra_set_finalize(ret, ppir_ra_reg_q_values);
134    return ret;
135 }
136 
ppir_regalloc_update_reglist_ssa(ppir_compiler * comp)137 static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
138 {
139    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
140       list_for_each_entry(ppir_node, node, &block->node_list, list) {
141          if (node->is_end)
142             continue;
143 
144          if (!node->instr || node->op == ppir_op_const)
145             continue;
146 
147          ppir_dest *dest = ppir_node_get_dest(node);
148          if (dest) {
149             ppir_reg *reg = NULL;
150 
151             if (dest->type == ppir_target_ssa) {
152                reg = &dest->ssa;
153                list_addtail(&reg->list, &comp->reg_list);
154             }
155          }
156       }
157    }
158 }
159 
get_phy_reg_index(int reg)160 static int get_phy_reg_index(int reg)
161 {
162    int i;
163 
164    for (i = 0; i < ppir_ra_reg_class_num; i++) {
165       if (reg < ppir_ra_reg_base[i + 1]) {
166          reg -= ppir_ra_reg_base[i];
167          break;
168       }
169    }
170 
171    if (i < ppir_ra_reg_class_head_vec1)
172       return reg / (4 - i) * 4 + reg % (4 - i);
173    else
174       return reg * 4;
175 }
176 
ppir_regalloc_print_result(ppir_compiler * comp)177 static void ppir_regalloc_print_result(ppir_compiler *comp)
178 {
179    printf("======ppir regalloc result======\n");
180    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
181       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
182          printf("%03d:", instr->index);
183          for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
184             ppir_node *node = instr->slots[i];
185             if (!node)
186                continue;
187 
188             printf(" (%d|", node->index);
189 
190             ppir_dest *dest = ppir_node_get_dest(node);
191             if (dest)
192                printf("%d", ppir_target_get_dest_reg_index(dest));
193 
194             printf("|");
195 
196             for (int i = 0; i < ppir_node_get_src_num(node); i++) {
197                if (i)
198                   printf(" ");
199                printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node, i)));
200             }
201 
202             printf(")");
203          }
204          printf("\n");
205       }
206    }
207    printf("--------------------------\n");
208 }
209 
create_new_instr_after(ppir_block * block,ppir_instr * ref,ppir_node * node)210 static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
211                                    ppir_node *node)
212 {
213    ppir_instr *newinstr = ppir_instr_create(block);
214    if (unlikely(!newinstr))
215       return false;
216 
217    list_del(&newinstr->list);
218    list_add(&newinstr->list, &ref->list);
219 
220    if (!ppir_instr_insert_node(newinstr, node))
221       return false;
222 
223    list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
224       instr->seq++;
225    }
226    newinstr->seq = ref->seq+1;
227    newinstr->scheduled = true;
228    return true;
229 }
230 
create_new_instr_before(ppir_block * block,ppir_instr * ref,ppir_node * node)231 static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
232                                     ppir_node *node)
233 {
234    ppir_instr *newinstr = ppir_instr_create(block);
235    if (unlikely(!newinstr))
236       return false;
237 
238    list_del(&newinstr->list);
239    list_addtail(&newinstr->list, &ref->list);
240 
241    if (!ppir_instr_insert_node(newinstr, node))
242       return false;
243 
244    list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
245       instr->seq++;
246    }
247    newinstr->seq = ref->seq-1;
248    newinstr->scheduled = true;
249    return true;
250 }
251 
ppir_update_spilled_src(ppir_compiler * comp,ppir_block * block,ppir_node * node,ppir_src * src,ppir_node ** fill_node)252 static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
253                                     ppir_node *node, ppir_src *src,
254                                     ppir_node **fill_node)
255 {
256    /* nodes might have multiple references to the same value.
257     * avoid creating unnecessary loads for the same fill by
258     * saving the node resulting from the temporary load */
259    if (*fill_node)
260       goto update_src;
261 
262    int num_components = src->reg->num_components;
263 
264    /* alloc new node to load value */
265    ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
266    if (!load_node)
267       return false;
268    list_addtail(&load_node->list, &node->list);
269    comp->num_fills++;
270 
271    ppir_load_node *load = ppir_node_to_load(load_node);
272 
273    load->index = -comp->prog->stack_size; /* index sizes are negative */
274    load->num_components = num_components;
275 
276    ppir_dest *ld_dest = &load->dest;
277    ld_dest->type = ppir_target_pipeline;
278    ld_dest->pipeline = ppir_pipeline_reg_uniform;
279    ld_dest->write_mask = u_bit_consecutive(0, num_components);
280 
281    /* If the uniform slot is empty, we can insert the load_temp
282     * there and use it directly. Exceptionally, if the node is in the
283     * varying or texld slot, this doesn't work. */
284    if (!node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] &&
285         node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
286         node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
287       ppir_node_target_assign(src, load_node);
288       *fill_node = load_node;
289       return ppir_instr_insert_node(node->instr, load_node);
290    }
291 
292    /* Uniform slot was taken, so fall back to a new instruction with a mov */
293    if (!create_new_instr_before(block, node->instr, load_node))
294       return false;
295 
296    /* Create move node */
297    ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
298    if (unlikely(!move_node))
299       return false;
300    list_addtail(&move_node->list, &node->list);
301 
302    ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
303 
304    move_alu->num_src = 1;
305    move_alu->src->type = ppir_target_pipeline;
306    move_alu->src->pipeline = ppir_pipeline_reg_uniform;
307    for (int i = 0; i < 4; i++)
308       move_alu->src->swizzle[i] = i;
309 
310    ppir_dest *alu_dest = &move_alu->dest;
311    alu_dest->type = ppir_target_ssa;
312    alu_dest->ssa.num_components = num_components;
313    alu_dest->ssa.spilled = true;
314    alu_dest->write_mask = u_bit_consecutive(0, num_components);
315 
316    list_addtail(&alu_dest->ssa.list, &comp->reg_list);
317 
318    if (!ppir_instr_insert_node(load_node->instr, move_node))
319       return false;
320 
321    /* insert the new node as predecessor */
322    ppir_node_foreach_pred_safe(node, dep) {
323       ppir_node *pred = dep->pred;
324       ppir_node_remove_dep(dep);
325       ppir_node_add_dep(load_node, pred, ppir_dep_src);
326    }
327    ppir_node_add_dep(node, move_node, ppir_dep_src);
328    ppir_node_add_dep(move_node, load_node, ppir_dep_src);
329 
330    *fill_node = move_node;
331 
332 update_src:
333    /* switch node src to use the fill node dest */
334    ppir_node_target_assign(src, *fill_node);
335 
336    return true;
337 }
338 
ppir_update_spilled_dest_load(ppir_compiler * comp,ppir_block * block,ppir_node * node)339 static bool ppir_update_spilled_dest_load(ppir_compiler *comp, ppir_block *block,
340                                           ppir_node *node)
341 {
342    ppir_dest *dest = ppir_node_get_dest(node);
343    assert(dest != NULL);
344    assert(dest->type == ppir_target_register);
345    ppir_reg *reg = dest->reg;
346    int num_components = reg->num_components;
347 
348    /* alloc new node to load value */
349    ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
350    if (!load_node)
351       return NULL;
352    list_addtail(&load_node->list, &node->list);
353    comp->num_fills++;
354 
355    ppir_load_node *load = ppir_node_to_load(load_node);
356 
357    load->index = -comp->prog->stack_size; /* index sizes are negative */
358    load->num_components = num_components;
359 
360    load->dest.type = ppir_target_pipeline;
361    load->dest.pipeline = ppir_pipeline_reg_uniform;
362    load->dest.write_mask = u_bit_consecutive(0, num_components);
363 
364    /* New instruction is needed since we're updating a dest register
365     * and we can't write to the uniform pipeline reg */
366    if (!create_new_instr_before(block, node->instr, load_node))
367       return false;
368 
369    /* Create move node */
370    ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1 , 0);
371    if (unlikely(!move_node))
372       return false;
373    list_addtail(&move_node->list, &node->list);
374 
375    ppir_alu_node *move_alu = ppir_node_to_alu(move_node);
376 
377    move_alu->num_src = 1;
378    move_alu->src->type = ppir_target_pipeline;
379    move_alu->src->pipeline = ppir_pipeline_reg_uniform;
380    for (int i = 0; i < 4; i++)
381       move_alu->src->swizzle[i] = i;
382 
383    move_alu->dest.type = ppir_target_register;
384    move_alu->dest.reg = reg;
385    move_alu->dest.write_mask = u_bit_consecutive(0, num_components);
386 
387    if (!ppir_instr_insert_node(load_node->instr, move_node))
388       return false;
389 
390    ppir_node_foreach_pred_safe(node, dep) {
391       ppir_node *pred = dep->pred;
392       ppir_node_remove_dep(dep);
393       ppir_node_add_dep(load_node, pred, ppir_dep_src);
394    }
395    ppir_node_add_dep(node, move_node, ppir_dep_src);
396    ppir_node_add_dep(move_node, load_node, ppir_dep_src);
397 
398    return true;
399 }
400 
ppir_update_spilled_dest(ppir_compiler * comp,ppir_block * block,ppir_node * node)401 static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
402                                      ppir_node *node)
403 {
404    ppir_dest *dest = ppir_node_get_dest(node);
405    assert(dest != NULL);
406    ppir_reg *reg = ppir_dest_get_reg(dest);
407 
408    /* alloc new node to store value */
409    ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
410    if (!store_node)
411       return false;
412    list_addtail(&store_node->list, &node->list);
413    comp->num_spills++;
414 
415    ppir_store_node *store = ppir_node_to_store(store_node);
416 
417    store->index = -comp->prog->stack_size; /* index sizes are negative */
418 
419    ppir_node_target_assign(&store->src, node);
420    store->num_components = reg->num_components;
421 
422    /* insert the new node as successor */
423    ppir_node_foreach_succ_safe(node, dep) {
424       ppir_node *succ = dep->succ;
425       ppir_node_remove_dep(dep);
426       ppir_node_add_dep(succ, store_node, ppir_dep_src);
427    }
428    ppir_node_add_dep(store_node, node, ppir_dep_src);
429 
430    /* If the store temp slot is empty, we can insert the store_temp
431     * there and use it directly. Exceptionally, if the node is in the
432     * combine slot, this doesn't work. */
433    if (!node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] &&
434         node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE)
435       return ppir_instr_insert_node(node->instr, store_node);
436 
437    /* Not possible to merge store, so fall back to a new instruction */
438    return create_new_instr_after(block, node->instr, store_node);
439 }
440 
ppir_regalloc_spill_reg(ppir_compiler * comp,ppir_reg * chosen)441 static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
442 {
443    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
444       list_for_each_entry(ppir_node, node, &block->node_list, list) {
445 
446          ppir_dest *dest = ppir_node_get_dest(node);
447          if (dest && ppir_dest_get_reg(dest) == chosen) {
448             /* If dest is a register, it might be updating only some its
449              * components, so need to load the existing value first */
450             if (dest->type == ppir_target_register) {
451                if (!ppir_update_spilled_dest_load(comp, block, node))
452                   return false;
453             }
454             if (!ppir_update_spilled_dest(comp, block, node))
455                return false;
456          }
457 
458          ppir_node *fill_node = NULL;
459          /* nodes might have multiple references to the same value.
460           * avoid creating unnecessary loads for the same fill by
461           * saving the node resulting from the temporary load */
462          for (int i = 0; i < ppir_node_get_src_num(node); i++) {
463             ppir_src *src = ppir_node_get_src(node, i);
464             ppir_reg *reg = ppir_src_get_reg(src);
465             if (reg == chosen) {
466                if (!ppir_update_spilled_src(comp, block, node, src, &fill_node))
467                   return false;
468             }
469          }
470       }
471    }
472 
473    return true;
474 }
475 
ppir_regalloc_choose_spill_node(ppir_compiler * comp,struct ra_graph * g)476 static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
477                                                  struct ra_graph *g)
478 {
479    float spill_costs[list_length(&comp->reg_list)];
480    /* experimentally determined, it seems to be worth scaling cost of
481     * regs in instructions that have used uniform/store_temp slots,
482     * but not too much as to offset the num_components base cost. */
483    const float slot_scale = 1.1f;
484 
485    list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
486       if (reg->spilled) {
487          /* not considered for spilling */
488          spill_costs[reg->regalloc_index] = 0.0f;
489          continue;
490       }
491 
492       /* It is beneficial to spill registers with higher component number,
493        * so increase the cost of spilling registers with few components */
494       float spill_cost = 4.0f / (float)reg->num_components;
495       spill_costs[reg->regalloc_index] = spill_cost;
496    }
497 
498    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
499       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
500          if (instr->slots[PPIR_INSTR_SLOT_UNIFORM]) {
501             for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
502                ppir_node *node = instr->slots[i];
503                if (!node)
504                   continue;
505                for (int j = 0; j < ppir_node_get_src_num(node); j++) {
506                   ppir_src *src = ppir_node_get_src(node, j);
507                   if (!src)
508                      continue;
509                   ppir_reg *reg = ppir_src_get_reg(src);
510                   if (!reg)
511                      continue;
512 
513                   spill_costs[reg->regalloc_index] *= slot_scale;
514                }
515             }
516          }
517          if (instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]) {
518             for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
519                ppir_node *node = instr->slots[i];
520                if (!node)
521                   continue;
522                ppir_dest *dest = ppir_node_get_dest(node);
523                if (!dest)
524                   continue;
525                ppir_reg *reg = ppir_dest_get_reg(dest);
526                if (!reg)
527                   continue;
528 
529                spill_costs[reg->regalloc_index] *= slot_scale;
530             }
531          }
532       }
533    }
534 
535    for (int i = 0; i < list_length(&comp->reg_list); i++)
536       ra_set_node_spill_cost(g, i, spill_costs[i]);
537 
538    int r = ra_get_best_spill_node(g);
539    if (r == -1)
540       return NULL;
541 
542    ppir_reg *chosen = NULL;
543    int i = 0;
544    list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
545       if (i++ == r) {
546          chosen = reg;
547          break;
548       }
549    }
550    assert(chosen);
551    chosen->spilled = true;
552    chosen->is_head = true; /* store_temp unable to do swizzle */
553 
554    return chosen;
555 }
556 
ppir_regalloc_reset_liveness_info(ppir_compiler * comp)557 static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
558 {
559    int idx = 0;
560 
561    list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
562       reg->regalloc_index = idx++;
563    }
564 
565    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
566 
567       if (block->live_in)
568          ralloc_free(block->live_in);
569       block->live_in = rzalloc_array(comp,
570             struct ppir_liveness, list_length(&comp->reg_list));
571 
572       if (block->live_in_set)
573          _mesa_set_destroy(block->live_in_set, NULL);
574       block->live_in_set = _mesa_set_create(comp,
575                                             _mesa_hash_pointer,
576                                             _mesa_key_pointer_equal);
577 
578       if (block->live_out)
579          ralloc_free(block->live_out);
580       block->live_out = rzalloc_array(comp,
581             struct ppir_liveness, list_length(&comp->reg_list));
582 
583       if (block->live_out_set)
584          _mesa_set_destroy(block->live_out_set, NULL);
585       block->live_out_set = _mesa_set_create(comp,
586                                              _mesa_hash_pointer,
587                                              _mesa_key_pointer_equal);
588 
589       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
590 
591          if (instr->live_in)
592             ralloc_free(instr->live_in);
593          instr->live_in = rzalloc_array(comp,
594                struct ppir_liveness, list_length(&comp->reg_list));
595 
596          if (instr->live_in_set)
597             _mesa_set_destroy(instr->live_in_set, NULL);
598          instr->live_in_set = _mesa_set_create(comp,
599                                                _mesa_hash_pointer,
600                                                _mesa_key_pointer_equal);
601 
602          if (instr->live_internal)
603             ralloc_free(instr->live_internal);
604          instr->live_internal = rzalloc_array(comp,
605                struct ppir_liveness, list_length(&comp->reg_list));
606 
607          if (instr->live_internal_set)
608             _mesa_set_destroy(instr->live_internal_set, NULL);
609          instr->live_internal_set = _mesa_set_create(comp,
610                                                _mesa_hash_pointer,
611                                                _mesa_key_pointer_equal);
612 
613          if (instr->live_out)
614             ralloc_free(instr->live_out);
615          instr->live_out = rzalloc_array(comp,
616                struct ppir_liveness, list_length(&comp->reg_list));
617 
618          if (instr->live_out_set)
619             _mesa_set_destroy(instr->live_out_set, NULL);
620          instr->live_out_set = _mesa_set_create(comp,
621                                                 _mesa_hash_pointer,
622                                                 _mesa_key_pointer_equal);
623       }
624    }
625 }
626 
ppir_all_interference(ppir_compiler * comp,struct ra_graph * g,struct set * liveness)627 static void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g,
628                                   struct set *liveness)
629 {
630    set_foreach(liveness, entry1) {
631       set_foreach(liveness, entry2) {
632          const struct ppir_liveness *r1 = entry1->key;
633          const struct ppir_liveness *r2 = entry2->key;
634          ra_add_node_interference(g, r1->reg->regalloc_index,
635                                      r2->reg->regalloc_index);
636       }
637       _mesa_set_remove(liveness, entry1);
638    }
639 }
640 
641 int lima_ppir_force_spilling = 0;
642 
ppir_regalloc_prog_try(ppir_compiler * comp,bool * spilled)643 static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
644 {
645    ppir_regalloc_reset_liveness_info(comp);
646 
647    struct ra_graph *g = ra_alloc_interference_graph(
648       comp->ra, list_length(&comp->reg_list));
649 
650    int n = 0;
651    list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
652       int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
653       if (reg->is_head)
654          c += 4;
655       ra_set_node_class(g, n++, c);
656    }
657 
658    ppir_liveness_analysis(comp);
659 
660    list_for_each_entry(ppir_block, block, &comp->block_list, list) {
661       list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
662          set_foreach(instr->live_internal_set, entry) {
663             _mesa_set_add(instr->live_in_set, entry->key);
664             _mesa_set_add(instr->live_out_set, entry->key);
665          }
666          ppir_all_interference(comp, g, instr->live_in_set);
667          ppir_all_interference(comp, g, instr->live_out_set);
668       }
669    }
670 
671    *spilled = false;
672    bool ok = ra_allocate(g);
673    if (!ok || (comp->force_spilling-- > 0)) {
674       ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
675       if (chosen) {
676          /* stack_size will be used to assemble the frame reg in lima_draw.
677           * It is also be used in the spilling code, as negative indices
678           * starting from -1, to create stack addresses. */
679          comp->prog->stack_size++;
680          if (!ppir_regalloc_spill_reg(comp, chosen))
681             goto err_out;
682          /* Ask the outer loop to call back in. */
683          *spilled = true;
684 
685          ppir_debug("spilled register %d/%d, num_components: %d\n",
686                     chosen->regalloc_index, list_length(&comp->reg_list),
687                     chosen->num_components);
688          goto err_out;
689       }
690 
691       ppir_error("regalloc fail\n");
692       goto err_out;
693    }
694 
695    n = 0;
696    list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
697       int reg_index = ra_get_node_reg(g, n++);
698       reg->index = get_phy_reg_index(reg_index);
699    }
700 
701    ralloc_free(g);
702 
703    if (lima_debug & LIMA_DEBUG_PP)
704       ppir_regalloc_print_result(comp);
705 
706    return true;
707 
708 err_out:
709    ralloc_free(g);
710    return false;
711 }
712 
ppir_regalloc_prog(ppir_compiler * comp)713 bool ppir_regalloc_prog(ppir_compiler *comp)
714 {
715    bool spilled = false;
716    comp->prog->stack_size = 0;
717 
718    /* Set from an environment variable to force spilling
719     * for debugging purposes, see lima_screen.c */
720    comp->force_spilling = lima_ppir_force_spilling;
721 
722    ppir_regalloc_update_reglist_ssa(comp);
723 
724    /* No registers? Probably shader consists of discard instruction */
725    if (list_is_empty(&comp->reg_list))
726       return true;
727 
728    /* this will most likely succeed in the first
729     * try, except for very complicated shaders */
730    while (!ppir_regalloc_prog_try(comp, &spilled))
731       if (!spilled)
732          return false;
733 
734    return true;
735 }
736