1 /*
2  * Copyright (C) 2019 Collabora, Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #include <stdio.h>
28 #include <assert.h>
29 #include <stdlib.h>
30 #include <string.h>
31 #include <limits.h>
32 #include "util/macros.h"
33 #include "util/u_math.h"
34 #include "lcra.h"
35 
36 /* This module is the reference implementation of "Linearly Constrained
37  * Register Allocation". The paper is available in PDF form
38  * (https://people.collabora.com/~alyssa/LCRA.pdf) as well as Markdown+LaTeX
39  * (https://gitlab.freedesktop.org/alyssa/lcra/blob/master/LCRA.md)
40  */
41 
42 struct lcra_state *
lcra_alloc_equations(unsigned node_count,unsigned class_count)43 lcra_alloc_equations(
44                 unsigned node_count, unsigned class_count)
45 {
46         struct lcra_state *l = calloc(1, sizeof(*l));
47 
48         l->node_count = node_count;
49         l->class_count = class_count;
50 
51         l->alignment = calloc(sizeof(l->alignment[0]), node_count);
52         l->linear = calloc(sizeof(l->linear[0]), node_count * node_count);
53         l->modulus = calloc(sizeof(l->modulus[0]), node_count);
54         l->class = calloc(sizeof(l->class[0]), node_count);
55         l->class_start = calloc(sizeof(l->class_start[0]), class_count);
56         l->class_disjoint = calloc(sizeof(l->class_disjoint[0]), class_count * class_count);
57         l->class_size = calloc(sizeof(l->class_size[0]), class_count);
58         l->spill_cost = calloc(sizeof(l->spill_cost[0]), node_count);
59         l->solutions = calloc(sizeof(l->solutions[0]), node_count);
60 
61         memset(l->solutions, ~0, sizeof(l->solutions[0]) * node_count);
62 
63         return l;
64 }
65 
66 void
lcra_free(struct lcra_state * l)67 lcra_free(struct lcra_state *l)
68 {
69         if (!l)
70                 return;
71 
72         free(l->alignment);
73         free(l->linear);
74         free(l->modulus);
75         free(l->class);
76         free(l->class_start);
77         free(l->class_disjoint);
78         free(l->class_size);
79         free(l->spill_cost);
80         free(l->solutions);
81 
82         free(l);
83 }
84 
85 void
lcra_set_alignment(struct lcra_state * l,unsigned node,unsigned align_log2,unsigned bound)86 lcra_set_alignment(struct lcra_state *l, unsigned node, unsigned align_log2, unsigned bound)
87 {
88         l->alignment[node] = (align_log2 + 1) | (bound << 16);
89 }
90 
91 void
lcra_set_disjoint_class(struct lcra_state * l,unsigned c1,unsigned c2)92 lcra_set_disjoint_class(struct lcra_state *l, unsigned c1, unsigned c2)
93 {
94         l->class_disjoint[(c1 * l->class_count) + c2] = true;
95         l->class_disjoint[(c2 * l->class_count) + c1] = true;
96 }
97 
98 void
lcra_restrict_range(struct lcra_state * l,unsigned node,unsigned len)99 lcra_restrict_range(struct lcra_state *l, unsigned node, unsigned len)
100 {
101         if (node < l->node_count && l->alignment[node]) {
102                 unsigned BA = l->alignment[node];
103                 unsigned alignment = (BA & 0xffff) - 1;
104                 unsigned bound = BA >> 16;
105                 l->modulus[node] = DIV_ROUND_UP(bound - len + 1, 1 << alignment);
106         }
107 }
108 
109 void
lcra_add_node_interference(struct lcra_state * l,unsigned i,unsigned cmask_i,unsigned j,unsigned cmask_j)110 lcra_add_node_interference(struct lcra_state *l, unsigned i, unsigned cmask_i, unsigned j, unsigned cmask_j)
111 {
112         if (i == j)
113                 return;
114 
115         if (l->class_disjoint[(l->class[i] * l->class_count) + l->class[j]])
116                 return;
117 
118         uint32_t constraint_fw = 0;
119         uint32_t constraint_bw = 0;
120 
121         for (unsigned D = 0; D < 16; ++D) {
122                 if (cmask_i & (cmask_j << D)) {
123                         constraint_bw |= (1 << (15 + D));
124                         constraint_fw |= (1 << (15 - D));
125                 }
126 
127                 if (cmask_i & (cmask_j >> D)) {
128                         constraint_fw |= (1 << (15 + D));
129                         constraint_bw |= (1 << (15 - D));
130                 }
131         }
132 
133         l->linear[j * l->node_count + i] |= constraint_fw;
134         l->linear[i * l->node_count + j] |= constraint_bw;
135 }
136 
137 static bool
lcra_test_linear(struct lcra_state * l,unsigned * solutions,unsigned i)138 lcra_test_linear(struct lcra_state *l, unsigned *solutions, unsigned i)
139 {
140         unsigned *row = &l->linear[i * l->node_count];
141         signed constant = solutions[i];
142 
143         for (unsigned j = 0; j < l->node_count; ++j) {
144                 if (solutions[j] == ~0) continue;
145 
146                 signed lhs = solutions[j] - constant;
147 
148                 if (lhs < -15 || lhs > 15)
149                         continue;
150 
151                 if (row[j] & (1 << (lhs + 15)))
152                         return false;
153         }
154 
155         return true;
156 }
157 
158 bool
lcra_solve(struct lcra_state * l)159 lcra_solve(struct lcra_state *l)
160 {
161         for (unsigned step = 0; step < l->node_count; ++step) {
162                 if (l->solutions[step] != ~0) continue;
163                 if (l->alignment[step] == 0) continue;
164 
165                 unsigned _class = l->class[step];
166                 unsigned class_start = l->class_start[_class];
167 
168                 unsigned BA = l->alignment[step];
169                 unsigned shift = (BA & 0xffff) - 1;
170                 unsigned bound = BA >> 16;
171 
172                 unsigned P = bound >> shift;
173                 unsigned Q = l->modulus[step];
174                 unsigned r_max = l->class_size[_class];
175                 unsigned k_max = r_max >> shift;
176                 unsigned m_max = k_max / P;
177                 bool succ = false;
178 
179                 for (unsigned m = 0; m < m_max; ++m) {
180                         for (unsigned n = 0; n < Q; ++n) {
181                                 l->solutions[step] = ((m * P + n) << shift) + class_start;
182                                 succ = lcra_test_linear(l, l->solutions, step);
183 
184                                 if (succ) break;
185                         }
186 
187                         if (succ) break;
188                 }
189 
190                 /* Out of registers - prepare to spill */
191                 if (!succ) {
192                         l->spill_class = l->class[step];
193                         return false;
194                 }
195         }
196 
197         return true;
198 }
199 
200 /* Register spilling is implemented with a cost-benefit system. Costs are set
201  * by the user. Benefits are calculated from the constraints. */
202 
203 void
lcra_set_node_spill_cost(struct lcra_state * l,unsigned node,signed cost)204 lcra_set_node_spill_cost(struct lcra_state *l, unsigned node, signed cost)
205 {
206         if (node < l->node_count)
207                 l->spill_cost[node] = cost;
208 }
209 
210 /* Count along the lower triangle */
211 
212 static unsigned
lcra_count_constraints(struct lcra_state * l,unsigned i)213 lcra_count_constraints(struct lcra_state *l, unsigned i)
214 {
215         unsigned count = 0;
216         unsigned *constraints = &l->linear[i * l->node_count];
217 
218         for (unsigned j = 0; j < i; ++j)
219                 count += util_bitcount(constraints[j]);
220 
221         return count;
222 }
223 
224 signed
lcra_get_best_spill_node(struct lcra_state * l)225 lcra_get_best_spill_node(struct lcra_state *l)
226 {
227         float best_benefit = -1.0;
228         signed best_node = -1;
229 
230         for (unsigned i = 0; i < l->node_count; ++i) {
231                 /* Find spillable nodes */
232                 if (l->class[i] != l->spill_class) continue;
233                 if (l->spill_cost[i] < 0) continue;
234 
235                 /* Adapted from Chaitin's heuristic */
236                 float constraints = lcra_count_constraints(l, i);
237                 float cost = (l->spill_cost[i] + 1);
238                 float benefit = constraints / cost;
239 
240                 if (benefit > best_benefit) {
241                         best_benefit = benefit;
242                         best_node = i;
243                 }
244         }
245 
246         return best_node;
247 }
248