1 /*
2  * Copyright (c) 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdlib.h>
25 #include <math.h>
26 
27 #include "util/macros.h"
28 #include "main/macros.h"
29 
30 #include "gen_l3_config.h"
31 
/**
 * IVB/HSW validated L3 configurations.  The first entry will be used as
 * default by gen7_restore_default_l3_config(), otherwise the ordering is
 * unimportant.
 *
 * get_l3_configs() returns this table for gen 7 devices that are not
 * Baytrail, and gen_get_default_l3_config() likewise hands out the first
 * entry of the selected table as the default.  Note that
 * gen_get_l3_config() keeps the earliest entry when several entries are
 * equally close to the requested weights, so ordering does break ties.
 *
 * Each row initializes the n[] array of one struct gen_l3_config, one value
 * per partition in the column order given by the header comment inside the
 * table.  The values are presumably way counts:
 * gen_get_l3_config_urb_size() multiplies the URB entry by the L3 way size.
 * The list ends with an all-zero row.
 */
static const struct gen_l3_config ivb_l3_configs[] = {
   /* SLM URB ALL DC  RO  IS   C   T */
   {{  0, 32,  0,  0, 32,  0,  0,  0 }},
   {{  0, 32,  0, 16, 16,  0,  0,  0 }},
   {{  0, 32,  0,  4,  0,  8,  4, 16 }},
   {{  0, 28,  0,  8,  0,  8,  4, 16 }},
   {{  0, 28,  0, 16,  0,  8,  4,  8 }},
   {{  0, 28,  0,  8,  0, 16,  4,  8 }},
   {{  0, 28,  0,  0,  0, 16,  4, 16 }},
   {{  0, 32,  0,  0,  0, 16,  0, 16 }},
   {{  0, 28,  0,  4, 32,  0,  0,  0 }},
   {{ 16, 16,  0, 16, 16,  0,  0,  0 }},
   {{ 16, 16,  0,  8,  0,  8,  8,  8 }},
   {{ 16, 16,  0,  4,  0,  8,  4, 16 }},
   {{ 16, 16,  0,  4,  0, 16,  4,  8 }},
   {{ 16, 16,  0,  0, 32,  0,  0,  0 }},
   {{ 0 }}
};
55 
/**
 * VLV validated L3 configurations.  \sa ivb_l3_configs.
 *
 * Returned by get_l3_configs() for gen 7 devices with is_baytrail set.
 * Same row layout as ivb_l3_configs; the list ends with an all-zero row.
 */
static const struct gen_l3_config vlv_l3_configs[] = {
   /* SLM URB ALL DC  RO  IS   C   T */
   {{  0, 64,  0,  0, 32,  0,  0,  0 }},
   {{  0, 80,  0,  0, 16,  0,  0,  0 }},
   {{  0, 80,  0,  8,  8,  0,  0,  0 }},
   {{  0, 64,  0, 16, 16,  0,  0,  0 }},
   {{  0, 60,  0,  4, 32,  0,  0,  0 }},
   {{ 32, 32,  0, 16, 16,  0,  0,  0 }},
   {{ 32, 40,  0,  8, 16,  0,  0,  0 }},
   {{ 32, 40,  0, 16,  8,  0,  0,  0 }},
   {{ 0 }}
};
71 
/**
 * BDW validated L3 configurations.  \sa ivb_l3_configs.
 *
 * Returned by get_l3_configs() for gen 8 devices that are not Cherryview.
 * Same row layout as ivb_l3_configs; the list ends with an all-zero row.
 */
static const struct gen_l3_config bdw_l3_configs[] = {
   /* SLM URB ALL DC  RO  IS   C   T */
   {{  0, 48, 48,  0,  0,  0,  0,  0 }},
   {{  0, 48,  0, 16, 32,  0,  0,  0 }},
   {{  0, 32,  0, 16, 48,  0,  0,  0 }},
   {{  0, 32,  0,  0, 64,  0,  0,  0 }},
   {{  0, 32, 64,  0,  0,  0,  0,  0 }},
   {{ 24, 16, 48,  0,  0,  0,  0,  0 }},
   {{ 24, 16,  0, 16, 32,  0,  0,  0 }},
   {{ 24, 16,  0, 32, 16,  0,  0,  0 }},
   {{ 0 }}
};
87 
/**
 * CHV/SKL validated L3 configurations.  \sa ivb_l3_configs.
 *
 * Returned by get_l3_configs() for gen 8 devices with is_cherryview set and
 * for gen 9 devices whose l3_banks is not 1.  Same row layout as
 * ivb_l3_configs; the list ends with an all-zero row.
 */
static const struct gen_l3_config chv_l3_configs[] = {
   /* SLM URB ALL DC  RO  IS   C   T */
   {{  0, 48, 48,  0,  0,  0,  0,  0 }},
   {{  0, 48,  0, 16, 32,  0,  0,  0 }},
   {{  0, 32,  0, 16, 48,  0,  0,  0 }},
   {{  0, 32,  0,  0, 64,  0,  0,  0 }},
   {{  0, 32, 64,  0,  0,  0,  0,  0 }},
   {{ 32, 16, 48,  0,  0,  0,  0,  0 }},
   {{ 32, 16,  0, 16, 32,  0,  0,  0 }},
   {{ 32, 16,  0, 32, 16,  0,  0,  0 }},
   {{ 0 }}
};
103 
/**
 * BXT 2x6 validated L3 configurations.  \sa ivb_l3_configs.
 *
 * Returned by get_l3_configs() for gen 9 devices with exactly one L3 bank
 * (l3_banks == 1).  Same row layout as ivb_l3_configs; the list ends with an
 * all-zero row.
 */
static const struct gen_l3_config bxt_2x6_l3_configs[] = {
   /* SLM URB ALL DC  RO  IS   C   T */
   {{  0, 32, 48,  0,  0,  0,  0,  0 }},
   {{  0, 32,  0,  8, 40,  0,  0,  0 }},
   {{  0, 32,  0, 32, 16,  0,  0,  0 }},
   {{ 16, 16, 48,  0,  0,  0,  0,  0 }},
   {{ 16, 16,  0, 40,  8,  0,  0,  0 }},
   {{ 16, 16,  0, 16, 32,  0,  0,  0 }},
   {{ 0 }}
};
117 
/**
 * CNL validated L3 configurations.  \sa ivb_l3_configs.
 *
 * Returned by get_l3_configs() for gen 10 devices.  Same row layout as
 * ivb_l3_configs; the list ends with an all-zero row.
 *
 * NOTE(review): the last real entry has URB == 0, so code walking this
 * table must recognize the terminator as an all-zero row rather than by
 * looking at the URB column alone.
 */
static const struct gen_l3_config cnl_l3_configs[] = {
   /* SLM URB ALL DC  RO  IS   C   T */
   {{  0, 64, 64,  0,  0,  0,  0,  0 }},
   {{  0, 64,  0, 16, 48,  0,  0,  0 }},
   {{  0, 48,  0, 16, 64,  0,  0,  0 }},
   {{  0, 32,  0,  0, 96,  0,  0,  0 }},
   {{  0, 32, 96,  0,  0,  0,  0,  0 }},
   {{  0, 32,  0, 16, 80,  0,  0,  0 }},
   {{ 32, 16, 80,  0,  0,  0,  0,  0 }},
   {{ 32, 16,  0, 64, 16,  0,  0,  0 }},
   {{ 32,  0, 96,  0,  0,  0,  0,  0 }},
   {{ 0 }}
};
134 
135 /**
136  * Return a zero-terminated array of validated L3 configurations for the
137  * specified device.
138  */
139 static const struct gen_l3_config *
get_l3_configs(const struct gen_device_info * devinfo)140 get_l3_configs(const struct gen_device_info *devinfo)
141 {
142    switch (devinfo->gen) {
143    case 7:
144       return (devinfo->is_baytrail ? vlv_l3_configs : ivb_l3_configs);
145 
146    case 8:
147       return (devinfo->is_cherryview ? chv_l3_configs : bdw_l3_configs);
148 
149    case 9:
150       if (devinfo->l3_banks == 1)
151          return bxt_2x6_l3_configs;
152       return chv_l3_configs;
153 
154    case 10:
155       return cnl_l3_configs;
156 
157    default:
158       unreachable("Not implemented");
159    }
160 }
161 
162 /**
163  * L1-normalize a vector of L3 partition weights.
164  */
165 static struct gen_l3_weights
norm_l3_weights(struct gen_l3_weights w)166 norm_l3_weights(struct gen_l3_weights w)
167 {
168    float sz = 0;
169 
170    for (unsigned i = 0; i < GEN_NUM_L3P; i++)
171       sz += w.w[i];
172 
173    for (unsigned i = 0; i < GEN_NUM_L3P; i++)
174       w.w[i] /= sz;
175 
176    return w;
177 }
178 
179 /**
180  * Get the relative partition weights of the specified L3 configuration.
181  */
182 struct gen_l3_weights
gen_get_l3_config_weights(const struct gen_l3_config * cfg)183 gen_get_l3_config_weights(const struct gen_l3_config *cfg)
184 {
185    if (cfg) {
186       struct gen_l3_weights w;
187 
188       for (unsigned i = 0; i < GEN_NUM_L3P; i++)
189          w.w[i] = cfg->n[i];
190 
191       return norm_l3_weights(w);
192    } else {
193       const struct gen_l3_weights w = { { 0 } };
194       return w;
195    }
196 }
197 
198 /**
199  * Distance between two L3 configurations represented as vectors of weights.
200  * Usually just the L1 metric except when the two configurations are
201  * considered incompatible in which case the distance will be infinite.  Note
202  * that the compatibility condition is asymmetric -- They will be considered
203  * incompatible whenever the reference configuration \p w0 requires SLM, DC,
204  * or URB but \p w1 doesn't provide it.
205  */
206 float
gen_diff_l3_weights(struct gen_l3_weights w0,struct gen_l3_weights w1)207 gen_diff_l3_weights(struct gen_l3_weights w0, struct gen_l3_weights w1)
208 {
209    if ((w0.w[GEN_L3P_SLM] && !w1.w[GEN_L3P_SLM]) ||
210        (w0.w[GEN_L3P_DC] && !w1.w[GEN_L3P_DC] && !w1.w[GEN_L3P_ALL]) ||
211        (w0.w[GEN_L3P_URB] && !w1.w[GEN_L3P_URB])) {
212       return HUGE_VALF;
213 
214    } else {
215       float dw = 0;
216 
217       for (unsigned i = 0; i < GEN_NUM_L3P; i++)
218          dw += fabs(w0.w[i] - w1.w[i]);
219 
220       return dw;
221    }
222 }
223 
224 /**
225  * Return a reasonable default L3 configuration for the specified device based
226  * on whether SLM and DC are required.  In the non-SLM non-DC case the result
227  * is intended to approximately resemble the hardware defaults.
228  */
229 struct gen_l3_weights
gen_get_default_l3_weights(const struct gen_device_info * devinfo,bool needs_dc,bool needs_slm)230 gen_get_default_l3_weights(const struct gen_device_info *devinfo,
231                            bool needs_dc, bool needs_slm)
232 {
233    struct gen_l3_weights w = {{ 0 }};
234 
235    w.w[GEN_L3P_SLM] = needs_slm;
236    w.w[GEN_L3P_URB] = 1.0;
237 
238    if (devinfo->gen >= 8) {
239       w.w[GEN_L3P_ALL] = 1.0;
240    } else {
241       w.w[GEN_L3P_DC] = needs_dc ? 0.1 : 0;
242       w.w[GEN_L3P_RO] = devinfo->is_baytrail ? 0.5 : 1.0;
243    }
244 
245    return norm_l3_weights(w);
246 }
247 
/**
 * Return the default L3 configuration for the specified device, which is
 * the first entry of the device's table of validated configurations.
 */
const struct gen_l3_config *
gen_get_default_l3_config(const struct gen_device_info *devinfo)
{
   const struct gen_l3_config *const first = get_l3_configs(devinfo);

   /* Taking the first entry avoids a search.  Builds with assertions
    * enabled verify that it really is what a lookup with the default
    * (non-DC, non-SLM) weights would pick.
    */
   assert(first == gen_get_l3_config(devinfo,
                      gen_get_default_l3_weights(devinfo, false, false)));

   return first;
}
262 
263 /**
264  * Return the closest validated L3 configuration for the specified device and
265  * weight vector.
266  */
267 const struct gen_l3_config *
gen_get_l3_config(const struct gen_device_info * devinfo,struct gen_l3_weights w0)268 gen_get_l3_config(const struct gen_device_info *devinfo,
269                   struct gen_l3_weights w0)
270 {
271    const struct gen_l3_config *const cfgs = get_l3_configs(devinfo);
272    const struct gen_l3_config *cfg_best = NULL;
273    float dw_best = HUGE_VALF;
274 
275    for (const struct gen_l3_config *cfg = cfgs; cfg->n[GEN_L3P_URB]; cfg++) {
276       const float dw = gen_diff_l3_weights(w0, gen_get_l3_config_weights(cfg));
277 
278       if (dw < dw_best) {
279          cfg_best = cfg;
280          dw_best = dw;
281       }
282    }
283 
284    return cfg_best;
285 }
286 
287 /**
288  * Return the size of an L3 way in KB.
289  */
290 static unsigned
get_l3_way_size(const struct gen_device_info * devinfo)291 get_l3_way_size(const struct gen_device_info *devinfo)
292 {
293    const unsigned way_size_per_bank =
294       devinfo->gen >= 9 && devinfo->l3_banks == 1 ? 4 : 2;
295 
296    assert(devinfo->l3_banks);
297    return way_size_per_bank * devinfo->l3_banks;
298 }
299 
300 /**
301  * Return the unit brw_context::urb::size is expressed in, in KB.  \sa
302  * gen_device_info::urb::size.
303  */
304 static unsigned
get_urb_size_scale(const struct gen_device_info * devinfo)305 get_urb_size_scale(const struct gen_device_info *devinfo)
306 {
307    return (devinfo->gen >= 8 ? devinfo->num_slices : 1);
308 }
309 
310 unsigned
gen_get_l3_config_urb_size(const struct gen_device_info * devinfo,const struct gen_l3_config * cfg)311 gen_get_l3_config_urb_size(const struct gen_device_info *devinfo,
312                            const struct gen_l3_config *cfg)
313 {
314    /* From the SKL "L3 Allocation and Programming" documentation:
315     *
316     * "URB is limited to 1008KB due to programming restrictions.  This is not
317     * a restriction of the L3 implementation, but of the FF and other clients.
318     * Therefore, in a GT4 implementation it is possible for the programmed
319     * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
320     * only 1008KB of this will be used."
321     */
322    const unsigned max = (devinfo->gen == 9 ? 1008 : ~0);
323    return MIN2(max, cfg->n[GEN_L3P_URB] * get_l3_way_size(devinfo)) /
324           get_urb_size_scale(devinfo);
325 }
326 
327 /**
328  * Print out the specified L3 configuration.
329  */
330 void
gen_dump_l3_config(const struct gen_l3_config * cfg,FILE * fp)331 gen_dump_l3_config(const struct gen_l3_config *cfg, FILE *fp)
332 {
333    fprintf(stderr, "SLM=%d URB=%d ALL=%d DC=%d RO=%d IS=%d C=%d T=%d\n",
334            cfg->n[GEN_L3P_SLM], cfg->n[GEN_L3P_URB], cfg->n[GEN_L3P_ALL],
335            cfg->n[GEN_L3P_DC], cfg->n[GEN_L3P_RO],
336            cfg->n[GEN_L3P_IS], cfg->n[GEN_L3P_C], cfg->n[GEN_L3P_T]);
337 }
338