1 /*
2  * Copyright (c) 2011 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <stdlib.h>
25 #include <math.h>
26 
27 #include "util/macros.h"
28 #include "main/macros.h"
29 
30 #include "gen_l3_config.h"
31 
32 /**
33  * The following diagram shows how we partition the URB:
34  *
35  *        16kb or 32kb               Rest of the URB space
36  *   __________-__________   _________________-_________________
37  *  /                     \ /                                   \
38  * +-------------------------------------------------------------+
39  * |  VS/HS/DS/GS/FS Push  |           VS/HS/DS/GS URB           |
40  * |       Constants       |               Entries               |
41  * +-------------------------------------------------------------+
42  *
43  * Push constants must be stored at the beginning of the URB space,
44  * while URB entries can be stored anywhere.  We choose to lay them
45  * out in pipeline order (VS -> HS -> DS -> GS).
46  */
47 
48 /**
49  * Decide how to partition the URB among the various stages.
50  *
51  * \param[in] push_constant_bytes - space allocate for push constants.
52  * \param[in] urb_size_bytes - total size of the URB (from L3 config).
53  * \param[in] tess_present - are tessellation shaders active?
54  * \param[in] gs_present - are geometry shaders active?
55  * \param[in] entry_size - the URB entry size (from the shader compiler)
56  * \param[out] entries - the number of URB entries for each stage
57  * \param[out] start - the starting offset for each stage
58  */
59 void
gen_get_urb_config(const struct gen_device_info * devinfo,unsigned push_constant_bytes,unsigned urb_size_bytes,bool tess_present,bool gs_present,const unsigned entry_size[4],unsigned entries[4],unsigned start[4])60 gen_get_urb_config(const struct gen_device_info *devinfo,
61                    unsigned push_constant_bytes, unsigned urb_size_bytes,
62                    bool tess_present, bool gs_present,
63                    const unsigned entry_size[4],
64                    unsigned entries[4], unsigned start[4])
65 {
66    const bool active[4] = { true, tess_present, tess_present, gs_present };
67 
68    /* URB allocations must be done in 8k chunks. */
69    const unsigned chunk_size_bytes = 8192;
70 
71    const unsigned push_constant_chunks =
72       push_constant_bytes / chunk_size_bytes;
73    const unsigned urb_chunks = urb_size_bytes / chunk_size_bytes;
74 
75    /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
76     *
77     *     VS Number of URB Entries must be divisible by 8 if the VS URB Entry
78     *     Allocation Size is less than 9 512-bit URB entries.
79     *
80     * Similar text exists for HS, DS and GS.
81     */
82    unsigned granularity[4];
83    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
84       granularity[i] = (entry_size[i] < 9) ? 8 : 1;
85    }
86 
87    unsigned min_entries[4] = {
88       /* VS has a lower limit on the number of URB entries.
89        *
90        * From the Broadwell PRM, 3DSTATE_URB_VS instruction:
91        * "When tessellation is enabled, the VS Number of URB Entries must be
92        *  greater than or equal to 192."
93        */
94       [MESA_SHADER_VERTEX] = tess_present && devinfo->gen == 8 ?
95          192 : devinfo->urb.min_entries[MESA_SHADER_VERTEX],
96 
97       /* There are two constraints on the minimum amount of URB space we can
98        * allocate:
99        *
100        * (1) We need room for at least 2 URB entries, since we always operate
101        * the GS in DUAL_OBJECT mode.
102        *
103        * (2) We can't allocate less than nr_gs_entries_granularity.
104        */
105       [MESA_SHADER_GEOMETRY] = gs_present ? 2 : 0,
106 
107       [MESA_SHADER_TESS_CTRL] = tess_present ? 1 : 0,
108 
109       [MESA_SHADER_TESS_EVAL] = tess_present ?
110          devinfo->urb.min_entries[MESA_SHADER_TESS_EVAL] : 0,
111    };
112 
113    /* Min VS Entries isn't a multiple of 8 on Cherryview/Broxton; round up.
114     * Round them all up.
115     */
116    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
117       min_entries[i] = ALIGN(min_entries[i], granularity[i]);
118    }
119 
120    unsigned entry_size_bytes[4];
121    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
122       entry_size_bytes[i] = 64 * entry_size[i];
123    }
124 
125    /* Initially, assign each stage the minimum amount of URB space it needs,
126     * and make a note of how much additional space it "wants" (the amount of
127     * additional space it could actually make use of).
128     */
129    unsigned chunks[4];
130    unsigned wants[4];
131    unsigned total_needs = push_constant_chunks;
132    unsigned total_wants = 0;
133 
134    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
135       if (active[i]) {
136          chunks[i] = DIV_ROUND_UP(min_entries[i] * entry_size_bytes[i],
137                                   chunk_size_bytes);
138 
139          wants[i] =
140             DIV_ROUND_UP(devinfo->urb.max_entries[i] * entry_size_bytes[i],
141                          chunk_size_bytes) - chunks[i];
142       } else {
143          chunks[i] = 0;
144          wants[i] = 0;
145       }
146 
147       total_needs += chunks[i];
148       total_wants += wants[i];
149    }
150 
151    assert(total_needs <= urb_chunks);
152 
153    /* Mete out remaining space (if any) in proportion to "wants". */
154    unsigned remaining_space = MIN2(urb_chunks - total_needs, total_wants);
155 
156    if (remaining_space > 0) {
157       for (int i = MESA_SHADER_VERTEX;
158            total_wants > 0 && i <= MESA_SHADER_TESS_EVAL; i++) {
159          unsigned additional = (unsigned)
160             roundf(wants[i] * (((float) remaining_space) / total_wants));
161          chunks[i] += additional;
162          remaining_space -= additional;
163          total_wants -= wants[i];
164       }
165 
166       chunks[MESA_SHADER_GEOMETRY] += remaining_space;
167    }
168 
169    /* Sanity check that we haven't over-allocated. */
170    unsigned total_chunks = push_constant_chunks;
171    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
172       total_chunks += chunks[i];
173    }
174    assert(total_chunks <= urb_chunks);
175 
176    /* Finally, compute the number of entries that can fit in the space
177     * allocated to each stage.
178     */
179    for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
180       entries[i] = chunks[i] * chunk_size_bytes / entry_size_bytes[i];
181 
182       /* Since we rounded up when computing wants[], this may be slightly
183        * more than the maximum allowed amount, so correct for that.
184        */
185       entries[i] = MIN2(entries[i], devinfo->urb.max_entries[i]);
186 
187       /* Ensure that we program a multiple of the granularity. */
188       entries[i] = ROUND_DOWN_TO(entries[i], granularity[i]);
189 
190       /* Finally, sanity check to make sure we have at least the minimum
191        * number of entries needed for each stage.
192        */
193       assert(entries[i] >= min_entries[i]);
194    }
195 
196    /* Lay out the URB in pipeline order: push constants, VS, HS, DS, GS. */
197    start[0] = push_constant_chunks;
198    for (int i = MESA_SHADER_TESS_CTRL; i <= MESA_SHADER_GEOMETRY; i++) {
199       start[i] = start[i - 1] + chunks[i - 1];
200    }
201 }
202