1 /*
2 * Copyright © 2011 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "intel_batchbuffer.h"
25 #include "intel_mipmap_tree.h"
26 #include "intel_regions.h"
27 #include "intel_fbo.h"
28 #include "brw_context.h"
29 #include "brw_state.h"
30 #include "brw_defines.h"
31
emit_depthbuffer(struct brw_context * brw)32 static void emit_depthbuffer(struct brw_context *brw)
33 {
34 struct intel_context *intel = &brw->intel;
35 struct gl_context *ctx = &intel->ctx;
36 struct gl_framebuffer *fb = ctx->DrawBuffer;
37
38 /* _NEW_BUFFERS */
39 struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
40 struct intel_renderbuffer *srb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
41 struct intel_mipmap_tree *depth_mt = NULL,
42 *stencil_mt = NULL,
43 *hiz_mt = NULL;
44
45 /* Amount by which drawing should be offset in order to draw to the
46 * appropriate miplevel/zoffset/cubeface. We will extract these values
47 * from depth_irb or stencil_irb once we determine which is present.
48 */
49 uint32_t draw_x = 0, draw_y = 0;
50
51 /* Masks used to determine how much of the draw_x and draw_y offsets should
52 * be performed using the fine adjustment of "depth coordinate offset X/Y"
53 * (dw5 of 3DSTATE_DEPTH_BUFFER). Any remaining coarse adjustment will be
54 * performed by changing the base addresses of the buffers.
55 *
56 * Since the HiZ, depth, and stencil buffers all use the same "depth
57 * coordinate offset X/Y" values, we need to make sure that the coarse
58 * adjustment will be possible to apply to all three buffers. Since coarse
59 * adjustment can only be applied in multiples of the tile size, we will OR
60 * together the tile masks of all the buffers to determine which offsets to
61 * perform as fine adjustments.
62 */
63 uint32_t tile_mask_x = 0, tile_mask_y = 0;
64
65 if (drb)
66 depth_mt = drb->mt;
67
68 if (depth_mt) {
69 hiz_mt = depth_mt->hiz_mt;
70
71 intel_region_get_tile_masks(depth_mt->region,
72 &tile_mask_x, &tile_mask_y, false);
73
74 if (hiz_mt) {
75 uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
76 intel_region_get_tile_masks(hiz_mt->region,
77 &hiz_tile_mask_x, &hiz_tile_mask_y,
78 false);
79
80 /* Each HiZ row represents 2 rows of pixels */
81 hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;
82
83 tile_mask_x |= hiz_tile_mask_x;
84 tile_mask_y |= hiz_tile_mask_y;
85 }
86 }
87
88 if (srb) {
89 stencil_mt = srb->mt;
90 if (stencil_mt->stencil_mt)
91 stencil_mt = stencil_mt->stencil_mt;
92
93 assert(stencil_mt->format == MESA_FORMAT_S8);
94
95 /* Stencil buffer uses 64x64 tiles. */
96 tile_mask_x |= 63;
97 tile_mask_y |= 63;
98 }
99
100 /* Gen7 doesn't support packed depth/stencil */
101 assert(stencil_mt == NULL || depth_mt != stencil_mt);
102 assert(!depth_mt || !_mesa_is_format_packed_depth_stencil(depth_mt->format));
103
104 intel_emit_depth_stall_flushes(intel);
105
106 if (depth_mt == NULL) {
107 uint32_t dw1 = BRW_DEPTHFORMAT_D32_FLOAT << 18;
108 uint32_t dw3 = 0;
109 uint32_t tile_x = 0, tile_y = 0;
110
111 if (stencil_mt == NULL) {
112 dw1 |= (BRW_SURFACE_NULL << 29);
113 } else {
114 /* _NEW_STENCIL: enable stencil buffer writes */
115 dw1 |= ((ctx->Stencil.WriteMask != 0) << 27);
116
117 draw_x = srb->draw_x;
118 draw_y = srb->draw_y;
119 tile_x = draw_x & tile_mask_x;
120 tile_y = draw_y & tile_mask_y;
121
122 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
123 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
124 * Coordinate Offset X/Y":
125 *
126 * "The 3 LSBs of both offsets must be zero to ensure correct
127 * alignment"
128 *
129 * We have no guarantee that tile_x and tile_y are correctly aligned,
130 * since they are determined by the mipmap layout, which is only
131 * aligned to multiples of 4.
132 *
133 * So, to avoid hanging the GPU, just smash the low order 3 bits of
134 * tile_x and tile_y to 0. This is a temporary workaround until we
135 * come up with a better solution.
136 */
137 tile_x &= ~7;
138 tile_y &= ~7;
139
140 /* 3DSTATE_STENCIL_BUFFER inherits surface type and dimensions. */
141 dw1 |= (BRW_SURFACE_2D << 29);
142 dw3 = ((srb->Base.Base.Width + tile_x - 1) << 4) |
143 ((srb->Base.Base.Height + tile_y - 1) << 18);
144 }
145
146 BEGIN_BATCH(7);
147 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
148 OUT_BATCH(dw1);
149 OUT_BATCH(0);
150 OUT_BATCH(dw3);
151 OUT_BATCH(0);
152 OUT_BATCH(tile_x | (tile_y << 16));
153 OUT_BATCH(0);
154 ADVANCE_BATCH();
155 } else {
156 struct intel_region *region = depth_mt->region;
157 uint32_t tile_x, tile_y, offset;
158
159 draw_x = drb->draw_x;
160 draw_y = drb->draw_y;
161 tile_x = draw_x & tile_mask_x;
162 tile_y = draw_y & tile_mask_y;
163
164 /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
165 * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
166 * Coordinate Offset X/Y":
167 *
168 * "The 3 LSBs of both offsets must be zero to ensure correct
169 * alignment"
170 *
171 * We have no guarantee that tile_x and tile_y are correctly aligned,
172 * since they are determined by the mipmap layout, which is only aligned
173 * to multiples of 4.
174 *
175 * So, to avoid hanging the GPU, just smash the low order 3 bits of
176 * tile_x and tile_y to 0. This is a temporary workaround until we come
177 * up with a better solution.
178 */
179 tile_x &= ~7;
180 tile_y &= ~7;
181
182 offset = intel_region_get_aligned_offset(region,
183 draw_x & ~tile_mask_x,
184 draw_y & ~tile_mask_y,
185 false);
186
187 assert(region->tiling == I915_TILING_Y);
188
189 /* _NEW_DEPTH, _NEW_STENCIL */
190 BEGIN_BATCH(7);
191 OUT_BATCH(GEN7_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
192 OUT_BATCH(((region->pitch * region->cpp) - 1) |
193 (brw_depthbuffer_format(brw) << 18) |
194 ((hiz_mt ? 1 : 0) << 22) | /* hiz enable */
195 ((stencil_mt != NULL && ctx->Stencil.WriteMask != 0) << 27) |
196 ((ctx->Depth.Mask != 0) << 28) |
197 (BRW_SURFACE_2D << 29));
198 OUT_RELOC(region->bo,
199 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
200 offset);
201 OUT_BATCH((((drb->Base.Base.Width + tile_x) - 1) << 4) |
202 (((drb->Base.Base.Height + tile_y) - 1) << 18));
203 OUT_BATCH(0);
204 OUT_BATCH(tile_x | (tile_y << 16));
205 OUT_BATCH(0);
206 ADVANCE_BATCH();
207 }
208
209 if (hiz_mt == NULL) {
210 BEGIN_BATCH(3);
211 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
212 OUT_BATCH(0);
213 OUT_BATCH(0);
214 ADVANCE_BATCH();
215 } else {
216 uint32_t hiz_offset =
217 intel_region_get_aligned_offset(hiz_mt->region,
218 draw_x & ~tile_mask_x,
219 (draw_y & ~tile_mask_y) / 2,
220 false);
221 BEGIN_BATCH(3);
222 OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
223 OUT_BATCH(hiz_mt->region->pitch * hiz_mt->region->cpp - 1);
224 OUT_RELOC(hiz_mt->region->bo,
225 I915_GEM_DOMAIN_RENDER,
226 I915_GEM_DOMAIN_RENDER,
227 hiz_offset);
228 ADVANCE_BATCH();
229 }
230
231 if (stencil_mt == NULL) {
232 BEGIN_BATCH(3);
233 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
234 OUT_BATCH(0);
235 OUT_BATCH(0);
236 ADVANCE_BATCH();
237 } else {
238 const int enabled = intel->is_haswell ? HSW_STENCIL_ENABLED : 0;
239
240 /* Note: We can't compute the stencil offset using
241 * intel_region_get_aligned_offset(), because the stencil region claims
242 * that the region is untiled; in fact it's W tiled.
243 */
244 uint32_t stencil_offset =
245 (draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
246 (draw_x & ~tile_mask_x) * 64;
247
248 BEGIN_BATCH(3);
249 OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
250 /* The stencil buffer has quirky pitch requirements. From the Graphics
251 * BSpec: vol2a.11 3D Pipeline Windower > Early Depth/Stencil Processing
252 * > Depth/Stencil Buffer State > 3DSTATE_STENCIL_BUFFER [DevIVB+],
253 * field "Surface Pitch":
254 *
255 * The pitch must be set to 2x the value computed based on width, as
256 * the stencil buffer is stored with two rows interleaved.
257 *
258 * (Note that it is not 100% clear whether this intended to apply to
259 * Gen7; the BSpec flags this comment as "DevILK,DevSNB" (which would
260 * imply that it doesn't), however the comment appears on a "DevIVB+"
261 * page (which would imply that it does). Experiments with the hardware
262 * indicate that it does.
263 */
264 OUT_BATCH(enabled |
265 (2 * stencil_mt->region->pitch * stencil_mt->region->cpp - 1));
266 OUT_RELOC(stencil_mt->region->bo,
267 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
268 stencil_offset);
269 ADVANCE_BATCH();
270 }
271
272 BEGIN_BATCH(3);
273 OUT_BATCH(GEN7_3DSTATE_CLEAR_PARAMS << 16 | (3 - 2));
274 OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
275 OUT_BATCH(1);
276 ADVANCE_BATCH();
277 }
278
279 /**
280 * \see brw_context.state.depth_region
281 */
282 const struct brw_tracked_state gen7_depthbuffer = {
283 .dirty = {
284 .mesa = (_NEW_BUFFERS | _NEW_DEPTH | _NEW_STENCIL),
285 .brw = BRW_NEW_BATCH,
286 .cache = 0,
287 },
288 .emit = emit_depthbuffer,
289 };
290