1 /*
2  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3  * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "r300_context.h"
25 #include "r300_reg.h"
26 #include "r300_fs.h"
27 
28 #include "util/u_format.h"
29 #include "util/u_mm.h"
30 
/*
  HiZ rules, collected from various docs:
   1. HiZ only works on depth values.
   2. HiZ cannot be used if the stencil fail or zfail op is not KEEP.
   3. On R300/R400, HiZ is disabled if the depth test is EQUAL.
   4. Changing the comparison without a clear usually means disabling HiZ.
*/
38 /*****************************************************************************/
39 /* The HyperZ setup                                                          */
40 /*****************************************************************************/
41 
r300_get_hiz_func(struct r300_context * r300)42 static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300)
43 {
44     struct r300_dsa_state *dsa = r300->dsa_state.state;
45 
46     switch (dsa->dsa.depth.func) {
47     case PIPE_FUNC_NEVER:
48     case PIPE_FUNC_EQUAL:
49     case PIPE_FUNC_NOTEQUAL:
50     case PIPE_FUNC_ALWAYS:
51     default:
52         /* Guess MAX for uncertain cases. */
53     case PIPE_FUNC_LESS:
54     case PIPE_FUNC_LEQUAL:
55         return HIZ_FUNC_MAX;
56 
57     case PIPE_FUNC_GREATER:
58     case PIPE_FUNC_GEQUAL:
59         return HIZ_FUNC_MIN;
60     }
61 }
62 
63 /* Return what's used for the depth test (either minimum or maximum). */
r300_get_sc_hz_max(struct r300_context * r300)64 static unsigned r300_get_sc_hz_max(struct r300_context *r300)
65 {
66     struct r300_dsa_state *dsa = r300->dsa_state.state;
67     unsigned func = dsa->dsa.depth.func;
68 
69     return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN;
70 }
71 
r300_is_hiz_func_valid(struct r300_context * r300)72 static boolean r300_is_hiz_func_valid(struct r300_context *r300)
73 {
74     struct r300_dsa_state *dsa = r300->dsa_state.state;
75     unsigned func = dsa->dsa.depth.func;
76 
77     if (r300->hiz_func == HIZ_FUNC_NONE)
78         return TRUE;
79 
80     /* func1 is less/lessthan */
81     if (r300->hiz_func == HIZ_FUNC_MAX &&
82         (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER))
83         return FALSE;
84 
85     /* func1 is greater/greaterthan */
86     if (r300->hiz_func == HIZ_FUNC_MIN &&
87         (func == PIPE_FUNC_LESS   || func == PIPE_FUNC_LEQUAL))
88         return FALSE;
89 
90     return TRUE;
91 }
92 
r300_dsa_stencil_op_not_keep(struct pipe_stencil_state * s)93 static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s)
94 {
95     return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
96                           s->zfail_op != PIPE_STENCIL_OP_KEEP);
97 }
98 
r300_hiz_allowed(struct r300_context * r300)99 static boolean r300_hiz_allowed(struct r300_context *r300)
100 {
101     struct r300_dsa_state *dsa = r300->dsa_state.state;
102     struct r300_screen *r300screen = r300->screen;
103 
104     if (r300_fragment_shader_writes_depth(r300_fs(r300)))
105         return FALSE;
106 
107     if (r300->query_current)
108         return FALSE;
109 
110     /* If the depth function is inverted, HiZ must be disabled. */
111     if (!r300_is_hiz_func_valid(r300))
112         return FALSE;
113 
114     /* if stencil fail/zfail op is not KEEP */
115     if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) ||
116         r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1]))
117         return FALSE;
118 
119     if (dsa->dsa.depth.enabled) {
120         /* if depth func is EQUAL pre-r500 */
121         if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
122             return FALSE;
123 
124         /* if depth func is NOTEQUAL */
125         if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL)
126             return FALSE;
127     }
128     return TRUE;
129 }
130 
r300_update_hyperz(struct r300_context * r300)131 static void r300_update_hyperz(struct r300_context* r300)
132 {
133     struct r300_hyperz_state *z =
134         (struct r300_hyperz_state*)r300->hyperz_state.state;
135     struct pipe_framebuffer_state *fb =
136         (struct pipe_framebuffer_state*)r300->fb_state.state;
137     struct r300_dsa_state *dsa = r300->dsa_state.state;
138     struct r300_resource *zstex =
139             fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
140 
141     z->gb_z_peq_config = 0;
142     z->zb_bw_cntl = 0;
143     z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
144     z->flush = 0;
145 
146     if (r300->cbzb_clear) {
147         z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
148         return;
149     }
150 
151     if (!zstex || !r300->hyperz_enabled)
152         return;
153 
154     /* Set the size of ZMASK tiles. */
155     if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) {
156         z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
157     }
158 
159     /* R500-specific features and optimizations. */
160     if (r300->screen->caps.is_r500) {
161         z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
162                          R500_COVERED_PTR_MASKING_ENABLE;
163     }
164 
165     /* Setup decompression if needed. No other HyperZ setting is required. */
166     if (r300->zmask_decompress) {
167         z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
168                          R300_RD_COMP_ENABLE;
169         return;
170     }
171 
172     /* Do not set anything if depth and stencil tests are off. */
173     if (!dsa->dsa.depth.enabled &&
174         !dsa->dsa.stencil[0].enabled &&
175         !dsa->dsa.stencil[1].enabled) {
176         assert(!dsa->dsa.depth.writemask);
177         return;
178     }
179 
180     /* Zbuffer compression. */
181     if (r300->zmask_in_use && !r300->locked_zbuffer) {
182         z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
183                          R300_RD_COMP_ENABLE |
184                          R300_WR_COMP_ENABLE;
185     }
186 
187     /* HiZ. */
188     if (r300->hiz_in_use && !r300->locked_zbuffer) {
189         /* HiZ cannot be used under some circumstances. */
190         if (!r300_hiz_allowed(r300)) {
191             /* If writemask is disabled, the HiZ memory will not be changed,
192              * so we can keep its content for later. */
193             if (dsa->dsa.depth.writemask) {
194                 r300->hiz_in_use = FALSE;
195             }
196             return;
197         }
198         DBG(r300, DBG_HYPERZ, "r300: Z-func: %i\n", dsa->dsa.depth.func);
199 
200         /* Set the HiZ function if needed. */
201         if (r300->hiz_func == HIZ_FUNC_NONE) {
202             r300->hiz_func = r300_get_hiz_func(r300);
203         }
204 
205         /* Setup the HiZ bits. */
206         z->zb_bw_cntl |= R300_HIZ_ENABLE |
207                 (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX);
208 
209         z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
210                         r300_get_sc_hz_max(r300);
211 
212         if (r300->screen->caps.is_r500) {
213             z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE;
214         }
215     }
216 }
217 
218 /*****************************************************************************/
219 /* The ZTOP state                                                            */
220 /*****************************************************************************/
221 
r300_dsa_writes_stencil(struct pipe_stencil_state * s)222 static boolean r300_dsa_writes_stencil(
223         struct pipe_stencil_state *s)
224 {
225     return s->enabled && s->writemask &&
226            (s->fail_op  != PIPE_STENCIL_OP_KEEP ||
227             s->zfail_op != PIPE_STENCIL_OP_KEEP ||
228             s->zpass_op != PIPE_STENCIL_OP_KEEP);
229 }
230 
r300_dsa_writes_depth_stencil(struct pipe_depth_stencil_alpha_state * dsa)231 static boolean r300_dsa_writes_depth_stencil(
232         struct pipe_depth_stencil_alpha_state *dsa)
233 {
234     /* We are interested only in the cases when a depth or stencil value
235      * can be changed. */
236 
237     if (dsa->depth.enabled && dsa->depth.writemask &&
238         dsa->depth.func != PIPE_FUNC_NEVER)
239         return TRUE;
240 
241     if (r300_dsa_writes_stencil(&dsa->stencil[0]) ||
242         r300_dsa_writes_stencil(&dsa->stencil[1]))
243         return TRUE;
244 
245     return FALSE;
246 }
247 
r300_dsa_alpha_test_enabled(struct pipe_depth_stencil_alpha_state * dsa)248 static boolean r300_dsa_alpha_test_enabled(
249         struct pipe_depth_stencil_alpha_state *dsa)
250 {
251     /* We are interested only in the cases when alpha testing can kill
252      * a fragment. */
253 
254     return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS;
255 }
256 
r300_update_ztop(struct r300_context * r300)257 static void r300_update_ztop(struct r300_context* r300)
258 {
259     struct r300_ztop_state* ztop_state =
260         (struct r300_ztop_state*)r300->ztop_state.state;
261     uint32_t old_ztop = ztop_state->z_buffer_top;
262 
263     /* This is important enough that I felt it warranted a comment.
264      *
265      * According to the docs, these are the conditions where ZTOP must be
266      * disabled:
267      * 1) Alpha testing enabled
268      * 2) Texture kill instructions in fragment shader
269      * 3) Chroma key culling enabled
270      * 4) W-buffering enabled
271      *
272      * The docs claim that for the first three cases, if no ZS writes happen,
273      * then ZTOP can be used.
274      *
275      * (3) will never apply since we do not support chroma-keyed operations.
276      * (4) will need to be re-examined (and this comment updated) if/when
277      * Hyper-Z becomes supported.
278      *
279      * Additionally, the following conditions require disabled ZTOP:
280      * 5) Depth writes in fragment shader
281      * 6) Outstanding occlusion queries
282      *
283      * This register causes stalls all the way from SC to CB when changed,
284      * but it is buffered on-chip so it does not hurt to write it if it has
285      * not changed.
286      *
287      * ~C.
288      */
289 
290     /* ZS writes */
291     if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
292            (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||  /* (1) */
293             r300_fs(r300)->shader->info.uses_kill)) {              /* (2) */
294         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
295     } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */
296         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
297     } else if (r300->query_current) {                              /* (6) */
298         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
299     } else {
300         ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
301     }
302     if (ztop_state->z_buffer_top != old_ztop)
303         r300_mark_atom_dirty(r300, &r300->ztop_state);
304 }
305 
r300_update_hyperz_state(struct r300_context * r300)306 void r300_update_hyperz_state(struct r300_context* r300)
307 {
308     r300_update_ztop(r300);
309 
310     if (r300->hyperz_state.dirty) {
311         r300_update_hyperz(r300);
312     }
313 }
314