1 /*
2  * Copyright 2008 Corbin Simpson <MostAwesomeDude@gmail.com>
3  * Copyright 2009 Marek Olšák <maraeo@gmail.com>
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
23 
24 #include "r300_context.h"
25 #include "r300_reg.h"
26 #include "r300_fs.h"
27 
28 #include "util/u_format.h"
29 
/*
  HiZ rules - taken from various docs
   1. HiZ only works on depth values.
   2. HiZ cannot be used if the stencil fail or zfail op is not KEEP.
   3. On R300/R400, HiZ is disabled if the depth function is EQUAL.
   4. Changing the comparison function without a clear usually means
      disabling HiZ.
*/
37 /*****************************************************************************/
38 /* The HyperZ setup                                                          */
39 /*****************************************************************************/
40 
r300_get_hiz_func(struct r300_context * r300)41 static enum r300_hiz_func r300_get_hiz_func(struct r300_context *r300)
42 {
43     struct r300_dsa_state *dsa = r300->dsa_state.state;
44 
45     switch (dsa->dsa.depth.func) {
46     case PIPE_FUNC_NEVER:
47     case PIPE_FUNC_EQUAL:
48     case PIPE_FUNC_NOTEQUAL:
49     case PIPE_FUNC_ALWAYS:
50     default:
51         /* Guess MAX for uncertain cases. */
52     case PIPE_FUNC_LESS:
53     case PIPE_FUNC_LEQUAL:
54         return HIZ_FUNC_MAX;
55 
56     case PIPE_FUNC_GREATER:
57     case PIPE_FUNC_GEQUAL:
58         return HIZ_FUNC_MIN;
59     }
60 }
61 
62 /* Return what's used for the depth test (either minimum or maximum). */
r300_get_sc_hz_max(struct r300_context * r300)63 static unsigned r300_get_sc_hz_max(struct r300_context *r300)
64 {
65     struct r300_dsa_state *dsa = r300->dsa_state.state;
66     unsigned func = dsa->dsa.depth.func;
67 
68     return func >= PIPE_FUNC_GREATER ? R300_SC_HYPERZ_MAX : R300_SC_HYPERZ_MIN;
69 }
70 
r300_is_hiz_func_valid(struct r300_context * r300)71 static boolean r300_is_hiz_func_valid(struct r300_context *r300)
72 {
73     struct r300_dsa_state *dsa = r300->dsa_state.state;
74     unsigned func = dsa->dsa.depth.func;
75 
76     if (r300->hiz_func == HIZ_FUNC_NONE)
77         return TRUE;
78 
79     /* func1 is less/lessthan */
80     if (r300->hiz_func == HIZ_FUNC_MAX &&
81         (func == PIPE_FUNC_GEQUAL || func == PIPE_FUNC_GREATER))
82         return FALSE;
83 
84     /* func1 is greater/greaterthan */
85     if (r300->hiz_func == HIZ_FUNC_MIN &&
86         (func == PIPE_FUNC_LESS   || func == PIPE_FUNC_LEQUAL))
87         return FALSE;
88 
89     return TRUE;
90 }
91 
r300_dsa_stencil_op_not_keep(struct pipe_stencil_state * s)92 static boolean r300_dsa_stencil_op_not_keep(struct pipe_stencil_state *s)
93 {
94     return s->enabled && (s->fail_op != PIPE_STENCIL_OP_KEEP ||
95                           s->zfail_op != PIPE_STENCIL_OP_KEEP);
96 }
97 
r300_hiz_allowed(struct r300_context * r300)98 static boolean r300_hiz_allowed(struct r300_context *r300)
99 {
100     struct r300_dsa_state *dsa = r300->dsa_state.state;
101     struct r300_screen *r300screen = r300->screen;
102 
103     if (r300_fragment_shader_writes_depth(r300_fs(r300)))
104         return FALSE;
105 
106     if (r300->query_current)
107         return FALSE;
108 
109     /* If the depth function is inverted, HiZ must be disabled. */
110     if (!r300_is_hiz_func_valid(r300))
111         return FALSE;
112 
113     /* if stencil fail/zfail op is not KEEP */
114     if (r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[0]) ||
115         r300_dsa_stencil_op_not_keep(&dsa->dsa.stencil[1]))
116         return FALSE;
117 
118     if (dsa->dsa.depth.enabled) {
119         /* if depth func is EQUAL pre-r500 */
120         if (dsa->dsa.depth.func == PIPE_FUNC_EQUAL && !r300screen->caps.is_r500)
121             return FALSE;
122 
123         /* if depth func is NOTEQUAL */
124         if (dsa->dsa.depth.func == PIPE_FUNC_NOTEQUAL)
125             return FALSE;
126     }
127     return TRUE;
128 }
129 
r300_update_hyperz(struct r300_context * r300)130 static void r300_update_hyperz(struct r300_context* r300)
131 {
132     struct r300_hyperz_state *z =
133         (struct r300_hyperz_state*)r300->hyperz_state.state;
134     struct pipe_framebuffer_state *fb =
135         (struct pipe_framebuffer_state*)r300->fb_state.state;
136     struct r300_dsa_state *dsa = r300->dsa_state.state;
137     struct r300_resource *zstex =
138             fb->zsbuf ? r300_resource(fb->zsbuf->texture) : NULL;
139 
140     z->gb_z_peq_config = 0;
141     z->zb_bw_cntl = 0;
142     z->sc_hyperz = R300_SC_HYPERZ_ADJ_2;
143     z->flush = 0;
144 
145     if (r300->cbzb_clear) {
146         z->zb_bw_cntl |= R300_ZB_CB_CLEAR_CACHE_LINE_WRITE_ONLY;
147         return;
148     }
149 
150     if (!zstex || !r300->hyperz_enabled)
151         return;
152 
153     /* Set the size of ZMASK tiles. */
154     if (zstex->tex.zcomp8x8[fb->zsbuf->u.tex.level]) {
155         z->gb_z_peq_config |= R300_GB_Z_PEQ_CONFIG_Z_PEQ_SIZE_8_8;
156     }
157 
158     /* R500-specific features and optimizations. */
159     if (r300->screen->caps.is_r500) {
160         z->zb_bw_cntl |= R500_PEQ_PACKING_ENABLE |
161                          R500_COVERED_PTR_MASKING_ENABLE;
162     }
163 
164     /* Setup decompression if needed. No other HyperZ setting is required. */
165     if (r300->zmask_decompress) {
166         z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
167                          R300_RD_COMP_ENABLE;
168         return;
169     }
170 
171     /* Do not set anything if depth and stencil tests are off. */
172     if (!dsa->dsa.depth.enabled &&
173         !dsa->dsa.stencil[0].enabled &&
174         !dsa->dsa.stencil[1].enabled) {
175         assert(!dsa->dsa.depth.writemask);
176         return;
177     }
178 
179     /* Zbuffer compression. */
180     if (r300->zmask_in_use && !r300->locked_zbuffer) {
181         z->zb_bw_cntl |= R300_FAST_FILL_ENABLE |
182                          R300_RD_COMP_ENABLE |
183                          R300_WR_COMP_ENABLE;
184     }
185 
186     /* HiZ. */
187     if (r300->hiz_in_use && !r300->locked_zbuffer) {
188         /* HiZ cannot be used under some circumstances. */
189         if (!r300_hiz_allowed(r300)) {
190             /* If writemask is disabled, the HiZ memory will not be changed,
191              * so we can keep its content for later. */
192             if (dsa->dsa.depth.writemask) {
193                 r300->hiz_in_use = FALSE;
194             }
195             return;
196         }
197         DBG(r300, DBG_HYPERZ, "r300: Z-func: %i\n", dsa->dsa.depth.func);
198 
199         /* Set the HiZ function if needed. */
200         if (r300->hiz_func == HIZ_FUNC_NONE) {
201             r300->hiz_func = r300_get_hiz_func(r300);
202         }
203 
204         /* Setup the HiZ bits. */
205         z->zb_bw_cntl |= R300_HIZ_ENABLE |
206                 (r300->hiz_func == HIZ_FUNC_MIN ? R300_HIZ_MIN : R300_HIZ_MAX);
207 
208         z->sc_hyperz |= R300_SC_HYPERZ_ENABLE |
209                         r300_get_sc_hz_max(r300);
210 
211         if (r300->screen->caps.is_r500) {
212             z->zb_bw_cntl |= R500_HIZ_EQUAL_REJECT_ENABLE;
213         }
214     }
215 }
216 
217 /*****************************************************************************/
218 /* The ZTOP state                                                            */
219 /*****************************************************************************/
220 
r300_dsa_writes_stencil(struct pipe_stencil_state * s)221 static boolean r300_dsa_writes_stencil(
222         struct pipe_stencil_state *s)
223 {
224     return s->enabled && s->writemask &&
225            (s->fail_op  != PIPE_STENCIL_OP_KEEP ||
226             s->zfail_op != PIPE_STENCIL_OP_KEEP ||
227             s->zpass_op != PIPE_STENCIL_OP_KEEP);
228 }
229 
r300_dsa_writes_depth_stencil(struct pipe_depth_stencil_alpha_state * dsa)230 static boolean r300_dsa_writes_depth_stencil(
231         struct pipe_depth_stencil_alpha_state *dsa)
232 {
233     /* We are interested only in the cases when a depth or stencil value
234      * can be changed. */
235 
236     if (dsa->depth.enabled && dsa->depth.writemask &&
237         dsa->depth.func != PIPE_FUNC_NEVER)
238         return TRUE;
239 
240     if (r300_dsa_writes_stencil(&dsa->stencil[0]) ||
241         r300_dsa_writes_stencil(&dsa->stencil[1]))
242         return TRUE;
243 
244     return FALSE;
245 }
246 
r300_dsa_alpha_test_enabled(struct pipe_depth_stencil_alpha_state * dsa)247 static boolean r300_dsa_alpha_test_enabled(
248         struct pipe_depth_stencil_alpha_state *dsa)
249 {
250     /* We are interested only in the cases when alpha testing can kill
251      * a fragment. */
252 
253     return dsa->alpha.enabled && dsa->alpha.func != PIPE_FUNC_ALWAYS;
254 }
255 
r300_update_ztop(struct r300_context * r300)256 static void r300_update_ztop(struct r300_context* r300)
257 {
258     struct r300_ztop_state* ztop_state =
259         (struct r300_ztop_state*)r300->ztop_state.state;
260     uint32_t old_ztop = ztop_state->z_buffer_top;
261 
262     /* This is important enough that I felt it warranted a comment.
263      *
264      * According to the docs, these are the conditions where ZTOP must be
265      * disabled:
266      * 1) Alpha testing enabled
267      * 2) Texture kill instructions in fragment shader
268      * 3) Chroma key culling enabled
269      * 4) W-buffering enabled
270      *
271      * The docs claim that for the first three cases, if no ZS writes happen,
272      * then ZTOP can be used.
273      *
274      * (3) will never apply since we do not support chroma-keyed operations.
275      * (4) will need to be re-examined (and this comment updated) if/when
276      * Hyper-Z becomes supported.
277      *
278      * Additionally, the following conditions require disabled ZTOP:
279      * 5) Depth writes in fragment shader
280      * 6) Outstanding occlusion queries
281      *
282      * This register causes stalls all the way from SC to CB when changed,
283      * but it is buffered on-chip so it does not hurt to write it if it has
284      * not changed.
285      *
286      * ~C.
287      */
288 
289     /* ZS writes */
290     if (r300_dsa_writes_depth_stencil(r300->dsa_state.state) &&
291            (r300_dsa_alpha_test_enabled(r300->dsa_state.state) ||  /* (1) */
292             r300_fs(r300)->shader->info.uses_kill)) {              /* (2) */
293         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
294     } else if (r300_fragment_shader_writes_depth(r300_fs(r300))) { /* (5) */
295         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
296     } else if (r300->query_current) {                              /* (6) */
297         ztop_state->z_buffer_top = R300_ZTOP_DISABLE;
298     } else {
299         ztop_state->z_buffer_top = R300_ZTOP_ENABLE;
300     }
301     if (ztop_state->z_buffer_top != old_ztop)
302         r300_mark_atom_dirty(r300, &r300->ztop_state);
303 }
304 
r300_update_hyperz_state(struct r300_context * r300)305 void r300_update_hyperz_state(struct r300_context* r300)
306 {
307     r300_update_ztop(r300);
308 
309     if (r300->hyperz_state.dirty) {
310         r300_update_hyperz(r300);
311     }
312 }
313