1 /**************************************************************************
2  *
3  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4  * Copyright 2010 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * \brief  Quad depth / stencil testing
31  */
32 
33 #include "pipe/p_defines.h"
34 #include "util/u_format.h"
35 #include "util/u_math.h"
36 #include "util/u_memory.h"
37 #include "tgsi/tgsi_scan.h"
38 #include "sp_context.h"
39 #include "sp_quad.h"
40 #include "sp_quad_pipe.h"
41 #include "sp_tile_cache.h"
42 #include "sp_state.h"           /* for sp_fragment_shader */
43 
44 
45 struct depth_data {
46    struct pipe_surface *ps;
47    enum pipe_format format;
48    unsigned bzzzz[TGSI_QUAD_SIZE];  /**< Z values fetched from depth buffer */
49    unsigned qzzzz[TGSI_QUAD_SIZE];  /**< Z values from the quad */
50    ubyte stencilVals[TGSI_QUAD_SIZE];
51    boolean use_shader_stencil_refs;
52    ubyte shader_stencil_refs[TGSI_QUAD_SIZE];
53    struct softpipe_cached_tile *tile;
54 };
55 
56 
57 
58 static void
get_depth_stencil_values(struct depth_data * data,const struct quad_header * quad)59 get_depth_stencil_values( struct depth_data *data,
60                           const struct quad_header *quad )
61 {
62    unsigned j;
63    const struct softpipe_cached_tile *tile = data->tile;
64 
65    switch (data->format) {
66    case PIPE_FORMAT_Z16_UNORM:
67       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
68          int x = quad->input.x0 % TILE_SIZE + (j & 1);
69          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
70          data->bzzzz[j] = tile->data.depth16[y][x];
71       }
72       break;
73    case PIPE_FORMAT_Z32_UNORM:
74       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
75          int x = quad->input.x0 % TILE_SIZE + (j & 1);
76          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
77          data->bzzzz[j] = tile->data.depth32[y][x];
78       }
79       break;
80    case PIPE_FORMAT_Z24X8_UNORM:
81    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
82       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
83          int x = quad->input.x0 % TILE_SIZE + (j & 1);
84          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
85          data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
86          data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
87       }
88       break;
89    case PIPE_FORMAT_X8Z24_UNORM:
90    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
91       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
92          int x = quad->input.x0 % TILE_SIZE + (j & 1);
93          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
94          data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
95          data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
96       }
97       break;
98    case PIPE_FORMAT_S8_UINT:
99       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
100          int x = quad->input.x0 % TILE_SIZE + (j & 1);
101          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
102          data->bzzzz[j] = 0;
103          data->stencilVals[j] = tile->data.stencil8[y][x];
104       }
105       break;
106    case PIPE_FORMAT_Z32_FLOAT:
107       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
108          int x = quad->input.x0 % TILE_SIZE + (j & 1);
109          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
110          data->bzzzz[j] = tile->data.depth32[y][x];
111       }
112       break;
113    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
114       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
115          int x = quad->input.x0 % TILE_SIZE + (j & 1);
116          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
117          data->bzzzz[j] = tile->data.depth64[y][x] & 0xffffffff;
118          data->stencilVals[j] = (tile->data.depth64[y][x] >> 32) & 0xff;
119       }
120       break;
121    default:
122       assert(0);
123    }
124 }
125 
126 
127 /**
128  * If the shader has not been run, interpolate the depth values
129  * ourselves.
130  */
131 static void
interpolate_quad_depth(struct quad_header * quad)132 interpolate_quad_depth( struct quad_header *quad )
133 {
134    const float fx = (float) quad->input.x0;
135    const float fy = (float) quad->input.y0;
136    const float dzdx = quad->posCoef->dadx[2];
137    const float dzdy = quad->posCoef->dady[2];
138    const float z0 = quad->posCoef->a0[2] + dzdx * fx + dzdy * fy;
139 
140    quad->output.depth[0] = z0;
141    quad->output.depth[1] = z0 + dzdx;
142    quad->output.depth[2] = z0 + dzdy;
143    quad->output.depth[3] = z0 + dzdx + dzdy;
144 }
145 
146 
147 /**
148  * Compute the depth_data::qzzzz[] values from the float fragment Z values.
149  */
150 static void
convert_quad_depth(struct depth_data * data,const struct quad_header * quad)151 convert_quad_depth( struct depth_data *data,
152                     const struct quad_header *quad )
153 {
154    unsigned j;
155 
156    /* Convert quad's float depth values to int depth values (qzzzz).
157     * If the Z buffer stores integer values, we _have_ to do the depth
158     * compares with integers (not floats).  Otherwise, the float->int->float
159     * conversion of Z values (which isn't an identity function) will cause
160     * Z-fighting errors.
161     */
162    switch (data->format) {
163    case PIPE_FORMAT_Z16_UNORM:
164       {
165          float scale = 65535.0;
166 
167          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
168             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
169          }
170       }
171       break;
172    case PIPE_FORMAT_Z32_UNORM:
173       {
174          double scale = (double) (uint) ~0UL;
175 
176          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
177             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
178          }
179       }
180       break;
181    case PIPE_FORMAT_Z24X8_UNORM:
182    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
183       {
184          float scale = (float) ((1 << 24) - 1);
185 
186          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
187             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
188          }
189       }
190       break;
191    case PIPE_FORMAT_X8Z24_UNORM:
192    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
193       {
194          float scale = (float) ((1 << 24) - 1);
195 
196          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
197             data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
198          }
199       }
200       break;
201    case PIPE_FORMAT_Z32_FLOAT:
202    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
203       {
204          union fi fui;
205 
206          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
207             fui.f = quad->output.depth[j];
208             data->qzzzz[j] = fui.ui;
209          }
210       }
211       break;
212    default:
213       assert(0);
214    }
215 }
216 
217 
218 /**
219  * Compute the depth_data::shader_stencil_refs[] values from the float
220  * fragment stencil values.
221  */
222 static void
convert_quad_stencil(struct depth_data * data,const struct quad_header * quad)223 convert_quad_stencil( struct depth_data *data,
224                       const struct quad_header *quad )
225 {
226    unsigned j;
227 
228    data->use_shader_stencil_refs = TRUE;
229    /* Copy quads stencil values
230     */
231    switch (data->format) {
232    case PIPE_FORMAT_Z24X8_UNORM:
233    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
234    case PIPE_FORMAT_X8Z24_UNORM:
235    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
236    case PIPE_FORMAT_S8_UINT:
237    case PIPE_FORMAT_Z32_FLOAT:
238    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
239       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
240          data->shader_stencil_refs[j] = ((unsigned)(quad->output.stencil[j]));
241       }
242       break;
243    default:
244       assert(0);
245    }
246 }
247 
248 
249 /**
250  * Write data->bzzzz[] values and data->stencilVals into the Z/stencil buffer.
251  */
252 static void
write_depth_stencil_values(struct depth_data * data,struct quad_header * quad)253 write_depth_stencil_values( struct depth_data *data,
254                             struct quad_header *quad )
255 {
256    struct softpipe_cached_tile *tile = data->tile;
257    unsigned j;
258 
259    /* put updated Z values back into cached tile */
260    switch (data->format) {
261    case PIPE_FORMAT_Z16_UNORM:
262       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
263          int x = quad->input.x0 % TILE_SIZE + (j & 1);
264          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
265          tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
266       }
267       break;
268    case PIPE_FORMAT_Z24X8_UNORM:
269    case PIPE_FORMAT_Z32_UNORM:
270       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
271          int x = quad->input.x0 % TILE_SIZE + (j & 1);
272          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
273          tile->data.depth32[y][x] = data->bzzzz[j];
274       }
275       break;
276    case PIPE_FORMAT_Z24_UNORM_S8_UINT:
277       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
278          int x = quad->input.x0 % TILE_SIZE + (j & 1);
279          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
280          tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
281       }
282       break;
283    case PIPE_FORMAT_S8_UINT_Z24_UNORM:
284       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
285          int x = quad->input.x0 % TILE_SIZE + (j & 1);
286          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
287          tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
288       }
289       break;
290    case PIPE_FORMAT_X8Z24_UNORM:
291       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
292          int x = quad->input.x0 % TILE_SIZE + (j & 1);
293          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
294          tile->data.depth32[y][x] = data->bzzzz[j] << 8;
295       }
296       break;
297    case PIPE_FORMAT_S8_UINT:
298       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
299          int x = quad->input.x0 % TILE_SIZE + (j & 1);
300          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
301          tile->data.stencil8[y][x] = data->stencilVals[j];
302       }
303       break;
304    case PIPE_FORMAT_Z32_FLOAT:
305       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
306          int x = quad->input.x0 % TILE_SIZE + (j & 1);
307          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
308          tile->data.depth32[y][x] = data->bzzzz[j];
309       }
310       break;
311    case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
312       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
313          int x = quad->input.x0 % TILE_SIZE + (j & 1);
314          int y = quad->input.y0 % TILE_SIZE + (j >> 1);
315          tile->data.depth64[y][x] = (uint64_t)data->bzzzz[j] | ((uint64_t)data->stencilVals[j] << 32);
316       }
317       break;
318    default:
319       assert(0);
320    }
321 }
322 
323 
324 
325 /** Largest stencil value (all eight bits set); only 8-bit stencil is supported. */
326 #define STENCIL_MAX 0xff
327 
328 
329 /**
330  * Do the basic stencil test (compare stencil buffer values against the
331  * reference value.
332  *
333  * \param data->stencilVals  the stencil values from the stencil buffer
334  * \param func  the stencil func (PIPE_FUNC_x)
335  * \param ref  the stencil reference value
336  * \param valMask  the stencil value mask indicating which bits of the stencil
337  *                 values and ref value are to be used.
338  * \return mask indicating which pixels passed the stencil test
339  */
340 static unsigned
do_stencil_test(struct depth_data * data,unsigned func,unsigned ref,unsigned valMask)341 do_stencil_test(struct depth_data *data,
342                 unsigned func,
343                 unsigned ref, unsigned valMask)
344 {
345    unsigned passMask = 0x0;
346    unsigned j;
347    ubyte refs[TGSI_QUAD_SIZE];
348 
349    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
350       if (data->use_shader_stencil_refs)
351          refs[j] = data->shader_stencil_refs[j] & valMask;
352       else
353          refs[j] = ref & valMask;
354    }
355 
356    switch (func) {
357    case PIPE_FUNC_NEVER:
358       /* passMask = 0x0 */
359       break;
360    case PIPE_FUNC_LESS:
361       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
362          if (refs[j] < (data->stencilVals[j] & valMask)) {
363             passMask |= (1 << j);
364          }
365       }
366       break;
367    case PIPE_FUNC_EQUAL:
368       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
369          if (refs[j] == (data->stencilVals[j] & valMask)) {
370             passMask |= (1 << j);
371          }
372       }
373       break;
374    case PIPE_FUNC_LEQUAL:
375       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
376          if (refs[j] <= (data->stencilVals[j] & valMask)) {
377             passMask |= (1 << j);
378          }
379       }
380       break;
381    case PIPE_FUNC_GREATER:
382       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
383          if (refs[j] > (data->stencilVals[j] & valMask)) {
384             passMask |= (1 << j);
385          }
386       }
387       break;
388    case PIPE_FUNC_NOTEQUAL:
389       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
390          if (refs[j] != (data->stencilVals[j] & valMask)) {
391             passMask |= (1 << j);
392          }
393       }
394       break;
395    case PIPE_FUNC_GEQUAL:
396       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
397          if (refs[j] >= (data->stencilVals[j] & valMask)) {
398             passMask |= (1 << j);
399          }
400       }
401       break;
402    case PIPE_FUNC_ALWAYS:
403       passMask = MASK_ALL;
404       break;
405    default:
406       assert(0);
407    }
408 
409    return passMask;
410 }
411 
412 
413 /**
414  * Apply the stencil operator to stencil values.
415  *
416  * \param data->stencilVals  the stencil buffer values (read and written)
417  * \param mask  indicates which pixels to update
418  * \param op  the stencil operator (PIPE_STENCIL_OP_x)
419  * \param ref  the stencil reference value
420  * \param wrtMask  writemask controlling which bits are changed in the
421  *                 stencil values
422  */
423 static void
apply_stencil_op(struct depth_data * data,unsigned mask,unsigned op,ubyte ref,ubyte wrtMask)424 apply_stencil_op(struct depth_data *data,
425                  unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
426 {
427    unsigned j;
428    ubyte newstencil[TGSI_QUAD_SIZE];
429    ubyte refs[TGSI_QUAD_SIZE];
430 
431    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
432       newstencil[j] = data->stencilVals[j];
433       if (data->use_shader_stencil_refs)
434          refs[j] = data->shader_stencil_refs[j];
435       else
436          refs[j] = ref;
437    }
438 
439    switch (op) {
440    case PIPE_STENCIL_OP_KEEP:
441       /* no-op */
442       break;
443    case PIPE_STENCIL_OP_ZERO:
444       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
445          if (mask & (1 << j)) {
446             newstencil[j] = 0;
447          }
448       }
449       break;
450    case PIPE_STENCIL_OP_REPLACE:
451       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
452          if (mask & (1 << j)) {
453             newstencil[j] = refs[j];
454          }
455       }
456       break;
457    case PIPE_STENCIL_OP_INCR:
458       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
459          if (mask & (1 << j)) {
460             if (data->stencilVals[j] < STENCIL_MAX) {
461                newstencil[j] = data->stencilVals[j] + 1;
462             }
463          }
464       }
465       break;
466    case PIPE_STENCIL_OP_DECR:
467       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
468          if (mask & (1 << j)) {
469             if (data->stencilVals[j] > 0) {
470                newstencil[j] = data->stencilVals[j] - 1;
471             }
472          }
473       }
474       break;
475    case PIPE_STENCIL_OP_INCR_WRAP:
476       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
477          if (mask & (1 << j)) {
478             newstencil[j] = data->stencilVals[j] + 1;
479          }
480       }
481       break;
482    case PIPE_STENCIL_OP_DECR_WRAP:
483       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
484          if (mask & (1 << j)) {
485             newstencil[j] = data->stencilVals[j] - 1;
486          }
487       }
488       break;
489    case PIPE_STENCIL_OP_INVERT:
490       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
491          if (mask & (1 << j)) {
492             newstencil[j] = ~data->stencilVals[j];
493          }
494       }
495       break;
496    default:
497       assert(0);
498    }
499 
500    /*
501     * update the stencil values
502     */
503    if (wrtMask != STENCIL_MAX) {
504       /* apply bit-wise stencil buffer writemask */
505       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
506          data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
507       }
508    }
509    else {
510       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
511          data->stencilVals[j] = newstencil[j];
512       }
513    }
514 }
515 
516 
517 
518 /**
519  * To increase efficiency, we should probably have multiple versions
520  * of this function that are specifically for Z16, Z32 and FP Z buffers.
521  * Try to effectively do that with codegen...
522  */
523 static boolean
depth_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)524 depth_test_quad(struct quad_stage *qs,
525                 struct depth_data *data,
526                 struct quad_header *quad)
527 {
528    struct softpipe_context *softpipe = qs->softpipe;
529    unsigned zmask = 0;
530    unsigned j;
531 
532    switch (softpipe->depth_stencil->depth.func) {
533    case PIPE_FUNC_NEVER:
534       /* zmask = 0 */
535       break;
536    case PIPE_FUNC_LESS:
537       /* Note this is pretty much a single sse or cell instruction.
538        * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
539        */
540       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
541 	 if (data->qzzzz[j] < data->bzzzz[j])
542 	    zmask |= 1 << j;
543       }
544       break;
545    case PIPE_FUNC_EQUAL:
546       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
547 	 if (data->qzzzz[j] == data->bzzzz[j])
548 	    zmask |= 1 << j;
549       }
550       break;
551    case PIPE_FUNC_LEQUAL:
552       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
553 	 if (data->qzzzz[j] <= data->bzzzz[j])
554 	    zmask |= (1 << j);
555       }
556       break;
557    case PIPE_FUNC_GREATER:
558       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
559 	 if (data->qzzzz[j] > data->bzzzz[j])
560 	    zmask |= (1 << j);
561       }
562       break;
563    case PIPE_FUNC_NOTEQUAL:
564       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
565 	 if (data->qzzzz[j] != data->bzzzz[j])
566 	    zmask |= (1 << j);
567       }
568       break;
569    case PIPE_FUNC_GEQUAL:
570       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
571 	 if (data->qzzzz[j] >= data->bzzzz[j])
572 	    zmask |= (1 << j);
573       }
574       break;
575    case PIPE_FUNC_ALWAYS:
576       zmask = MASK_ALL;
577       break;
578    default:
579       assert(0);
580    }
581 
582    quad->inout.mask &= zmask;
583    if (quad->inout.mask == 0)
584       return FALSE;
585 
586    /* Update our internal copy only if writemask set.  Even if
587     * depth.writemask is FALSE, may still need to write out buffer
588     * data due to stencil changes.
589     */
590    if (softpipe->depth_stencil->depth.writemask) {
591       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
592          if (quad->inout.mask & (1 << j)) {
593             data->bzzzz[j] = data->qzzzz[j];
594          }
595       }
596    }
597 
598    return TRUE;
599 }
600 
601 
602 
603 /**
604  * Do stencil (and depth) testing.  Stenciling depends on the outcome of
605  * depth testing.
606  */
607 static void
depth_stencil_test_quad(struct quad_stage * qs,struct depth_data * data,struct quad_header * quad)608 depth_stencil_test_quad(struct quad_stage *qs,
609                         struct depth_data *data,
610                         struct quad_header *quad)
611 {
612    struct softpipe_context *softpipe = qs->softpipe;
613    unsigned func, zFailOp, zPassOp, failOp;
614    ubyte ref, wrtMask, valMask;
615    uint face = quad->input.facing;
616 
617    if (!softpipe->depth_stencil->stencil[1].enabled) {
618       /* single-sided stencil test, use front (face=0) state */
619       face = 0;
620    }
621 
622    /* 0 = front-face, 1 = back-face */
623    assert(face == 0 || face == 1);
624 
625    /* choose front or back face function, operator, etc */
626    /* XXX we could do these initializations once per primitive */
627    func    = softpipe->depth_stencil->stencil[face].func;
628    failOp  = softpipe->depth_stencil->stencil[face].fail_op;
629    zFailOp = softpipe->depth_stencil->stencil[face].zfail_op;
630    zPassOp = softpipe->depth_stencil->stencil[face].zpass_op;
631    ref     = softpipe->stencil_ref.ref_value[face];
632    wrtMask = softpipe->depth_stencil->stencil[face].writemask;
633    valMask = softpipe->depth_stencil->stencil[face].valuemask;
634 
635    /* do the stencil test first */
636    {
637       unsigned passMask, failMask;
638       passMask = do_stencil_test(data, func, ref, valMask);
639       failMask = quad->inout.mask & ~passMask;
640       quad->inout.mask &= passMask;
641 
642       if (failOp != PIPE_STENCIL_OP_KEEP) {
643          apply_stencil_op(data, failMask, failOp, ref, wrtMask);
644       }
645    }
646 
647    if (quad->inout.mask) {
648       /* now the pixels that passed the stencil test are depth tested */
649       if (softpipe->depth_stencil->depth.enabled) {
650          const unsigned origMask = quad->inout.mask;
651 
652          depth_test_quad(qs, data, quad);  /* quad->mask is updated */
653 
654          /* update stencil buffer values according to z pass/fail result */
655          if (zFailOp != PIPE_STENCIL_OP_KEEP) {
656             const unsigned zFailMask = origMask & ~quad->inout.mask;
657             apply_stencil_op(data, zFailMask, zFailOp, ref, wrtMask);
658          }
659 
660          if (zPassOp != PIPE_STENCIL_OP_KEEP) {
661             const unsigned zPassMask = origMask & quad->inout.mask;
662             apply_stencil_op(data, zPassMask, zPassOp, ref, wrtMask);
663          }
664       }
665       else {
666          /* no depth test, apply Zpass operator to stencil buffer values */
667          apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
668       }
669    }
670 }
671 
672 
673 #define ALPHATEST( FUNC, COMP )                                         \
674    static unsigned                                                      \
675    alpha_test_quads_##FUNC( struct quad_stage *qs,                      \
676                            struct quad_header *quads[],                 \
677                            unsigned nr )                                \
678    {                                                                    \
679       const float ref = qs->softpipe->depth_stencil->alpha.ref_value;   \
680       const uint cbuf = 0; /* only output[0].alpha is tested */         \
681       unsigned pass_nr = 0;                                             \
682       unsigned i;                                                       \
683                                                                         \
684       for (i = 0; i < nr; i++) {                                        \
685          const float *aaaa = quads[i]->output.color[cbuf][3];           \
686          unsigned passMask = 0;                                         \
687                                                                         \
688          if (aaaa[0] COMP ref) passMask |= (1 << 0);                    \
689          if (aaaa[1] COMP ref) passMask |= (1 << 1);                    \
690          if (aaaa[2] COMP ref) passMask |= (1 << 2);                    \
691          if (aaaa[3] COMP ref) passMask |= (1 << 3);                    \
692                                                                         \
693          quads[i]->inout.mask &= passMask;                              \
694                                                                         \
695          if (quads[i]->inout.mask)                                      \
696             quads[pass_nr++] = quads[i];                                \
697       }                                                                 \
698                                                                         \
699       return pass_nr;                                                   \
700    }
701 
702 
703 ALPHATEST( LESS,     < )
704 ALPHATEST( EQUAL,    == )
705 ALPHATEST( LEQUAL,   <= )
706 ALPHATEST( GREATER,  > )
707 ALPHATEST( NOTEQUAL, != )
708 ALPHATEST( GEQUAL,   >= )
709 
710 
711 /* XXX: Incorporate into shader using KILP.
712  */
713 static unsigned
alpha_test_quads(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)714 alpha_test_quads(struct quad_stage *qs,
715                  struct quad_header *quads[],
716                  unsigned nr)
717 {
718    switch (qs->softpipe->depth_stencil->alpha.func) {
719    case PIPE_FUNC_LESS:
720       return alpha_test_quads_LESS( qs, quads, nr );
721    case PIPE_FUNC_EQUAL:
722       return alpha_test_quads_EQUAL( qs, quads, nr );
723    case PIPE_FUNC_LEQUAL:
724       return alpha_test_quads_LEQUAL( qs, quads, nr );
725    case PIPE_FUNC_GREATER:
726       return alpha_test_quads_GREATER( qs, quads, nr );
727    case PIPE_FUNC_NOTEQUAL:
728       return alpha_test_quads_NOTEQUAL( qs, quads, nr );
729    case PIPE_FUNC_GEQUAL:
730       return alpha_test_quads_GEQUAL( qs, quads, nr );
731    case PIPE_FUNC_ALWAYS:
732       return nr;
733    case PIPE_FUNC_NEVER:
734    default:
735       return 0;
736    }
737 }
738 
739 
/**
 * Number of set bits in a 4-bit quad mask, indexed by the mask value.
 * Read-only lookup table, hence const.
 */
static const unsigned mask_count[16] =
{
   0, 1, 1, 2,                  /* 0x0 .. 0x3 */
   1, 2, 2, 3,                  /* 0x4 .. 0x7 */
   1, 2, 2, 3,                  /* 0x8 .. 0xb */
   2, 3, 3, 4,                  /* 0xc .. 0xf */
};
759 
760 
761 
762 /**
763  * General depth/stencil test function.  Used when there's no fast-path.
764  */
765 static void
depth_test_quads_fallback(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)766 depth_test_quads_fallback(struct quad_stage *qs,
767                           struct quad_header *quads[],
768                           unsigned nr)
769 {
770    unsigned i, pass = 0;
771    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
772    boolean interp_depth = !fsInfo->writes_z;
773    boolean shader_stencil_ref = fsInfo->writes_stencil;
774    struct depth_data data;
775 
776    data.use_shader_stencil_refs = FALSE;
777 
778    if (qs->softpipe->depth_stencil->alpha.enabled) {
779       nr = alpha_test_quads(qs, quads, nr);
780    }
781 
782    if (qs->softpipe->framebuffer.zsbuf &&
783          (qs->softpipe->depth_stencil->depth.enabled ||
784           qs->softpipe->depth_stencil->stencil[0].enabled)) {
785 
786       data.ps = qs->softpipe->framebuffer.zsbuf;
787       data.format = data.ps->format;
788       data.tile = sp_get_cached_tile(qs->softpipe->zsbuf_cache,
789                                      quads[0]->input.x0,
790                                      quads[0]->input.y0);
791 
792       for (i = 0; i < nr; i++) {
793          get_depth_stencil_values(&data, quads[i]);
794 
795          if (qs->softpipe->depth_stencil->depth.enabled) {
796             if (interp_depth)
797                interpolate_quad_depth(quads[i]);
798 
799             convert_quad_depth(&data, quads[i]);
800          }
801 
802          if (qs->softpipe->depth_stencil->stencil[0].enabled) {
803             if (shader_stencil_ref)
804                convert_quad_stencil(&data, quads[i]);
805 
806             depth_stencil_test_quad(qs, &data, quads[i]);
807             write_depth_stencil_values(&data, quads[i]);
808          }
809          else {
810             if (!depth_test_quad(qs, &data, quads[i]))
811                continue;
812 
813             if (qs->softpipe->depth_stencil->depth.writemask)
814                write_depth_stencil_values(&data, quads[i]);
815          }
816 
817          quads[pass++] = quads[i];
818       }
819 
820       nr = pass;
821    }
822 
823    if (qs->softpipe->active_query_count) {
824       for (i = 0; i < nr; i++)
825          qs->softpipe->occlusion_count += mask_count[quads[i]->inout.mask];
826    }
827 
828    if (nr)
829       qs->next->run(qs->next, quads, nr);
830 }
831 
832 
833 /**
834  * Special-case Z testing for 16-bit Zbuffer and Z buffer writes enabled.
835  */
/*
 * Each NAME + OPERATOR (or NAME + ALWAYS) group below is followed by an
 * include of sp_quad_depth_test_tmp.h.  choose_depth_test() installs the
 * depth_interp_z16_*_write names as qs->run, so that header is what
 * provides a function called NAME for every group.
 * NOTE(review): NAME/OPERATOR are re-defined here without an #undef, so the
 * header presumably #undefs them itself -- confirm in
 * sp_quad_depth_test_tmp.h.
 */
836 
837 #define NAME depth_interp_z16_less_write
838 #define OPERATOR <
839 #include "sp_quad_depth_test_tmp.h"
840 
841 #define NAME depth_interp_z16_equal_write
842 #define OPERATOR ==
843 #include "sp_quad_depth_test_tmp.h"
844 
845 #define NAME depth_interp_z16_lequal_write
846 #define OPERATOR <=
847 #include "sp_quad_depth_test_tmp.h"
848 
849 #define NAME depth_interp_z16_greater_write
850 #define OPERATOR >
851 #include "sp_quad_depth_test_tmp.h"
852 
853 #define NAME depth_interp_z16_notequal_write
854 #define OPERATOR !=
855 #include "sp_quad_depth_test_tmp.h"
856 
857 #define NAME depth_interp_z16_gequal_write
858 #define OPERATOR >=
859 #include "sp_quad_depth_test_tmp.h"
860 
/* the "always" variant defines ALWAYS instead of a comparison OPERATOR */
861 #define NAME depth_interp_z16_always_write
862 #define ALWAYS 1
863 #include "sp_quad_depth_test_tmp.h"
864 
865 
866 
867 static void
depth_noop(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)868 depth_noop(struct quad_stage *qs,
869            struct quad_header *quads[],
870            unsigned nr)
871 {
872    qs->next->run(qs->next, quads, nr);
873 }
874 
875 
876 
877 static void
choose_depth_test(struct quad_stage * qs,struct quad_header * quads[],unsigned nr)878 choose_depth_test(struct quad_stage *qs,
879                   struct quad_header *quads[],
880                   unsigned nr)
881 {
882    const struct tgsi_shader_info *fsInfo = &qs->softpipe->fs_variant->info;
883 
884    boolean interp_depth = !fsInfo->writes_z;
885 
886    boolean alpha = qs->softpipe->depth_stencil->alpha.enabled;
887 
888    boolean depth = qs->softpipe->depth_stencil->depth.enabled;
889 
890    unsigned depthfunc = qs->softpipe->depth_stencil->depth.func;
891 
892    boolean stencil = qs->softpipe->depth_stencil->stencil[0].enabled;
893 
894    boolean depthwrite = qs->softpipe->depth_stencil->depth.writemask;
895 
896    boolean occlusion = qs->softpipe->active_query_count;
897 
898    if(!qs->softpipe->framebuffer.zsbuf)
899       depth = depthwrite = stencil = FALSE;
900 
901    /* default */
902    qs->run = depth_test_quads_fallback;
903 
904    /* look for special cases */
905    if (!alpha &&
906        !depth &&
907        !occlusion &&
908        !stencil) {
909       qs->run = depth_noop;
910    }
911    else if (!alpha &&
912             interp_depth &&
913             depth &&
914             depthwrite &&
915             !occlusion &&
916             !stencil)
917    {
918       if (qs->softpipe->framebuffer.zsbuf->format == PIPE_FORMAT_Z16_UNORM) {
919          switch (depthfunc) {
920          case PIPE_FUNC_NEVER:
921             qs->run = depth_test_quads_fallback;
922             break;
923          case PIPE_FUNC_LESS:
924             qs->run = depth_interp_z16_less_write;
925             break;
926          case PIPE_FUNC_EQUAL:
927             qs->run = depth_interp_z16_equal_write;
928             break;
929          case PIPE_FUNC_LEQUAL:
930             qs->run = depth_interp_z16_lequal_write;
931             break;
932          case PIPE_FUNC_GREATER:
933             qs->run = depth_interp_z16_greater_write;
934             break;
935          case PIPE_FUNC_NOTEQUAL:
936             qs->run = depth_interp_z16_notequal_write;
937             break;
938          case PIPE_FUNC_GEQUAL:
939             qs->run = depth_interp_z16_gequal_write;
940             break;
941          case PIPE_FUNC_ALWAYS:
942             qs->run = depth_interp_z16_always_write;
943             break;
944          default:
945             qs->run = depth_test_quads_fallback;
946             break;
947          }
948       }
949    }
950 
951    /* next quad/fragment stage */
952    qs->run( qs, quads, nr );
953 }
954 
955 
956 
957 static void
depth_test_begin(struct quad_stage * qs)958 depth_test_begin(struct quad_stage *qs)
959 {
960    qs->run = choose_depth_test;
961    qs->next->begin(qs->next);
962 }
963 
964 
/**
 * Destroy callback: release the stage object allocated by
 * sp_quad_depth_test_stage().
 */
static void
depth_test_destroy(struct quad_stage *qs)
{
   FREE(qs);
}
970 
971 
972 struct quad_stage *
sp_quad_depth_test_stage(struct softpipe_context * softpipe)973 sp_quad_depth_test_stage(struct softpipe_context *softpipe)
974 {
975    struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
976 
977    stage->softpipe = softpipe;
978    stage->begin = depth_test_begin;
979    stage->run = choose_depth_test;
980    stage->destroy = depth_test_destroy;
981 
982    return stage;
983 }
984