1 /**************************************************************************
2  *
3  * Copyright 2009-2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the
15  * next paragraph) shall be included in all copies or substantial portions
16  * of the Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25  *
26  **************************************************************************/
27 
28 /**
29  * @file
30  * Depth/stencil testing to LLVM IR translation.
31  *
32  * To be done accurately/efficiently the depth/stencil test must be done with
33  * the same type/format of the depth/stencil buffer, which implies massaging
34  * the incoming depths to fit into place. Using a more straightforward
35  * type/format for depth/stencil values internally and only convert when
36  * flushing would avoid this, but it would most likely result in depth fighting
37  * artifacts.
38  *
39  * We are free to use a different pixel layout though. Since our basic
40  * processing unit is a quad (2x2 pixel block) we store the depth/stencil
 * values tiled, a quad at a time. That is, a depth buffer containing
42  *
43  *  Z11 Z12 Z13 Z14 ...
44  *  Z21 Z22 Z23 Z24 ...
45  *  Z31 Z32 Z33 Z34 ...
46  *  Z41 Z42 Z43 Z44 ...
47  *  ... ... ... ... ...
48  *
49  * will actually be stored in memory as
50  *
51  *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
52  *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
53  *  ... ... ... ... ... ... ... ... ...
54  *
55  *
56  * @author Jose Fonseca <jfonseca@vmware.com>
 * @author Brian Paul <brianp@vmware.com>
58  */
59 
60 #include "pipe/p_state.h"
61 #include "util/u_format.h"
62 #include "util/u_cpu_detect.h"
63 
64 #include "gallivm/lp_bld_type.h"
65 #include "gallivm/lp_bld_arit.h"
66 #include "gallivm/lp_bld_bitarit.h"
67 #include "gallivm/lp_bld_const.h"
68 #include "gallivm/lp_bld_conv.h"
69 #include "gallivm/lp_bld_logic.h"
70 #include "gallivm/lp_bld_flow.h"
71 #include "gallivm/lp_bld_intr.h"
72 #include "gallivm/lp_bld_debug.h"
73 #include "gallivm/lp_bld_swizzle.h"
74 
75 #include "lp_bld_depth.h"
76 
77 
/**
 * Selects which per-face operation field of pipe_stencil_state is applied
 * when the stencil buffer is updated.
 */
enum stencil_op {
   S_FAIL_OP = 0,   /**< use pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /**< use pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /**< use pipe_stencil_state::zpass_op */
};
84 
85 
86 
87 /**
88  * Do the stencil test comparison (compare FB stencil values against ref value).
89  * This will be used twice when generating two-sided stencil code.
90  * \param stencil  the front/back stencil state
91  * \param stencilRef  the stencil reference value, replicated as a vector
92  * \param stencilVals  vector of stencil values from framebuffer
93  * \return vector mask of pass/fail values (~0 or 0)
94  */
95 static LLVMValueRef
lp_build_stencil_test_single(struct lp_build_context * bld,const struct pipe_stencil_state * stencil,LLVMValueRef stencilRef,LLVMValueRef stencilVals)96 lp_build_stencil_test_single(struct lp_build_context *bld,
97                              const struct pipe_stencil_state *stencil,
98                              LLVMValueRef stencilRef,
99                              LLVMValueRef stencilVals)
100 {
101    LLVMBuilderRef builder = bld->gallivm->builder;
102    const unsigned stencilMax = 255; /* XXX fix */
103    struct lp_type type = bld->type;
104    LLVMValueRef res;
105 
106    /*
107     * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values
108     * are between 0..255 so ensure we generate the fastest comparisons for
109     * wider elements.
110     */
111    if (type.width <= 8) {
112       assert(!type.sign);
113    } else {
114       assert(type.sign);
115    }
116 
117    assert(stencil->enabled);
118 
119    if (stencil->valuemask != stencilMax) {
120       /* compute stencilRef = stencilRef & valuemask */
121       LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask);
122       stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, "");
123       /* compute stencilVals = stencilVals & valuemask */
124       stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, "");
125    }
126 
127    res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals);
128 
129    return res;
130 }
131 
132 
133 /**
134  * Do the one or two-sided stencil test comparison.
135  * \sa lp_build_stencil_test_single
136  * \param front_facing  an integer vector mask, indicating front (~0) or back
137  *                      (0) facing polygon. If NULL, assume front-facing.
138  */
139 static LLVMValueRef
lp_build_stencil_test(struct lp_build_context * bld,const struct pipe_stencil_state stencil[2],LLVMValueRef stencilRefs[2],LLVMValueRef stencilVals,LLVMValueRef front_facing)140 lp_build_stencil_test(struct lp_build_context *bld,
141                       const struct pipe_stencil_state stencil[2],
142                       LLVMValueRef stencilRefs[2],
143                       LLVMValueRef stencilVals,
144                       LLVMValueRef front_facing)
145 {
146    LLVMValueRef res;
147 
148    assert(stencil[0].enabled);
149 
150    /* do front face test */
151    res = lp_build_stencil_test_single(bld, &stencil[0],
152                                       stencilRefs[0], stencilVals);
153 
154    if (stencil[1].enabled && front_facing != NULL) {
155       /* do back face test */
156       LLVMValueRef back_res;
157 
158       back_res = lp_build_stencil_test_single(bld, &stencil[1],
159                                               stencilRefs[1], stencilVals);
160 
161       res = lp_build_select(bld, front_facing, res, back_res);
162    }
163 
164    return res;
165 }
166 
167 
168 /**
169  * Apply the stencil operator (add/sub/keep/etc) to the given vector
170  * of stencil values.
171  * \return  new stencil values vector
172  */
173 static LLVMValueRef
lp_build_stencil_op_single(struct lp_build_context * bld,const struct pipe_stencil_state * stencil,enum stencil_op op,LLVMValueRef stencilRef,LLVMValueRef stencilVals)174 lp_build_stencil_op_single(struct lp_build_context *bld,
175                            const struct pipe_stencil_state *stencil,
176                            enum stencil_op op,
177                            LLVMValueRef stencilRef,
178                            LLVMValueRef stencilVals)
179 
180 {
181    LLVMBuilderRef builder = bld->gallivm->builder;
182    struct lp_type type = bld->type;
183    LLVMValueRef res;
184    LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff);
185    unsigned stencil_op;
186 
187    assert(type.sign);
188 
189    switch (op) {
190    case S_FAIL_OP:
191       stencil_op = stencil->fail_op;
192       break;
193    case Z_FAIL_OP:
194       stencil_op = stencil->zfail_op;
195       break;
196    case Z_PASS_OP:
197       stencil_op = stencil->zpass_op;
198       break;
199    default:
200       assert(0 && "Invalid stencil_op mode");
201       stencil_op = PIPE_STENCIL_OP_KEEP;
202    }
203 
204    switch (stencil_op) {
205    case PIPE_STENCIL_OP_KEEP:
206       res = stencilVals;
207       /* we can return early for this case */
208       return res;
209    case PIPE_STENCIL_OP_ZERO:
210       res = bld->zero;
211       break;
212    case PIPE_STENCIL_OP_REPLACE:
213       res = stencilRef;
214       break;
215    case PIPE_STENCIL_OP_INCR:
216       res = lp_build_add(bld, stencilVals, bld->one);
217       res = lp_build_min(bld, res, max);
218       break;
219    case PIPE_STENCIL_OP_DECR:
220       res = lp_build_sub(bld, stencilVals, bld->one);
221       res = lp_build_max(bld, res, bld->zero);
222       break;
223    case PIPE_STENCIL_OP_INCR_WRAP:
224       res = lp_build_add(bld, stencilVals, bld->one);
225       res = LLVMBuildAnd(builder, res, max, "");
226       break;
227    case PIPE_STENCIL_OP_DECR_WRAP:
228       res = lp_build_sub(bld, stencilVals, bld->one);
229       res = LLVMBuildAnd(builder, res, max, "");
230       break;
231    case PIPE_STENCIL_OP_INVERT:
232       res = LLVMBuildNot(builder, stencilVals, "");
233       res = LLVMBuildAnd(builder, res, max, "");
234       break;
235    default:
236       assert(0 && "bad stencil op mode");
237       res = bld->undef;
238    }
239 
240    return res;
241 }
242 
243 
244 /**
245  * Do the one or two-sided stencil test op/update.
246  */
247 static LLVMValueRef
lp_build_stencil_op(struct lp_build_context * bld,const struct pipe_stencil_state stencil[2],enum stencil_op op,LLVMValueRef stencilRefs[2],LLVMValueRef stencilVals,LLVMValueRef mask,LLVMValueRef front_facing)248 lp_build_stencil_op(struct lp_build_context *bld,
249                     const struct pipe_stencil_state stencil[2],
250                     enum stencil_op op,
251                     LLVMValueRef stencilRefs[2],
252                     LLVMValueRef stencilVals,
253                     LLVMValueRef mask,
254                     LLVMValueRef front_facing)
255 
256 {
257    LLVMBuilderRef builder = bld->gallivm->builder;
258    LLVMValueRef res;
259 
260    assert(stencil[0].enabled);
261 
262    /* do front face op */
263    res = lp_build_stencil_op_single(bld, &stencil[0], op,
264                                      stencilRefs[0], stencilVals);
265 
266    if (stencil[1].enabled && front_facing != NULL) {
267       /* do back face op */
268       LLVMValueRef back_res;
269 
270       back_res = lp_build_stencil_op_single(bld, &stencil[1], op,
271                                             stencilRefs[1], stencilVals);
272 
273       res = lp_build_select(bld, front_facing, res, back_res);
274    }
275 
276    if (stencil[0].writemask != 0xff ||
277        (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) {
278       /* mask &= stencil[0].writemask */
279       LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
280                                                       stencil[0].writemask);
281       if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) {
282          LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type,
283                                                          stencil[1].writemask);
284          writemask = lp_build_select(bld, front_facing, writemask, back_writemask);
285       }
286 
287       mask = LLVMBuildAnd(builder, mask, writemask, "");
288       /* res = (res & mask) | (stencilVals & ~mask) */
289       res = lp_build_select_bitwise(bld, mask, res, stencilVals);
290    }
291    else {
292       /* res = mask ? res : stencilVals */
293       res = lp_build_select(bld, mask, res, stencilVals);
294    }
295 
296    return res;
297 }
298 
299 
300 
301 /**
302  * Return a type appropriate for depth/stencil testing.
303  */
304 struct lp_type
lp_depth_type(const struct util_format_description * format_desc,unsigned length)305 lp_depth_type(const struct util_format_description *format_desc,
306               unsigned length)
307 {
308    struct lp_type type;
309    unsigned swizzle;
310 
311    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
312    assert(format_desc->block.width == 1);
313    assert(format_desc->block.height == 1);
314 
315    swizzle = format_desc->swizzle[0];
316    assert(swizzle < 4);
317 
318    memset(&type, 0, sizeof type);
319    type.width = format_desc->block.bits;
320 
321    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
322       type.floating = TRUE;
323       assert(swizzle == 0);
324       assert(format_desc->channel[swizzle].size == format_desc->block.bits);
325    }
326    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
327       assert(format_desc->block.bits <= 32);
328       assert(format_desc->channel[swizzle].normalized);
329       if (format_desc->channel[swizzle].size < format_desc->block.bits) {
330          /* Prefer signed integers when possible, as SSE has less support
331           * for unsigned comparison;
332           */
333          type.sign = TRUE;
334       }
335    }
336    else
337       assert(0);
338 
339    assert(type.width <= length);
340    type.length = length / type.width;
341 
342    return type;
343 }
344 
345 
346 /**
347  * Compute bitmask and bit shift to apply to the incoming fragment Z values
348  * and the Z buffer values needed before doing the Z comparison.
349  *
350  * Note that we leave the Z bits in the position that we find them
351  * in the Z buffer (typically 0xffffff00 or 0x00ffffff).  That lets us
352  * get by with fewer bit twiddling steps.
353  */
354 static boolean
get_z_shift_and_mask(const struct util_format_description * format_desc,unsigned * shift,unsigned * width,unsigned * mask)355 get_z_shift_and_mask(const struct util_format_description *format_desc,
356                      unsigned *shift, unsigned *width, unsigned *mask)
357 {
358    const unsigned total_bits = format_desc->block.bits;
359    unsigned z_swizzle;
360    unsigned chan;
361    unsigned padding_left, padding_right;
362 
363    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
364    assert(format_desc->block.width == 1);
365    assert(format_desc->block.height == 1);
366 
367    z_swizzle = format_desc->swizzle[0];
368 
369    if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
370       return FALSE;
371 
372    *width = format_desc->channel[z_swizzle].size;
373 
374    padding_right = 0;
375    for (chan = 0; chan < z_swizzle; ++chan)
376       padding_right += format_desc->channel[chan].size;
377 
378    padding_left =
379       total_bits - (padding_right + *width);
380 
381    if (padding_left || padding_right) {
382       unsigned long long mask_left = (1ULL << (total_bits - padding_left)) - 1;
383       unsigned long long mask_right = (1ULL << (padding_right)) - 1;
384       *mask = mask_left ^ mask_right;
385    }
386    else {
387       *mask = 0xffffffff;
388    }
389 
390    *shift = padding_right;
391 
392    return TRUE;
393 }
394 
395 
396 /**
397  * Compute bitmask and bit shift to apply to the framebuffer pixel values
398  * to put the stencil bits in the least significant position.
399  * (i.e. 0x000000ff)
400  */
401 static boolean
get_s_shift_and_mask(const struct util_format_description * format_desc,unsigned * shift,unsigned * mask)402 get_s_shift_and_mask(const struct util_format_description *format_desc,
403                      unsigned *shift, unsigned *mask)
404 {
405    unsigned s_swizzle;
406    unsigned chan, sz;
407 
408    s_swizzle = format_desc->swizzle[1];
409 
410    if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
411       return FALSE;
412 
413    *shift = 0;
414    for (chan = 0; chan < s_swizzle; chan++)
415       *shift += format_desc->channel[chan].size;
416 
417    sz = format_desc->channel[s_swizzle].size;
418    *mask = (1U << sz) - 1U;
419 
420    return TRUE;
421 }
422 
423 
424 /**
425  * Perform the occlusion test and increase the counter.
426  * Test the depth mask. Add the number of channel which has none zero mask
427  * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}.
428  * The counter will add 4.
429  *
430  * \param type holds element type of the mask vector.
431  * \param maskvalue is the depth test mask.
432  * \param counter is a pointer of the uint32 counter.
433  */
434 void
lp_build_occlusion_count(struct gallivm_state * gallivm,struct lp_type type,LLVMValueRef maskvalue,LLVMValueRef counter)435 lp_build_occlusion_count(struct gallivm_state *gallivm,
436                          struct lp_type type,
437                          LLVMValueRef maskvalue,
438                          LLVMValueRef counter)
439 {
440    LLVMBuilderRef builder = gallivm->builder;
441    LLVMContextRef context = gallivm->context;
442    LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1);
443    LLVMValueRef count, newcount;
444 
445    assert(type.length <= 16);
446    assert(type.floating);
447 
448    if(util_cpu_caps.has_sse && type.length == 4) {
449       const char *movmskintr = "llvm.x86.sse.movmsk.ps";
450       const char *popcntintr = "llvm.ctpop.i32";
451       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
452                                            lp_build_vec_type(gallivm, type), "");
453       bits = lp_build_intrinsic_unary(builder, movmskintr,
454                                       LLVMInt32TypeInContext(context), bits);
455       count = lp_build_intrinsic_unary(builder, popcntintr,
456                                        LLVMInt32TypeInContext(context), bits);
457    }
458    else if(util_cpu_caps.has_avx && type.length == 8) {
459       const char *movmskintr = "llvm.x86.avx.movmsk.ps.256";
460       const char *popcntintr = "llvm.ctpop.i32";
461       LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue,
462                                            lp_build_vec_type(gallivm, type), "");
463       bits = lp_build_intrinsic_unary(builder, movmskintr,
464                                       LLVMInt32TypeInContext(context), bits);
465       count = lp_build_intrinsic_unary(builder, popcntintr,
466                                        LLVMInt32TypeInContext(context), bits);
467    }
468    else {
469       unsigned i;
470       LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv");
471       LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8);
472       LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4);
473       LLVMValueRef shufflev, countd;
474       LLVMValueRef shuffles[16];
475       const char *popcntintr = NULL;
476 
477       countv = LLVMBuildBitCast(builder, countv, i8vntype, "");
478 
479        for (i = 0; i < type.length; i++) {
480           shuffles[i] = lp_build_const_int32(gallivm, 4*i);
481        }
482 
483        shufflev = LLVMConstVector(shuffles, type.length);
484        countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, "");
485        countd = LLVMBuildBitCast(builder, countd, counttype, "countd");
486 
487        /*
488         * XXX FIXME
489         * this is bad on cpus without popcount (on x86 supported by intel
490         * nehalem, amd barcelona, and up - not tied to sse42).
491         * Would be much faster to just sum the 4 elements of the vector with
492         * some horizontal add (shuffle/add/shuffle/add after the initial and).
493         */
494        switch (type.length) {
495        case 4:
496           popcntintr = "llvm.ctpop.i32";
497           break;
498        case 8:
499           popcntintr = "llvm.ctpop.i64";
500           break;
501        case 16:
502           popcntintr = "llvm.ctpop.i128";
503           break;
504        default:
505           assert(0);
506        }
507        count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd);
508 
509        if (type.length > 4) {
510           count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 32), "");
511        }
512    }
513    newcount = LLVMBuildLoad(builder, counter, "origcount");
514    newcount = LLVMBuildAdd(builder, newcount, count, "newcount");
515    LLVMBuildStore(builder, newcount, counter);
516 }
517 
518 
519 
520 /**
521  * Generate code for performing depth and/or stencil tests.
522  * We operate on a vector of values (typically n 2x2 quads).
523  *
524  * \param depth  the depth test state
525  * \param stencil  the front/back stencil state
526  * \param type  the data type of the fragment depth/stencil values
527  * \param format_desc  description of the depth/stencil surface
528  * \param mask  the alive/dead pixel mask for the quad (vector)
529  * \param stencil_refs  the front/back stencil ref values (scalar)
530  * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
531  * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
532  * \param face  contains boolean value indicating front/back facing polygon
533  */
534 void
lp_build_depth_stencil_test(struct gallivm_state * gallivm,const struct pipe_depth_state * depth,const struct pipe_stencil_state stencil[2],struct lp_type z_src_type,const struct util_format_description * format_desc,struct lp_build_mask_context * mask,LLVMValueRef stencil_refs[2],LLVMValueRef z_src,LLVMValueRef zs_dst_ptr,LLVMValueRef face,LLVMValueRef * zs_value,boolean do_branch)535 lp_build_depth_stencil_test(struct gallivm_state *gallivm,
536                             const struct pipe_depth_state *depth,
537                             const struct pipe_stencil_state stencil[2],
538                             struct lp_type z_src_type,
539                             const struct util_format_description *format_desc,
540                             struct lp_build_mask_context *mask,
541                             LLVMValueRef stencil_refs[2],
542                             LLVMValueRef z_src,
543                             LLVMValueRef zs_dst_ptr,
544                             LLVMValueRef face,
545                             LLVMValueRef *zs_value,
546                             boolean do_branch)
547 {
548    LLVMBuilderRef builder = gallivm->builder;
549    struct lp_type z_type;
550    struct lp_build_context z_bld;
551    struct lp_build_context s_bld;
552    struct lp_type s_type;
553    unsigned z_shift = 0, z_width = 0, z_mask = 0;
554    LLVMValueRef zs_dst, z_dst = NULL;
555    LLVMValueRef stencil_vals = NULL;
556    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
557    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
558    LLVMValueRef orig_mask = lp_build_mask_value(mask);
559    LLVMValueRef front_facing = NULL;
560 
561 
562    /*
563     * Depths are expected to be between 0 and 1, even if they are stored in
564     * floats. Setting these bits here will ensure that the lp_build_conv() call
565     * below won't try to unnecessarily clamp the incoming values.
566     */
567    if(z_src_type.floating) {
568       z_src_type.sign = FALSE;
569       z_src_type.norm = TRUE;
570    }
571    else {
572       assert(!z_src_type.sign);
573       assert(z_src_type.norm);
574    }
575 
576    /* Pick the depth type. */
577    z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
578 
579    /* FIXME: Cope with a depth test type with a different bit width. */
580    assert(z_type.width == z_src_type.width);
581    assert(z_type.length == z_src_type.length);
582 
583    /* FIXME: for non-float depth/stencil might generate better code
584     * if we'd always split it up to use 128bit operations.
585     * For stencil we'd almost certainly want to pack to 8xi16 values,
586     * for z just run twice.
587     */
588 
589    /* Sanity checking */
590    {
591       const unsigned z_swizzle = format_desc->swizzle[0];
592       const unsigned s_swizzle = format_desc->swizzle[1];
593 
594       assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE ||
595              s_swizzle != UTIL_FORMAT_SWIZZLE_NONE);
596 
597       assert(depth->enabled || stencil[0].enabled);
598 
599       assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
600       assert(format_desc->block.width == 1);
601       assert(format_desc->block.height == 1);
602 
603       if (stencil[0].enabled) {
604          assert(format_desc->format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
605                 format_desc->format == PIPE_FORMAT_S8_UINT_Z24_UNORM);
606       }
607 
608       assert(z_swizzle < 4);
609       assert(format_desc->block.bits == z_type.width);
610       if (z_type.floating) {
611          assert(z_swizzle == 0);
612          assert(format_desc->channel[z_swizzle].type ==
613                 UTIL_FORMAT_TYPE_FLOAT);
614          assert(format_desc->channel[z_swizzle].size ==
615                 format_desc->block.bits);
616       }
617       else {
618          assert(format_desc->channel[z_swizzle].type ==
619                 UTIL_FORMAT_TYPE_UNSIGNED);
620          assert(format_desc->channel[z_swizzle].normalized);
621          assert(!z_type.fixed);
622       }
623    }
624 
625 
626    /* Setup build context for Z vals */
627    lp_build_context_init(&z_bld, gallivm, z_type);
628 
629    /* Setup build context for stencil vals */
630    s_type = lp_int_type(z_type);
631    lp_build_context_init(&s_bld, gallivm, s_type);
632 
633    /* Load current z/stencil value from z/stencil buffer */
634    zs_dst_ptr = LLVMBuildBitCast(builder,
635                                  zs_dst_ptr,
636                                  LLVMPointerType(z_bld.vec_type, 0), "");
637    zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
638 
639    lp_build_name(zs_dst, "zs_dst");
640 
641 
642    /* Compute and apply the Z/stencil bitmasks and shifts.
643     */
644    {
645       unsigned s_shift, s_mask;
646 
647       if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
648          if (z_mask != 0xffffffff) {
649             z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
650          }
651 
652          /*
653           * Align the framebuffer Z 's LSB to the right.
654           */
655          if (z_shift) {
656             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
657             z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
658          } else if (z_bitmask) {
659 	    /* TODO: Instead of loading a mask from memory and ANDing, it's
660 	     * probably faster to just shake the bits with two shifts. */
661             z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
662          } else {
663             z_dst = zs_dst;
664             lp_build_name(z_dst, "z_dst");
665          }
666       }
667 
668       if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
669          if (s_shift) {
670             LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
671             stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
672             stencil_shift = shift;  /* used below */
673          }
674          else {
675             stencil_vals = zs_dst;
676          }
677 
678          if (s_mask != 0xffffffff) {
679             LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
680             stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, "");
681          }
682 
683          lp_build_name(stencil_vals, "s_dst");
684       }
685    }
686 
687    if (stencil[0].enabled) {
688 
689       if (face) {
690          LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
691 
692          /* front_facing = face != 0 ? ~0 : 0 */
693          front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, "");
694          front_facing = LLVMBuildSExt(builder, front_facing,
695                                       LLVMIntTypeInContext(gallivm->context,
696                                              s_bld.type.length*s_bld.type.width),
697                                       "");
698          front_facing = LLVMBuildBitCast(builder, front_facing,
699                                          s_bld.int_vec_type, "");
700       }
701 
702       /* convert scalar stencil refs into vectors */
703       stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]);
704       stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]);
705 
706       s_pass_mask = lp_build_stencil_test(&s_bld, stencil,
707                                           stencil_refs, stencil_vals,
708                                           front_facing);
709 
710       /* apply stencil-fail operator */
711       {
712          LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, orig_mask, s_pass_mask);
713          stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP,
714                                             stencil_refs, stencil_vals,
715                                             s_fail_mask, front_facing);
716       }
717    }
718 
719    if (depth->enabled) {
720       /*
721        * Convert fragment Z to the desired type, aligning the LSB to the right.
722        */
723 
724       assert(z_type.width == z_src_type.width);
725       assert(z_type.length == z_src_type.length);
726       assert(lp_check_value(z_src_type, z_src));
727       if (z_src_type.floating) {
728          /*
729           * Convert from floating point values
730           */
731 
732          if (!z_type.floating) {
733             z_src = lp_build_clamped_float_to_unsigned_norm(gallivm,
734                                                             z_src_type,
735                                                             z_width,
736                                                             z_src);
737          }
738       } else {
739          /*
740           * Convert from unsigned normalized values.
741           */
742 
743          assert(!z_src_type.sign);
744          assert(!z_src_type.fixed);
745          assert(z_src_type.norm);
746          assert(!z_type.floating);
747          if (z_src_type.width > z_width) {
748             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type,
749                                                         z_src_type.width - z_width);
750             z_src = LLVMBuildLShr(builder, z_src, shift, "");
751          }
752       }
753       assert(lp_check_value(z_type, z_src));
754 
755       lp_build_name(z_src, "z_src");
756 
757       /* compare src Z to dst Z, returning 'pass' mask */
758       z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst);
759 
760       if (!stencil[0].enabled) {
761          /* We can potentially skip all remaining operations here, but only
762           * if stencil is disabled because we still need to update the stencil
763           * buffer values.  Don't need to update Z buffer values.
764           */
765          lp_build_mask_update(mask, z_pass);
766 
767          if (do_branch) {
768             lp_build_mask_check(mask);
769             do_branch = FALSE;
770          }
771       }
772 
773       if (depth->writemask) {
774          LLVMValueRef zselectmask;
775 
776          /* mask off bits that failed Z test */
777          zselectmask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
778 
779          /* mask off bits that failed stencil test */
780          if (s_pass_mask) {
781             zselectmask = LLVMBuildAnd(builder, zselectmask, s_pass_mask, "");
782          }
783 
784          /* Mix the old and new Z buffer values.
785           * z_dst[i] = zselectmask[i] ? z_src[i] : z_dst[i]
786           */
787          z_dst = lp_build_select(&z_bld, zselectmask, z_src, z_dst);
788       }
789 
790       if (stencil[0].enabled) {
791          /* update stencil buffer values according to z pass/fail result */
792          LLVMValueRef z_fail_mask, z_pass_mask;
793 
794          /* apply Z-fail operator */
795          z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
796          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
797                                             stencil_refs, stencil_vals,
798                                             z_fail_mask, front_facing);
799 
800          /* apply Z-pass operator */
801          z_pass_mask = LLVMBuildAnd(builder, orig_mask, z_pass, "");
802          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
803                                             stencil_refs, stencil_vals,
804                                             z_pass_mask, front_facing);
805       }
806    }
807    else {
808       /* No depth test: apply Z-pass operator to stencil buffer values which
809        * passed the stencil test.
810        */
811       s_pass_mask = LLVMBuildAnd(builder, orig_mask, s_pass_mask, "");
812       stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP,
813                                          stencil_refs, stencil_vals,
814                                          s_pass_mask, front_facing);
815    }
816 
817    /* Put Z and ztencil bits in the right place */
818    if (z_dst && z_shift) {
819       LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
820       z_dst = LLVMBuildShl(builder, z_dst, shift, "");
821    }
822    if (stencil_vals && stencil_shift)
823       stencil_vals = LLVMBuildShl(builder, stencil_vals,
824                                   stencil_shift, "");
825 
826    /* Finally, merge/store the z/stencil values */
827    if ((depth->enabled && depth->writemask) ||
828        (stencil[0].enabled && stencil[0].writemask)) {
829 
830       if (z_dst && stencil_vals)
831          zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
832       else if (z_dst)
833          zs_dst = z_dst;
834       else
835          zs_dst = stencil_vals;
836 
837       *zs_value = zs_dst;
838    }
839 
840    if (s_pass_mask)
841       lp_build_mask_update(mask, s_pass_mask);
842 
843    if (depth->enabled && stencil[0].enabled)
844       lp_build_mask_update(mask, z_pass);
845 
846    if (do_branch)
847       lp_build_mask_check(mask);
848 
849 }
850 
851 
852 void
lp_build_depth_write(LLVMBuilderRef builder,const struct util_format_description * format_desc,LLVMValueRef zs_dst_ptr,LLVMValueRef zs_value)853 lp_build_depth_write(LLVMBuilderRef builder,
854                      const struct util_format_description *format_desc,
855                      LLVMValueRef zs_dst_ptr,
856                      LLVMValueRef zs_value)
857 {
858    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
859                                  LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
860 
861    LLVMBuildStore(builder, zs_value, zs_dst_ptr);
862 }
863 
864 
865 void
lp_build_deferred_depth_write(struct gallivm_state * gallivm,struct lp_type z_src_type,const struct util_format_description * format_desc,struct lp_build_mask_context * mask,LLVMValueRef zs_dst_ptr,LLVMValueRef zs_value)866 lp_build_deferred_depth_write(struct gallivm_state *gallivm,
867                               struct lp_type z_src_type,
868                               const struct util_format_description *format_desc,
869                               struct lp_build_mask_context *mask,
870                               LLVMValueRef zs_dst_ptr,
871                               LLVMValueRef zs_value)
872 {
873    struct lp_type z_type;
874    struct lp_build_context z_bld;
875    LLVMValueRef z_dst;
876    LLVMBuilderRef builder = gallivm->builder;
877 
878    /* XXX: pointlessly redo type logic:
879     */
880    z_type = lp_depth_type(format_desc, z_src_type.width*z_src_type.length);
881    lp_build_context_init(&z_bld, gallivm, z_type);
882 
883    zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
884                                  LLVMPointerType(z_bld.vec_type, 0), "");
885 
886    z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
887    z_dst = lp_build_select(&z_bld, lp_build_mask_value(mask), zs_value, z_dst);
888 
889    LLVMBuildStore(builder, z_dst, zs_dst_ptr);
890 }
891