1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file depthstencil.h
24 *
25 * @brief Implements depth/stencil functionality
26 *
27 ******************************************************************************/
28 #pragma once
29 #include "common/os.h"
30 #include "format_conversion.h"
31 
32 INLINE
StencilOp(SWR_STENCILOP op,simdscalar const & mask,simdscalar const & stencilRefps,simdscalar & stencilps)33 void StencilOp(SWR_STENCILOP op, simdscalar const &mask, simdscalar const &stencilRefps, simdscalar &stencilps)
34 {
35     simdscalari stencil = _simd_castps_si(stencilps);
36 
37     switch (op)
38     {
39     case STENCILOP_KEEP:
40         break;
41     case STENCILOP_ZERO:
42         stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask);
43         break;
44     case STENCILOP_REPLACE:
45         stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask);
46         break;
47     case STENCILOP_INCRSAT:
48     {
49         simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
50         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
51         break;
52     }
53     case STENCILOP_DECRSAT:
54     {
55         simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
56         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
57         break;
58     }
59     case STENCILOP_INCR:
60     {
61         simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
62         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
63         break;
64     }
65     case STENCILOP_DECR:
66     {
67         simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
68         stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
69         break;
70     }
71     case STENCILOP_INVERT:
72     {
73         simdscalar stencilinvert = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
74         stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
75         break;
76     }
77     default:
78         break;
79     }
80 }
81 
82 
83 template<SWR_FORMAT depthFormatT>
QuantizeDepth(simdscalar const & depth)84 simdscalar QuantizeDepth(simdscalar const &depth)
85 {
86     SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
87     uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
88 
89     if (depthType == SWR_TYPE_FLOAT)
90     {
91         // assume only 32bit float depth supported
92         SWR_ASSERT(depthBpc == 32);
93 
94         // matches shader precision, no quantizing needed
95         return depth;
96     }
97 
98     // should be unorm depth if not float
99     SWR_ASSERT(depthType == SWR_TYPE_UNORM);
100 
101     float quantize = (float)((1 << depthBpc) - 1);
102     simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
103     result = _simd_add_ps(result, _simd_set1_ps(0.5f));
104     result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
105 
106     if (depthBpc > 16)
107     {
108         result = _simd_div_ps(result, _simd_set1_ps(quantize));
109     }
110     else
111     {
112         result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize));
113     }
114 
115     return result;
116 }
117 
118 INLINE
DepthStencilTest(const API_STATE * pState,bool frontFacing,uint32_t viewportIndex,simdscalar const & iZ,uint8_t * pDepthBase,simdscalar const & coverageMask,uint8_t * pStencilBase,simdscalar * pStencilMask)119 simdscalar DepthStencilTest(const API_STATE* pState,
120                  bool frontFacing, uint32_t viewportIndex, simdscalar const &iZ, uint8_t* pDepthBase, simdscalar const &coverageMask,
121                  uint8_t *pStencilBase, simdscalar* pStencilMask)
122 {
123     static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
124     static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
125 
126     const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
127     const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
128 
129     simdscalar depthResult = _simd_set1_ps(-1.0f);
130     simdscalar zbuf;
131 
132     // clamp Z to viewport [minZ..maxZ]
133     simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
134     simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
135     simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
136 
137     if (pDSState->depthTestEnable)
138     {
139         switch (pDSState->depthTestFunc)
140         {
141         case ZFUNC_NEVER: depthResult = _simd_setzero_ps(); break;
142         case ZFUNC_ALWAYS: break;
143         default:
144             zbuf = _simd_load_ps((const float*)pDepthBase);
145         }
146 
147         switch (pDSState->depthTestFunc)
148         {
149         case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break;
150         case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break;
151         case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break;
152         case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break;
153         case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break;
154         case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break;
155         }
156     }
157 
158     simdscalar stencilMask = _simd_set1_ps(-1.0f);
159 
160     if (pDSState->stencilTestEnable)
161     {
162         uint8_t stencilRefValue;
163         uint32_t stencilTestFunc;
164         uint8_t stencilTestMask;
165         if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
166         {
167             stencilRefValue = pDSState->stencilRefValue;
168             stencilTestFunc = pDSState->stencilTestFunc;
169             stencilTestMask = pDSState->stencilTestMask;
170         }
171         else
172         {
173             stencilRefValue = pDSState->backfaceStencilRefValue;
174             stencilTestFunc = pDSState->backfaceStencilTestFunc;
175             stencilTestMask = pDSState->backfaceStencilTestMask;
176         }
177 
178         simdvector sbuf;
179         simdscalar stencilWithMask;
180         simdscalar stencilRef;
181         switch(stencilTestFunc)
182         {
183         case ZFUNC_NEVER: stencilMask = _simd_setzero_ps(); break;
184         case ZFUNC_ALWAYS: break;
185         default:
186             LoadSOA<R8_UINT>(pStencilBase, sbuf);
187 
188             // apply stencil read mask
189             stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
190 
191             // do stencil compare in float to avoid simd integer emulation in AVX1
192             stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
193 
194             stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));
195             break;
196         }
197 
198         switch(stencilTestFunc)
199         {
200         case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break;
201         case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break;
202         case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break;
203         case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break;
204         case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break;
205         case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break;
206         }
207     }
208 
209     simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
210     depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
211 
212     *pStencilMask = stencilMask;
213     return depthWriteMask;
214 }
215 
216 INLINE
DepthStencilWrite(const SWR_VIEWPORT * pViewport,const SWR_DEPTH_STENCIL_STATE * pDSState,bool frontFacing,simdscalar const & iZ,uint8_t * pDepthBase,const simdscalar & depthMask,const simdscalar & coverageMask,uint8_t * pStencilBase,const simdscalar & stencilMask)217 void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
218         bool frontFacing, simdscalar const &iZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
219         uint8_t *pStencilBase, const simdscalar& stencilMask)
220 {
221     if (pDSState->depthWriteEnable)
222     {
223         // clamp Z to viewport [minZ..maxZ]
224         simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
225         simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
226         simdscalar interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, iZ));
227 
228         simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
229         _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ);
230     }
231 
232     if (pDSState->stencilWriteEnable)
233     {
234         simdvector sbuf;
235         LoadSOA<R8_UINT>(pStencilBase, sbuf);
236         simdscalar stencilbuf = sbuf.v[0];
237 
238         uint8_t stencilRefValue;
239         uint32_t stencilFailOp;
240         uint32_t stencilPassDepthPassOp;
241         uint32_t stencilPassDepthFailOp;
242         uint8_t stencilWriteMask;
243         if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
244         {
245             stencilRefValue = pDSState->stencilRefValue;
246             stencilFailOp = pDSState->stencilFailOp;
247             stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
248             stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
249             stencilWriteMask = pDSState->stencilWriteMask;
250         }
251         else
252         {
253             stencilRefValue = pDSState->backfaceStencilRefValue;
254             stencilFailOp = pDSState->backfaceStencilFailOp;
255             stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
256             stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
257             stencilWriteMask = pDSState->backfaceStencilWriteMask;
258         }
259 
260         simdscalar stencilps = stencilbuf;
261         simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
262 
263         simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
264         simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
265         simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
266 
267         simdscalar origStencil = stencilps;
268 
269         StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
270         StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps);
271         StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps);
272 
273         // apply stencil write mask
274         simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
275         stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
276         stencilps = _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
277 
278         simdvector stencilResult;
279         stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
280         StoreSOA<R8_UINT>(stencilResult, pStencilBase);
281     }
282 
283 }
284