1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file binner.h
24 *
25 * @brief Declaration for the macrotile binner
26 *
27 ******************************************************************************/
28 #include "state.h"
29 #include "conservativeRast.h"
30 #include "utils.h"
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Offsets added to post-viewport vertex positions based on
33 /// raster state.
34 ///
35 /// Can't use templated variable because we must stick with C++11 features.
36 /// Template variables were introduced with C++14
37 template <typename SIMD_T>
38 struct SwrPixelOffsets
39 {
40 public:
GetOffsetSwrPixelOffsets41     INLINE static typename SIMD_T::Float GetOffset(uint32_t loc)
42     {
43         SWR_ASSERT(loc <= 1);
44 
45         return SIMD_T::set1_ps(loc ? 0.5f : 0.0f);
46     }
47 };
48 
49 //////////////////////////////////////////////////////////////////////////
50 /// @brief Convert the X,Y coords of a triangle to the requested Fixed
51 /// Point precision from FP32.
52 template <typename SIMD_T, typename PT = FixedPointTraits<Fixed_16_8>>
fpToFixedPointVertical(const typename SIMD_T::Float & vIn)53 INLINE typename SIMD_T::Integer fpToFixedPointVertical(const typename SIMD_T::Float &vIn)
54 {
55     return SIMD_T::cvtps_epi32(SIMD_T::mul_ps(vIn, SIMD_T::set1_ps(PT::ScaleT::value)));
56 }
57 
58 //////////////////////////////////////////////////////////////////////////
59 /// @brief Helper function to set the X,Y coords of a triangle to the
60 /// requested Fixed Point precision from FP32.
61 /// @param tri: simdvector[3] of FP triangle verts
62 /// @param vXi: fixed point X coords of tri verts
63 /// @param vYi: fixed point Y coords of tri verts
64 template <typename SIMD_T>
FPToFixedPoint(const typename SIMD_T::Vec4 * const tri,typename SIMD_T::Integer (& vXi)[3],typename SIMD_T::Integer (& vYi)[3])65 INLINE static void FPToFixedPoint(const typename SIMD_T::Vec4 *const tri, typename SIMD_T::Integer(&vXi)[3], typename SIMD_T::Integer(&vYi)[3])
66 {
67     vXi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].x);
68     vYi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].y);
69     vXi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].x);
70     vYi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].y);
71     vXi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].x);
72     vYi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].y);
73 }
74 
75 //////////////////////////////////////////////////////////////////////////
76 /// @brief Calculate bounding box for current triangle
77 /// @tparam CT: ConservativeRastFETraits type
78 /// @param vX: fixed point X position for triangle verts
79 /// @param vY: fixed point Y position for triangle verts
80 /// @param bbox: fixed point bbox
81 /// *Note*: expects vX, vY to be in the correct precision for the type
82 /// of rasterization. This avoids unnecessary FP->fixed conversions.
83 template <typename SIMD_T, typename CT>
calcBoundingBoxIntVertical(const typename SIMD_T::Integer (& vX)[3],const typename SIMD_T::Integer (& vY)[3],SIMDBBOX_T<SIMD_T> & bbox)84 INLINE void calcBoundingBoxIntVertical(const typename SIMD_T::Integer(&vX)[3], const typename SIMD_T::Integer(&vY)[3], SIMDBBOX_T<SIMD_T> &bbox)
85 {
86     typename SIMD_T::Integer vMinX = vX[0];
87 
88     vMinX = SIMD_T::min_epi32(vMinX, vX[1]);
89     vMinX = SIMD_T::min_epi32(vMinX, vX[2]);
90 
91     typename SIMD_T::Integer vMaxX = vX[0];
92 
93     vMaxX = SIMD_T::max_epi32(vMaxX, vX[1]);
94     vMaxX = SIMD_T::max_epi32(vMaxX, vX[2]);
95 
96     typename SIMD_T::Integer vMinY = vY[0];
97 
98     vMinY = SIMD_T::min_epi32(vMinY, vY[1]);
99     vMinY = SIMD_T::min_epi32(vMinY, vY[2]);
100 
101     typename SIMD_T::Integer vMaxY = vY[0];
102 
103     vMaxY = SIMD_T::max_epi32(vMaxY, vY[1]);
104     vMaxY = SIMD_T::max_epi32(vMaxY, vY[2]);
105 
106     if (CT::BoundingBoxOffsetT::value != 0)
107     {
108         /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative rasterization
109         /// expand bbox by 1/256; coverage will be correctly handled in the rasterizer.
110 
111         const typename SIMD_T::Integer value = SIMD_T::set1_epi32(CT::BoundingBoxOffsetT::value);
112 
113         vMinX = SIMD_T::sub_epi32(vMinX, value);
114         vMaxX = SIMD_T::add_epi32(vMaxX, value);
115         vMinY = SIMD_T::sub_epi32(vMinY, value);
116         vMaxY = SIMD_T::add_epi32(vMaxY, value);
117     }
118 
119     bbox.xmin = vMinX;
120     bbox.xmax = vMaxX;
121     bbox.ymin = vMinY;
122     bbox.ymax = vMaxY;
123 }
124 
125 //////////////////////////////////////////////////////////////////////////
126 /// @brief  Gather scissor rect data based on per-prim viewport indices.
127 /// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point.
128 /// @param pViewportIndex - array of per-primitive vewport indexes.
129 /// @param scisXmin - output vector of per-prmitive scissor rect Xmin data.
130 /// @param scisYmin - output vector of per-prmitive scissor rect Ymin data.
131 /// @param scisXmax - output vector of per-prmitive scissor rect Xmax data.
132 /// @param scisYmax - output vector of per-prmitive scissor rect Ymax data.
133 //
134 /// @todo:  Look at speeding this up -- weigh against corresponding costs in rasterizer.
GatherScissors(const SWR_RECT * pScissorsInFixedPoint,const uint32_t * pViewportIndex,simdscalari & scisXmin,simdscalari & scisYmin,simdscalari & scisXmax,simdscalari & scisYmax)135 static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
136     simdscalari &scisXmin, simdscalari &scisYmin, simdscalari &scisXmax, simdscalari &scisYmax)
137 {
138     scisXmin = _simd_set_epi32(
139         pScissorsInFixedPoint[pViewportIndex[7]].xmin,
140         pScissorsInFixedPoint[pViewportIndex[6]].xmin,
141         pScissorsInFixedPoint[pViewportIndex[5]].xmin,
142         pScissorsInFixedPoint[pViewportIndex[4]].xmin,
143         pScissorsInFixedPoint[pViewportIndex[3]].xmin,
144         pScissorsInFixedPoint[pViewportIndex[2]].xmin,
145         pScissorsInFixedPoint[pViewportIndex[1]].xmin,
146         pScissorsInFixedPoint[pViewportIndex[0]].xmin);
147     scisYmin = _simd_set_epi32(
148         pScissorsInFixedPoint[pViewportIndex[7]].ymin,
149         pScissorsInFixedPoint[pViewportIndex[6]].ymin,
150         pScissorsInFixedPoint[pViewportIndex[5]].ymin,
151         pScissorsInFixedPoint[pViewportIndex[4]].ymin,
152         pScissorsInFixedPoint[pViewportIndex[3]].ymin,
153         pScissorsInFixedPoint[pViewportIndex[2]].ymin,
154         pScissorsInFixedPoint[pViewportIndex[1]].ymin,
155         pScissorsInFixedPoint[pViewportIndex[0]].ymin);
156     scisXmax = _simd_set_epi32(
157         pScissorsInFixedPoint[pViewportIndex[7]].xmax,
158         pScissorsInFixedPoint[pViewportIndex[6]].xmax,
159         pScissorsInFixedPoint[pViewportIndex[5]].xmax,
160         pScissorsInFixedPoint[pViewportIndex[4]].xmax,
161         pScissorsInFixedPoint[pViewportIndex[3]].xmax,
162         pScissorsInFixedPoint[pViewportIndex[2]].xmax,
163         pScissorsInFixedPoint[pViewportIndex[1]].xmax,
164         pScissorsInFixedPoint[pViewportIndex[0]].xmax);
165     scisYmax = _simd_set_epi32(
166         pScissorsInFixedPoint[pViewportIndex[7]].ymax,
167         pScissorsInFixedPoint[pViewportIndex[6]].ymax,
168         pScissorsInFixedPoint[pViewportIndex[5]].ymax,
169         pScissorsInFixedPoint[pViewportIndex[4]].ymax,
170         pScissorsInFixedPoint[pViewportIndex[3]].ymax,
171         pScissorsInFixedPoint[pViewportIndex[2]].ymax,
172         pScissorsInFixedPoint[pViewportIndex[1]].ymax,
173         pScissorsInFixedPoint[pViewportIndex[0]].ymax);
174 }
175 
GatherScissors(const SWR_RECT * pScissorsInFixedPoint,const uint32_t * pViewportIndex,simd16scalari & scisXmin,simd16scalari & scisYmin,simd16scalari & scisXmax,simd16scalari & scisYmax)176 static void GatherScissors(const SWR_RECT *pScissorsInFixedPoint, const uint32_t *pViewportIndex,
177     simd16scalari &scisXmin, simd16scalari &scisYmin, simd16scalari &scisXmax, simd16scalari &scisYmax)
178 {
179     scisXmin = _simd16_set_epi32(
180         pScissorsInFixedPoint[pViewportIndex[15]].xmin,
181         pScissorsInFixedPoint[pViewportIndex[14]].xmin,
182         pScissorsInFixedPoint[pViewportIndex[13]].xmin,
183         pScissorsInFixedPoint[pViewportIndex[12]].xmin,
184         pScissorsInFixedPoint[pViewportIndex[11]].xmin,
185         pScissorsInFixedPoint[pViewportIndex[10]].xmin,
186         pScissorsInFixedPoint[pViewportIndex[9]].xmin,
187         pScissorsInFixedPoint[pViewportIndex[8]].xmin,
188         pScissorsInFixedPoint[pViewportIndex[7]].xmin,
189         pScissorsInFixedPoint[pViewportIndex[6]].xmin,
190         pScissorsInFixedPoint[pViewportIndex[5]].xmin,
191         pScissorsInFixedPoint[pViewportIndex[4]].xmin,
192         pScissorsInFixedPoint[pViewportIndex[3]].xmin,
193         pScissorsInFixedPoint[pViewportIndex[2]].xmin,
194         pScissorsInFixedPoint[pViewportIndex[1]].xmin,
195         pScissorsInFixedPoint[pViewportIndex[0]].xmin);
196 
197     scisYmin = _simd16_set_epi32(
198         pScissorsInFixedPoint[pViewportIndex[15]].ymin,
199         pScissorsInFixedPoint[pViewportIndex[14]].ymin,
200         pScissorsInFixedPoint[pViewportIndex[13]].ymin,
201         pScissorsInFixedPoint[pViewportIndex[12]].ymin,
202         pScissorsInFixedPoint[pViewportIndex[11]].ymin,
203         pScissorsInFixedPoint[pViewportIndex[10]].ymin,
204         pScissorsInFixedPoint[pViewportIndex[9]].ymin,
205         pScissorsInFixedPoint[pViewportIndex[8]].ymin,
206         pScissorsInFixedPoint[pViewportIndex[7]].ymin,
207         pScissorsInFixedPoint[pViewportIndex[6]].ymin,
208         pScissorsInFixedPoint[pViewportIndex[5]].ymin,
209         pScissorsInFixedPoint[pViewportIndex[4]].ymin,
210         pScissorsInFixedPoint[pViewportIndex[3]].ymin,
211         pScissorsInFixedPoint[pViewportIndex[2]].ymin,
212         pScissorsInFixedPoint[pViewportIndex[1]].ymin,
213         pScissorsInFixedPoint[pViewportIndex[0]].ymin);
214 
215     scisXmax = _simd16_set_epi32(
216         pScissorsInFixedPoint[pViewportIndex[15]].xmax,
217         pScissorsInFixedPoint[pViewportIndex[14]].xmax,
218         pScissorsInFixedPoint[pViewportIndex[13]].xmax,
219         pScissorsInFixedPoint[pViewportIndex[12]].xmax,
220         pScissorsInFixedPoint[pViewportIndex[11]].xmax,
221         pScissorsInFixedPoint[pViewportIndex[10]].xmax,
222         pScissorsInFixedPoint[pViewportIndex[9]].xmax,
223         pScissorsInFixedPoint[pViewportIndex[8]].xmax,
224         pScissorsInFixedPoint[pViewportIndex[7]].xmax,
225         pScissorsInFixedPoint[pViewportIndex[6]].xmax,
226         pScissorsInFixedPoint[pViewportIndex[5]].xmax,
227         pScissorsInFixedPoint[pViewportIndex[4]].xmax,
228         pScissorsInFixedPoint[pViewportIndex[3]].xmax,
229         pScissorsInFixedPoint[pViewportIndex[2]].xmax,
230         pScissorsInFixedPoint[pViewportIndex[1]].xmax,
231         pScissorsInFixedPoint[pViewportIndex[0]].xmax);
232 
233     scisYmax = _simd16_set_epi32(
234         pScissorsInFixedPoint[pViewportIndex[15]].ymax,
235         pScissorsInFixedPoint[pViewportIndex[14]].ymax,
236         pScissorsInFixedPoint[pViewportIndex[13]].ymax,
237         pScissorsInFixedPoint[pViewportIndex[12]].ymax,
238         pScissorsInFixedPoint[pViewportIndex[11]].ymax,
239         pScissorsInFixedPoint[pViewportIndex[10]].ymax,
240         pScissorsInFixedPoint[pViewportIndex[9]].ymax,
241         pScissorsInFixedPoint[pViewportIndex[8]].ymax,
242         pScissorsInFixedPoint[pViewportIndex[7]].ymax,
243         pScissorsInFixedPoint[pViewportIndex[6]].ymax,
244         pScissorsInFixedPoint[pViewportIndex[5]].ymax,
245         pScissorsInFixedPoint[pViewportIndex[4]].ymax,
246         pScissorsInFixedPoint[pViewportIndex[3]].ymax,
247         pScissorsInFixedPoint[pViewportIndex[2]].ymax,
248         pScissorsInFixedPoint[pViewportIndex[1]].ymax,
249         pScissorsInFixedPoint[pViewportIndex[0]].ymax);
250 }