1 /****************************************************************************
2  * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file binner.h
24  *
25  * @brief Declaration for the macrotile binner
26  *
27  ******************************************************************************/
28 #include "state.h"
29 #include "conservativeRast.h"
30 #include "utils.h"
31 //////////////////////////////////////////////////////////////////////////
32 /// @brief Offsets added to post-viewport vertex positions based on
33 /// raster state.
34 ///
35 /// Can't use templated variable because we must stick with C++11 features.
36 /// Template variables were introduced with C++14
37 template <typename SIMD_T>
38 struct SwrPixelOffsets
39 {
40 public:
GetOffsetSwrPixelOffsets41     INLINE static Float<SIMD_T> GetOffset(uint32_t loc)
42     {
43         SWR_ASSERT(loc <= 1);
44 
45         return SIMD_T::set1_ps(loc ? 0.5f : 0.0f);
46     }
47 };
48 
49 //////////////////////////////////////////////////////////////////////////
50 /// @brief Convert the X,Y coords of a triangle to the requested Fixed
51 /// Point precision from FP32.
52 template <typename SIMD_T, typename PT = FixedPointTraits<Fixed_16_8>>
fpToFixedPointVertical(const Float<SIMD_T> & vIn)53 INLINE Integer<SIMD_T> fpToFixedPointVertical(const Float<SIMD_T>& vIn)
54 {
55     return SIMD_T::cvtps_epi32(SIMD_T::mul_ps(vIn, SIMD_T::set1_ps(PT::ScaleT::value)));
56 }
57 
58 //////////////////////////////////////////////////////////////////////////
59 /// @brief Helper function to set the X,Y coords of a triangle to the
60 /// requested Fixed Point precision from FP32.
61 /// @param tri: simdvector[3] of FP triangle verts
62 /// @param vXi: fixed point X coords of tri verts
63 /// @param vYi: fixed point Y coords of tri verts
64 template <typename SIMD_T>
65 INLINE static void
FPToFixedPoint(const Vec4<SIMD_T> * const tri,Integer<SIMD_T> (& vXi)[3],Integer<SIMD_T> (& vYi)[3])66 FPToFixedPoint(const Vec4<SIMD_T>* const tri, Integer<SIMD_T> (&vXi)[3], Integer<SIMD_T> (&vYi)[3])
67 {
68     vXi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].x);
69     vYi[0] = fpToFixedPointVertical<SIMD_T>(tri[0].y);
70     vXi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].x);
71     vYi[1] = fpToFixedPointVertical<SIMD_T>(tri[1].y);
72     vXi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].x);
73     vYi[2] = fpToFixedPointVertical<SIMD_T>(tri[2].y);
74 }
75 
76 //////////////////////////////////////////////////////////////////////////
77 /// @brief Calculate bounding box for current triangle
78 /// @tparam CT: ConservativeRastFETraits type
79 /// @param vX: fixed point X position for triangle verts
80 /// @param vY: fixed point Y position for triangle verts
81 /// @param bbox: fixed point bbox
82 /// *Note*: expects vX, vY to be in the correct precision for the type
83 /// of rasterization. This avoids unnecessary FP->fixed conversions.
84 template <typename SIMD_T, typename CT>
calcBoundingBoxIntVertical(const Integer<SIMD_T> (& vX)[3],const Integer<SIMD_T> (& vY)[3],SIMDBBOX_T<SIMD_T> & bbox)85 INLINE void calcBoundingBoxIntVertical(const Integer<SIMD_T> (&vX)[3],
86                                        const Integer<SIMD_T> (&vY)[3],
87                                        SIMDBBOX_T<SIMD_T>& bbox)
88 {
89     Integer<SIMD_T> vMinX = vX[0];
90 
91     vMinX = SIMD_T::min_epi32(vMinX, vX[1]);
92     vMinX = SIMD_T::min_epi32(vMinX, vX[2]);
93 
94     Integer<SIMD_T> vMaxX = vX[0];
95 
96     vMaxX = SIMD_T::max_epi32(vMaxX, vX[1]);
97     vMaxX = SIMD_T::max_epi32(vMaxX, vX[2]);
98 
99     Integer<SIMD_T> vMinY = vY[0];
100 
101     vMinY = SIMD_T::min_epi32(vMinY, vY[1]);
102     vMinY = SIMD_T::min_epi32(vMinY, vY[2]);
103 
104     Integer<SIMD_T> vMaxY = vY[0];
105 
106     vMaxY = SIMD_T::max_epi32(vMaxY, vY[1]);
107     vMaxY = SIMD_T::max_epi32(vMaxY, vY[2]);
108 
109     if (CT::BoundingBoxOffsetT::value != 0)
110     {
111         /// Bounding box needs to be expanded by 1/512 before snapping to 16.8 for conservative
112         /// rasterization expand bbox by 1/256; coverage will be correctly handled in the
113         /// rasterizer.
114 
115         const Integer<SIMD_T> value = SIMD_T::set1_epi32(CT::BoundingBoxOffsetT::value);
116 
117         vMinX = SIMD_T::sub_epi32(vMinX, value);
118         vMaxX = SIMD_T::add_epi32(vMaxX, value);
119         vMinY = SIMD_T::sub_epi32(vMinY, value);
120         vMaxY = SIMD_T::add_epi32(vMaxY, value);
121     }
122 
123     bbox.xmin = vMinX;
124     bbox.xmax = vMaxX;
125     bbox.ymin = vMinY;
126     bbox.ymax = vMaxY;
127 }
128 
129 //////////////////////////////////////////////////////////////////////////
130 /// @brief  Gather scissor rect data based on per-prim viewport indices.
131 /// @param pScissorsInFixedPoint - array of scissor rects in 16.8 fixed point.
132 /// @param pViewportIndex - array of per-primitive vewport indexes.
133 /// @param scisXmin - output vector of per-prmitive scissor rect Xmin data.
134 /// @param scisYmin - output vector of per-prmitive scissor rect Ymin data.
135 /// @param scisXmax - output vector of per-prmitive scissor rect Xmax data.
136 /// @param scisYmax - output vector of per-prmitive scissor rect Ymax data.
137 //
138 /// @todo:  Look at speeding this up -- weigh against corresponding costs in rasterizer.
GatherScissors(const SWR_RECT * pScissorsInFixedPoint,const uint32_t * pViewportIndex,simdscalari & scisXmin,simdscalari & scisYmin,simdscalari & scisXmax,simdscalari & scisYmax)139 static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
140                            const uint32_t* pViewportIndex,
141                            simdscalari&    scisXmin,
142                            simdscalari&    scisYmin,
143                            simdscalari&    scisXmax,
144                            simdscalari&    scisYmax)
145 {
146     scisXmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmin,
147                                pScissorsInFixedPoint[pViewportIndex[6]].xmin,
148                                pScissorsInFixedPoint[pViewportIndex[5]].xmin,
149                                pScissorsInFixedPoint[pViewportIndex[4]].xmin,
150                                pScissorsInFixedPoint[pViewportIndex[3]].xmin,
151                                pScissorsInFixedPoint[pViewportIndex[2]].xmin,
152                                pScissorsInFixedPoint[pViewportIndex[1]].xmin,
153                                pScissorsInFixedPoint[pViewportIndex[0]].xmin);
154     scisYmin = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymin,
155                                pScissorsInFixedPoint[pViewportIndex[6]].ymin,
156                                pScissorsInFixedPoint[pViewportIndex[5]].ymin,
157                                pScissorsInFixedPoint[pViewportIndex[4]].ymin,
158                                pScissorsInFixedPoint[pViewportIndex[3]].ymin,
159                                pScissorsInFixedPoint[pViewportIndex[2]].ymin,
160                                pScissorsInFixedPoint[pViewportIndex[1]].ymin,
161                                pScissorsInFixedPoint[pViewportIndex[0]].ymin);
162     scisXmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].xmax,
163                                pScissorsInFixedPoint[pViewportIndex[6]].xmax,
164                                pScissorsInFixedPoint[pViewportIndex[5]].xmax,
165                                pScissorsInFixedPoint[pViewportIndex[4]].xmax,
166                                pScissorsInFixedPoint[pViewportIndex[3]].xmax,
167                                pScissorsInFixedPoint[pViewportIndex[2]].xmax,
168                                pScissorsInFixedPoint[pViewportIndex[1]].xmax,
169                                pScissorsInFixedPoint[pViewportIndex[0]].xmax);
170     scisYmax = _simd_set_epi32(pScissorsInFixedPoint[pViewportIndex[7]].ymax,
171                                pScissorsInFixedPoint[pViewportIndex[6]].ymax,
172                                pScissorsInFixedPoint[pViewportIndex[5]].ymax,
173                                pScissorsInFixedPoint[pViewportIndex[4]].ymax,
174                                pScissorsInFixedPoint[pViewportIndex[3]].ymax,
175                                pScissorsInFixedPoint[pViewportIndex[2]].ymax,
176                                pScissorsInFixedPoint[pViewportIndex[1]].ymax,
177                                pScissorsInFixedPoint[pViewportIndex[0]].ymax);
178 }
179 
GatherScissors(const SWR_RECT * pScissorsInFixedPoint,const uint32_t * pViewportIndex,simd16scalari & scisXmin,simd16scalari & scisYmin,simd16scalari & scisXmax,simd16scalari & scisYmax)180 static void GatherScissors(const SWR_RECT* pScissorsInFixedPoint,
181                            const uint32_t* pViewportIndex,
182                            simd16scalari&  scisXmin,
183                            simd16scalari&  scisYmin,
184                            simd16scalari&  scisXmax,
185                            simd16scalari&  scisYmax)
186 {
187     scisXmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmin,
188                                  pScissorsInFixedPoint[pViewportIndex[14]].xmin,
189                                  pScissorsInFixedPoint[pViewportIndex[13]].xmin,
190                                  pScissorsInFixedPoint[pViewportIndex[12]].xmin,
191                                  pScissorsInFixedPoint[pViewportIndex[11]].xmin,
192                                  pScissorsInFixedPoint[pViewportIndex[10]].xmin,
193                                  pScissorsInFixedPoint[pViewportIndex[9]].xmin,
194                                  pScissorsInFixedPoint[pViewportIndex[8]].xmin,
195                                  pScissorsInFixedPoint[pViewportIndex[7]].xmin,
196                                  pScissorsInFixedPoint[pViewportIndex[6]].xmin,
197                                  pScissorsInFixedPoint[pViewportIndex[5]].xmin,
198                                  pScissorsInFixedPoint[pViewportIndex[4]].xmin,
199                                  pScissorsInFixedPoint[pViewportIndex[3]].xmin,
200                                  pScissorsInFixedPoint[pViewportIndex[2]].xmin,
201                                  pScissorsInFixedPoint[pViewportIndex[1]].xmin,
202                                  pScissorsInFixedPoint[pViewportIndex[0]].xmin);
203 
204     scisYmin = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymin,
205                                  pScissorsInFixedPoint[pViewportIndex[14]].ymin,
206                                  pScissorsInFixedPoint[pViewportIndex[13]].ymin,
207                                  pScissorsInFixedPoint[pViewportIndex[12]].ymin,
208                                  pScissorsInFixedPoint[pViewportIndex[11]].ymin,
209                                  pScissorsInFixedPoint[pViewportIndex[10]].ymin,
210                                  pScissorsInFixedPoint[pViewportIndex[9]].ymin,
211                                  pScissorsInFixedPoint[pViewportIndex[8]].ymin,
212                                  pScissorsInFixedPoint[pViewportIndex[7]].ymin,
213                                  pScissorsInFixedPoint[pViewportIndex[6]].ymin,
214                                  pScissorsInFixedPoint[pViewportIndex[5]].ymin,
215                                  pScissorsInFixedPoint[pViewportIndex[4]].ymin,
216                                  pScissorsInFixedPoint[pViewportIndex[3]].ymin,
217                                  pScissorsInFixedPoint[pViewportIndex[2]].ymin,
218                                  pScissorsInFixedPoint[pViewportIndex[1]].ymin,
219                                  pScissorsInFixedPoint[pViewportIndex[0]].ymin);
220 
221     scisXmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].xmax,
222                                  pScissorsInFixedPoint[pViewportIndex[14]].xmax,
223                                  pScissorsInFixedPoint[pViewportIndex[13]].xmax,
224                                  pScissorsInFixedPoint[pViewportIndex[12]].xmax,
225                                  pScissorsInFixedPoint[pViewportIndex[11]].xmax,
226                                  pScissorsInFixedPoint[pViewportIndex[10]].xmax,
227                                  pScissorsInFixedPoint[pViewportIndex[9]].xmax,
228                                  pScissorsInFixedPoint[pViewportIndex[8]].xmax,
229                                  pScissorsInFixedPoint[pViewportIndex[7]].xmax,
230                                  pScissorsInFixedPoint[pViewportIndex[6]].xmax,
231                                  pScissorsInFixedPoint[pViewportIndex[5]].xmax,
232                                  pScissorsInFixedPoint[pViewportIndex[4]].xmax,
233                                  pScissorsInFixedPoint[pViewportIndex[3]].xmax,
234                                  pScissorsInFixedPoint[pViewportIndex[2]].xmax,
235                                  pScissorsInFixedPoint[pViewportIndex[1]].xmax,
236                                  pScissorsInFixedPoint[pViewportIndex[0]].xmax);
237 
238     scisYmax = _simd16_set_epi32(pScissorsInFixedPoint[pViewportIndex[15]].ymax,
239                                  pScissorsInFixedPoint[pViewportIndex[14]].ymax,
240                                  pScissorsInFixedPoint[pViewportIndex[13]].ymax,
241                                  pScissorsInFixedPoint[pViewportIndex[12]].ymax,
242                                  pScissorsInFixedPoint[pViewportIndex[11]].ymax,
243                                  pScissorsInFixedPoint[pViewportIndex[10]].ymax,
244                                  pScissorsInFixedPoint[pViewportIndex[9]].ymax,
245                                  pScissorsInFixedPoint[pViewportIndex[8]].ymax,
246                                  pScissorsInFixedPoint[pViewportIndex[7]].ymax,
247                                  pScissorsInFixedPoint[pViewportIndex[6]].ymax,
248                                  pScissorsInFixedPoint[pViewportIndex[5]].ymax,
249                                  pScissorsInFixedPoint[pViewportIndex[4]].ymax,
250                                  pScissorsInFixedPoint[pViewportIndex[3]].ymax,
251                                  pScissorsInFixedPoint[pViewportIndex[2]].ymax,
252                                  pScissorsInFixedPoint[pViewportIndex[1]].ymax,
253                                  pScissorsInFixedPoint[pViewportIndex[0]].ymax);
254 }