1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file backend.cpp
24 *
25 * @brief Backend handles rasterization, pixel shading and output merger
26 *        operations.
27 *
28 ******************************************************************************/
29 
30 #include <smmintrin.h>
31 
32 #include "backend.h"
33 #include "backend_impl.h"
34 #include "tilemgr.h"
35 #include "memory/tilingtraits.h"
36 #include "core/multisample.h"
37 
38 #include <algorithm>
39 
40 template<SWR_FORMAT format>
ClearRasterTile(uint8_t * pTileBuffer,simdvector & value)41 void ClearRasterTile(uint8_t *pTileBuffer, simdvector &value)
42 {
43     auto lambda = [&](int32_t comp)
44     {
45         FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
46 
47         pTileBuffer += (KNOB_SIMD_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
48     };
49 
50     const uint32_t numIter = (KNOB_TILE_Y_DIM / SIMD_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD_TILE_X_DIM);
51 
52     for (uint32_t i = 0; i < numIter; ++i)
53     {
54         UnrollerL<0, FormatTraits<format>::numComps, 1>::step(lambda);
55     }
56 }
57 
#if USE_8x2_TILE_BACKEND
//////////////////////////////////////////////////////////////////////////
/// @brief SIMD16 overload: writes a pre-packed clear value over one
///        raster tile of a hot tile buffer, one SIMD16 tile at a time.
///        Only compiled when USE_8x2_TILE_BACKEND is enabled.
/// @tparam format - hot tile surface format; selects the FormatTraits
///         used for component count, component size and the store.
/// @param pTileBuffer - first byte of the raster tile to overwrite.
/// @param value - per-component clear value, indexed by component.
template<SWR_FORMAT format>
void ClearRasterTile(uint8_t *pTileBuffer, simd16vector &value)
{
    // How many SIMD16 tiles make up one raster tile.
    const uint32_t simdTileCount = (KNOB_TILE_Y_DIM / SIMD16_TILE_Y_DIM) * (KNOB_TILE_X_DIM / SIMD16_TILE_X_DIM);

    // Stores a single component and then moves the write cursor past the
    // bytes that component occupies for KNOB_SIMD16_WIDTH pixels.
    // pTileBuffer is a by-value parameter, so only this function's own
    // copy of the pointer is advanced.
    auto storeComponent = [&](int32_t comp)
    {
        FormatTraits<format>::storeSOA(comp, pTileBuffer, value.v[comp]);
        pTileBuffer += (KNOB_SIMD16_WIDTH * FormatTraits<format>::GetBPC(comp) / 8);
    };

    for (uint32_t tilesLeft = simdTileCount; tilesLeft != 0; --tilesLeft)
    {
        // NOTE(review): UnrollerL<0, N, 1>::step presumably invokes the
        // functor once per component index in [0, N) - confirm against
        // the UnrollerL definition.
        UnrollerL<0, FormatTraits<format>::numComps, 1>::step(storeComponent);
    }
}

#endif
78 template<SWR_FORMAT format>
ClearMacroTile(DRAW_CONTEXT * pDC,SWR_RENDERTARGET_ATTACHMENT rt,uint32_t macroTile,uint32_t renderTargetArrayIndex,DWORD clear[4],const SWR_RECT & rect)79 INLINE void ClearMacroTile(DRAW_CONTEXT *pDC, SWR_RENDERTARGET_ATTACHMENT rt, uint32_t macroTile, uint32_t renderTargetArrayIndex, DWORD clear[4], const SWR_RECT& rect)
80 {
81     // convert clear color to hottile format
82     // clear color is in RGBA float/uint32
83 #if USE_8x2_TILE_BACKEND
84     simd16vector vClear;
85     for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
86     {
87         simd16scalar vComp;
88         vComp = _simd16_load1_ps((const float*)&clear[comp]);
89         if (FormatTraits<format>::isNormalized(comp))
90         {
91             vComp = _simd16_mul_ps(vComp, _simd16_set1_ps(FormatTraits<format>::fromFloat(comp)));
92             vComp = _simd16_castsi_ps(_simd16_cvtps_epi32(vComp));
93         }
94         vComp = FormatTraits<format>::pack(comp, vComp);
95         vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
96     }
97 
98 #else
99     simdvector vClear;
100     for (uint32_t comp = 0; comp < FormatTraits<format>::numComps; ++comp)
101     {
102         simdscalar vComp;
103         vComp = _simd_load1_ps((const float*)&clear[comp]);
104         if (FormatTraits<format>::isNormalized(comp))
105         {
106             vComp = _simd_mul_ps(vComp, _simd_set1_ps(FormatTraits<format>::fromFloat(comp)));
107             vComp = _simd_castsi_ps(_simd_cvtps_epi32(vComp));
108         }
109         vComp = FormatTraits<format>::pack(comp, vComp);
110         vClear.v[FormatTraits<format>::swizzle(comp)] = vComp;
111     }
112 
113 #endif
114     uint32_t tileX, tileY;
115     MacroTileMgr::getTileIndices(macroTile, tileX, tileY);
116 
117     // Init to full macrotile
118     SWR_RECT clearTile =
119     {
120         KNOB_MACROTILE_X_DIM * int32_t(tileX),
121         KNOB_MACROTILE_Y_DIM * int32_t(tileY),
122         KNOB_MACROTILE_X_DIM * int32_t(tileX + 1),
123         KNOB_MACROTILE_Y_DIM * int32_t(tileY + 1),
124     };
125 
126     // intersect with clear rect
127     clearTile &= rect;
128 
129     // translate to local hottile origin
130     clearTile.Translate(-int32_t(tileX) * KNOB_MACROTILE_X_DIM, -int32_t(tileY) * KNOB_MACROTILE_Y_DIM);
131 
132     // Make maximums inclusive (needed for convert to raster tiles)
133     clearTile.xmax -= 1;
134     clearTile.ymax -= 1;
135 
136     // convert to raster tiles
137     clearTile.ymin >>= (KNOB_TILE_Y_DIM_SHIFT);
138     clearTile.ymax >>= (KNOB_TILE_Y_DIM_SHIFT);
139     clearTile.xmin >>= (KNOB_TILE_X_DIM_SHIFT);
140     clearTile.xmax >>= (KNOB_TILE_X_DIM_SHIFT);
141 
142     const int32_t numSamples = GetNumSamples(pDC->pState->state.rastState.sampleCount);
143     // compute steps between raster tile samples / raster tiles / macro tile rows
144     const uint32_t rasterTileSampleStep = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * FormatTraits<format>::bpp / 8;
145     const uint32_t rasterTileStep = (KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<format>::bpp / 8)) * numSamples;
146     const uint32_t macroTileRowStep = (KNOB_MACROTILE_X_DIM / KNOB_TILE_X_DIM) * rasterTileStep;
147     const uint32_t pitch = (FormatTraits<format>::bpp * KNOB_MACROTILE_X_DIM / 8);
148 
149     HOTTILE *pHotTile = pDC->pContext->pHotTileMgr->GetHotTile(pDC->pContext, pDC, macroTile, rt, true, numSamples, renderTargetArrayIndex);
150     uint32_t rasterTileStartOffset = (ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, clearTile.xmin, clearTile.ymin)) * numSamples;
151     uint8_t* pRasterTileRow = pHotTile->pBuffer + rasterTileStartOffset; //(ComputeTileOffset2D< TilingTraits<SWR_TILE_SWRZ, FormatTraits<format>::bpp > >(pitch, x, y)) * numSamples;
152 
153     // loop over all raster tiles in the current hot tile
154     for (int32_t y = clearTile.ymin; y <= clearTile.ymax; ++y)
155     {
156         uint8_t* pRasterTile = pRasterTileRow;
157         for (int32_t x = clearTile.xmin; x <= clearTile.xmax; ++x)
158         {
159             for( int32_t sampleNum = 0; sampleNum < numSamples; sampleNum++)
160             {
161                 ClearRasterTile<format>(pRasterTile, vClear);
162                 pRasterTile += rasterTileSampleStep;
163             }
164         }
165         pRasterTileRow += macroTileRowStep;
166     }
167 
168     pHotTile->state = HOTTILE_DIRTY;
169 }
170 
171 
ProcessClearBE(DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t macroTile,void * pUserData)172 void ProcessClearBE(DRAW_CONTEXT *pDC, uint32_t workerId, uint32_t macroTile, void *pUserData)
173 {
174     SWR_CONTEXT *pContext = pDC->pContext;
175 
176     if (KNOB_FAST_CLEAR)
177     {
178         CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
179         SWR_MULTISAMPLE_COUNT sampleCount = pDC->pState->state.rastState.sampleCount;
180         uint32_t numSamples = GetNumSamples(sampleCount);
181 
182         SWR_ASSERT(pClear->attachmentMask != 0); // shouldn't be here without a reason.
183 
184         AR_BEGIN(BEClear, pDC->drawId);
185 
186         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
187         {
188             unsigned long rt = 0;
189             uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
190             while (_BitScanForward(&rt, mask))
191             {
192                 mask &= ~(1 << rt);
193 
194                 HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, (SWR_RENDERTARGET_ATTACHMENT)rt, true, numSamples, pClear->renderTargetArrayIndex);
195 
196                 // All we want to do here is to mark the hot tile as being in a "needs clear" state.
197                 pHotTile->clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
198                 pHotTile->clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
199                 pHotTile->clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
200                 pHotTile->clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
201                 pHotTile->state = HOTTILE_CLEAR;
202             }
203         }
204 
205         if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
206         {
207             HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_DEPTH, true, numSamples, pClear->renderTargetArrayIndex);
208             pHotTile->clearData[0] = *(DWORD*)&pClear->clearDepth;
209             pHotTile->state = HOTTILE_CLEAR;
210         }
211 
212         if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
213         {
214             HOTTILE *pHotTile = pContext->pHotTileMgr->GetHotTile(pContext, pDC, macroTile, SWR_ATTACHMENT_STENCIL, true, numSamples, pClear->renderTargetArrayIndex);
215 
216             pHotTile->clearData[0] = pClear->clearStencil;
217             pHotTile->state = HOTTILE_CLEAR;
218         }
219 
220         AR_END(BEClear, 1);
221     }
222     else
223     {
224         // Legacy clear
225         CLEAR_DESC *pClear = (CLEAR_DESC*)pUserData;
226         AR_BEGIN(BEClear, pDC->drawId);
227 
228         if (pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR)
229         {
230             DWORD clearData[4];
231             clearData[0] = *(DWORD*)&(pClear->clearRTColor[0]);
232             clearData[1] = *(DWORD*)&(pClear->clearRTColor[1]);
233             clearData[2] = *(DWORD*)&(pClear->clearRTColor[2]);
234             clearData[3] = *(DWORD*)&(pClear->clearRTColor[3]);
235 
236             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_COLOR_HOT_TILE_FORMAT];
237             SWR_ASSERT(pfnClearTiles != nullptr);
238 
239             unsigned long rt = 0;
240             uint32_t mask = pClear->attachmentMask & SWR_ATTACHMENT_MASK_COLOR;
241             while (_BitScanForward(&rt, mask))
242             {
243                 mask &= ~(1 << rt);
244 
245                 pfnClearTiles(pDC, (SWR_RENDERTARGET_ATTACHMENT)rt, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
246             }
247         }
248 
249         if (pClear->attachmentMask & SWR_ATTACHMENT_DEPTH_BIT)
250         {
251             DWORD clearData[4];
252             clearData[0] = *(DWORD*)&pClear->clearDepth;
253             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_DEPTH_HOT_TILE_FORMAT];
254             SWR_ASSERT(pfnClearTiles != nullptr);
255 
256             pfnClearTiles(pDC, SWR_ATTACHMENT_DEPTH, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
257         }
258 
259         if (pClear->attachmentMask & SWR_ATTACHMENT_STENCIL_BIT)
260         {
261             DWORD clearData[4];
262             clearData[0] = pClear->clearStencil;
263             PFN_CLEAR_TILES pfnClearTiles = gClearTilesTable[KNOB_STENCIL_HOT_TILE_FORMAT];
264 
265             pfnClearTiles(pDC, SWR_ATTACHMENT_STENCIL, macroTile, pClear->renderTargetArrayIndex, clearData, pClear->rect);
266         }
267 
268         AR_END(BEClear, 1);
269     }
270 }
271 
InitClearTilesTable()272 void InitClearTilesTable()
273 {
274     memset(gClearTilesTable, 0, sizeof(gClearTilesTable));
275 
276     gClearTilesTable[R8G8B8A8_UNORM]        = ClearMacroTile<R8G8B8A8_UNORM>;
277     gClearTilesTable[B8G8R8A8_UNORM]        = ClearMacroTile<B8G8R8A8_UNORM>;
278     gClearTilesTable[R32_FLOAT]             = ClearMacroTile<R32_FLOAT>;
279     gClearTilesTable[R32G32B32A32_FLOAT]    = ClearMacroTile<R32G32B32A32_FLOAT>;
280     gClearTilesTable[R8_UINT]               = ClearMacroTile<R8_UINT>;
281 }
282