1 /****************************************************************************
2  * Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file tilemgr.h
24  *
25  * @brief Definitions for Macro Tile Manager which provides the facilities
 *        for threads to work on a macro tile.
27  *
28  ******************************************************************************/
29 #pragma once
30 
31 #include <set>
32 #include <unordered_map>
33 #include "common/formats.h"
34 #include "common/intrin.h"
35 #include "fifo.hpp"
36 #include "context.h"
37 #include "format_traits.h"
38 
39 //////////////////////////////////////////////////////////////////////////
40 /// MacroTile - work queue for a tile.
41 //////////////////////////////////////////////////////////////////////////
42 struct MacroTileQueue
43 {
MacroTileQueueMacroTileQueue44     MacroTileQueue() {}
~MacroTileQueueMacroTileQueue45     ~MacroTileQueue() { destroy(); }
46 
47     //////////////////////////////////////////////////////////////////////////
48     /// @brief Returns number of work items queued for this tile.
getNumQueuedMacroTileQueue49     uint32_t getNumQueued() { return mFifo.getNumQueued(); }
50 
51     //////////////////////////////////////////////////////////////////////////
52     /// @brief Attempt to lock the work fifo. If already locked then return false.
tryLockMacroTileQueue53     bool tryLock() { return mFifo.tryLock(); }
54 
55     //////////////////////////////////////////////////////////////////////////
56     /// @brief Clear fifo and unlock it.
57     template <typename ArenaT>
clearMacroTileQueue58     void clear(ArenaT& arena)
59     {
60         mFifo.clear(arena);
61     }
62 
63     //////////////////////////////////////////////////////////////////////////
64     /// @brief Peek at work sitting at the front of the fifo.
peekMacroTileQueue65     BE_WORK* peek() { return mFifo.peek(); }
66 
67     template <typename ArenaT>
enqueue_try_nosyncMacroTileQueue68     bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
69     {
70         return mFifo.enqueue_try_nosync(arena, entry);
71     }
72 
73     //////////////////////////////////////////////////////////////////////////
74     /// @brief Move to next work item
dequeueMacroTileQueue75     void dequeue() { mFifo.dequeue_noinc(); }
76 
77     //////////////////////////////////////////////////////////////////////////
78     /// @brief Destroy fifo
destroyMacroTileQueue79     void destroy() { mFifo.destroy(); }
80 
81     ///@todo This will all be private.
82     uint32_t mWorkItemsFE = 0;
83     uint32_t mWorkItemsBE = 0;
84     uint32_t mId          = 0;
85 
86 private:
87     QUEUE<BE_WORK> mFifo;
88 };
89 
90 //////////////////////////////////////////////////////////////////////////
91 /// MacroTileMgr - Manages macrotiles for a draw.
92 //////////////////////////////////////////////////////////////////////////
93 class MacroTileMgr
94 {
95 public:
96     MacroTileMgr(CachingArena& arena);
~MacroTileMgr()97     ~MacroTileMgr()
98     {
99         for (auto* pTile : mTiles)
100         {
101             delete pTile;
102         }
103     }
104 
initialize()105     INLINE void initialize()
106     {
107         mWorkItemsProduced = 0;
108         mWorkItemsConsumed = 0;
109 
110         mDirtyTiles.clear();
111     }
112 
getDirtyTiles()113     INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
114     void                                 markTileComplete(uint32_t id);
115 
isWorkComplete()116     INLINE bool isWorkComplete() { return mWorkItemsProduced == mWorkItemsConsumed; }
117 
118     void enqueue(uint32_t x, uint32_t y, BE_WORK* pWork);
119 
getTileIndices(uint32_t tileID,uint32_t & x,uint32_t & y)120     static INLINE void getTileIndices(uint32_t tileID, uint32_t& x, uint32_t& y)
121     {
122         // Morton / Z order of tiles
123         x = pext_u32(tileID, 0x55555555);
124         y = pext_u32(tileID, 0xAAAAAAAA);
125     }
126 
getTileId(uint32_t x,uint32_t y)127     static INLINE uint32_t getTileId(uint32_t x, uint32_t y)
128     {
129         // Morton / Z order of tiles
130         return pdep_u32(x, 0x55555555) | pdep_u32(y, 0xAAAAAAAA);
131     }
132 
133 private:
134     CachingArena&                mArena;
135     std::vector<MacroTileQueue*> mTiles;
136 
137     // Any tile that has work queued to it is a dirty tile.
138     std::vector<MacroTileQueue*> mDirtyTiles;
139 
OSALIGNLINE(long)140     OSALIGNLINE(long) mWorkItemsProduced{0};
OSALIGNLINE(volatile long)141     OSALIGNLINE(volatile long) mWorkItemsConsumed{0};
142 };
143 
144 typedef void (*PFN_DISPATCH)(DRAW_CONTEXT* pDC,
145                              uint32_t      workerId,
146                              uint32_t      threadGroupId,
147                              void*&        pSpillFillBuffer,
148                              void*&        pScratchSpace);
149 
150 //////////////////////////////////////////////////////////////////////////
151 /// DispatchQueue - work queue for dispatch
152 //////////////////////////////////////////////////////////////////////////
153 class DispatchQueue
154 {
155 public:
DispatchQueue()156     DispatchQueue() {}
157 
158     //////////////////////////////////////////////////////////////////////////
159     /// @brief Setup the producer consumer counts.
initialize(uint32_t totalTasks,void * pTaskData,PFN_DISPATCH pfnDispatch)160     void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch)
161     {
162         // The available and outstanding counts start with total tasks.
163         // At the start there are N tasks available and outstanding.
164         // When both the available and outstanding counts have reached 0 then all work has
165         // completed. When a worker starts on a threadgroup then it decrements the available count.
166         // When a worker completes a threadgroup then it decrements the outstanding count.
167 
168         mTasksAvailable   = totalTasks;
169         mTasksOutstanding = totalTasks;
170 
171         mpTaskData   = pTaskData;
172         mPfnDispatch = pfnDispatch;
173     }
174 
175     //////////////////////////////////////////////////////////////////////////
176     /// @brief Returns number of tasks available for this dispatch.
getNumQueued()177     uint32_t getNumQueued() { return (mTasksAvailable > 0) ? mTasksAvailable : 0; }
178 
179     //////////////////////////////////////////////////////////////////////////
180     /// @brief Atomically decrement the work available count. If the result
181     //         is greater than 0 then we can on the associated thread group.
182     //         Otherwise, there is no more work to do.
getWork(uint32_t & groupId)183     bool getWork(uint32_t& groupId)
184     {
185         long result = InterlockedDecrement(&mTasksAvailable);
186 
187         if (result >= 0)
188         {
189             groupId = result;
190             return true;
191         }
192 
193         return false;
194     }
195 
196     //////////////////////////////////////////////////////////////////////////
197     /// @brief Atomically decrement the outstanding count. A worker is notifying
198     ///        us that he just finished some work. Also, return true if we're
199     ///        the last worker to complete this dispatch.
finishedWork()200     bool finishedWork()
201     {
202         long result = InterlockedDecrement(&mTasksOutstanding);
203         SWR_ASSERT(result >= 0, "Should never oversubscribe work");
204 
205         return (result == 0) ? true : false;
206     }
207 
208     //////////////////////////////////////////////////////////////////////////
209     /// @brief Work is complete once both the available/outstanding counts have reached 0.
isWorkComplete()210     bool isWorkComplete() { return ((mTasksAvailable <= 0) && (mTasksOutstanding <= 0)); }
211 
212     //////////////////////////////////////////////////////////////////////////
213     /// @brief Return pointer to task data.
GetTasksData()214     const void* GetTasksData() { return mpTaskData; }
215 
216     //////////////////////////////////////////////////////////////////////////
217     /// @brief Dispatches a unit of work
dispatch(DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t threadGroupId,void * & pSpillFillBuffer,void * & pScratchSpace)218     void dispatch(DRAW_CONTEXT* pDC,
219                   uint32_t      workerId,
220                   uint32_t      threadGroupId,
221                   void*&        pSpillFillBuffer,
222                   void*&        pScratchSpace)
223     {
224         SWR_ASSERT(mPfnDispatch != nullptr);
225         mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
226     }
227 
228     void* mpTaskData{nullptr}; // The API thread will set this up and the callback task function
229                                // will interpet this.
230     PFN_DISPATCH mPfnDispatch{nullptr}; // Function to call per dispatch
231 
OSALIGNLINE(volatile long)232     OSALIGNLINE(volatile long) mTasksAvailable{0};
OSALIGNLINE(volatile long)233     OSALIGNLINE(volatile long) mTasksOutstanding{0};
234 };
235 
/// State of a hot tile.
/// @note this enum needs to be kept in sync with SWR_TILE_STATE!
enum HOTTILE_STATE
{
    // tile is in uninitialized state and should be loaded with surface
    // contents before rendering
    HOTTILE_INVALID = 0,

    // tile should be cleared
    HOTTILE_CLEAR = 1,

    // tile has been rendered to
    HOTTILE_DIRTY = 2,

    // tile is consistent with memory (either loaded or stored)
    HOTTILE_RESOLVED = 3,
};
245 
246 struct HOTTILE
247 {
248     uint8_t*      pBuffer;
249     HOTTILE_STATE state;
250     uint32_t clearData[4]; // May need to change based on pfnClearTile implementation.  Reorder for
251                         // alignment?
252     uint32_t numSamples;
253     uint32_t renderTargetArrayIndex; // current render target array index loaded
254 };
255 
256 union HotTileSet
257 {
258     struct
259     {
260         HOTTILE Color[SWR_NUM_RENDERTARGETS];
261         HOTTILE Depth;
262         HOTTILE Stencil;
263     };
264     HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
265 };
266 
267 class HotTileMgr
268 {
269 public:
HotTileMgr()270     HotTileMgr()
271     {
272         memset(mHotTiles, 0, sizeof(mHotTiles));
273 
274         // cache hottile size
275         for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
276         {
277             mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
278                               FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
279         }
280         mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
281                                              FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
282         mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM *
283                                                FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
284     }
285 
~HotTileMgr()286     ~HotTileMgr()
287     {
288         for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
289         {
290             for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
291             {
292                 for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
293                 {
294                     FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer);
295                 }
296             }
297         }
298     }
299 
300     void InitializeHotTiles(SWR_CONTEXT*  pContext,
301                             DRAW_CONTEXT* pDC,
302                             uint32_t      workerId,
303                             uint32_t      macroID);
304 
305     HOTTILE* GetHotTile(SWR_CONTEXT*                pContext,
306                         DRAW_CONTEXT*               pDC,
307                         HANDLE                      hWorkerData,
308                         uint32_t                    macroID,
309                         SWR_RENDERTARGET_ATTACHMENT attachment,
310                         bool                        create,
311                         uint32_t                    numSamples             = 1,
312                         uint32_t                    renderTargetArrayIndex = 0);
313 
314     HOTTILE* GetHotTileNoLoad(SWR_CONTEXT*                pContext,
315                               DRAW_CONTEXT*               pDC,
316                               uint32_t                    macroID,
317                               SWR_RENDERTARGET_ATTACHMENT attachment,
318                               bool                        create,
319                               uint32_t                    numSamples = 1);
320 
321     static void ClearColorHotTile(const HOTTILE* pHotTile);
322     static void ClearDepthHotTile(const HOTTILE* pHotTile);
323     static void ClearStencilHotTile(const HOTTILE* pHotTile);
324 
325 private:
326     HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
327     uint32_t   mHotTileSize[SWR_NUM_ATTACHMENTS];
328 
AllocHotTileMem(size_t size,uint32_t align,uint32_t numaNode)329     void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
330     {
331         void* p = nullptr;
332 #if defined(_WIN32)
333         HANDLE hProcess = GetCurrentProcess();
334         p               = VirtualAllocExNuma(
335             hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
336 #else
337         p = AlignedMalloc(size, align);
338 #endif
339 
340         return p;
341     }
342 
FreeHotTileMem(void * pBuffer)343     void FreeHotTileMem(void* pBuffer)
344     {
345         if (pBuffer)
346         {
347 #if defined(_WIN32)
348             VirtualFree(pBuffer, 0, MEM_RELEASE);
349 #else
350             AlignedFree(pBuffer);
351 #endif
352         }
353     }
354 };
355