1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file tilemgr.h
24 *
25 * @brief Definitions for Macro Tile Manager which provides the facilities
*        for threads to work on a macro tile.
27 *
28 ******************************************************************************/
29 #pragma once
30 
31 #include <set>
32 #include <unordered_map>
33 #include "common/formats.h"
34 #include "fifo.hpp"
35 #include "context.h"
36 #include "format_traits.h"
37 
38 //////////////////////////////////////////////////////////////////////////
39 /// MacroTile - work queue for a tile.
40 //////////////////////////////////////////////////////////////////////////
41 struct MacroTileQueue
42 {
MacroTileQueueMacroTileQueue43     MacroTileQueue() { }
~MacroTileQueueMacroTileQueue44     ~MacroTileQueue() { }
45 
46     //////////////////////////////////////////////////////////////////////////
47     /// @brief Returns number of work items queued for this tile.
getNumQueuedMacroTileQueue48     uint32_t getNumQueued()
49     {
50         return mFifo.getNumQueued();
51     }
52 
53     //////////////////////////////////////////////////////////////////////////
54     /// @brief Attempt to lock the work fifo. If already locked then return false.
tryLockMacroTileQueue55     bool tryLock()
56     {
57         return mFifo.tryLock();
58     }
59 
60     //////////////////////////////////////////////////////////////////////////
61     /// @brief Clear fifo and unlock it.
62     template <typename ArenaT>
clearMacroTileQueue63     void clear(ArenaT& arena)
64     {
65         mFifo.clear(arena);
66     }
67 
68     //////////////////////////////////////////////////////////////////////////
69     /// @brief Peek at work sitting at the front of the fifo.
peekMacroTileQueue70     BE_WORK* peek()
71     {
72         return mFifo.peek();
73     }
74 
75     template <typename ArenaT>
enqueue_try_nosyncMacroTileQueue76     bool enqueue_try_nosync(ArenaT& arena, const BE_WORK* entry)
77     {
78         return mFifo.enqueue_try_nosync(arena, entry);
79     }
80 
81     //////////////////////////////////////////////////////////////////////////
82     /// @brief Move to next work item
dequeueMacroTileQueue83     void dequeue()
84     {
85         mFifo.dequeue_noinc();
86     }
87 
88     //////////////////////////////////////////////////////////////////////////
89     /// @brief Destroy fifo
destroyMacroTileQueue90     void destroy()
91     {
92         mFifo.destroy();
93     }
94 
95     ///@todo This will all be private.
96     uint32_t mWorkItemsFE = 0;
97     uint32_t mWorkItemsBE = 0;
98     uint32_t mId = 0;
99 
100 private:
101     QUEUE<BE_WORK> mFifo;
102 };
103 
104 //////////////////////////////////////////////////////////////////////////
105 /// MacroTileMgr - Manages macrotiles for a draw.
106 //////////////////////////////////////////////////////////////////////////
107 class MacroTileMgr
108 {
109 public:
110     MacroTileMgr(CachingArena& arena);
~MacroTileMgr()111     ~MacroTileMgr()
112     {
113         for (auto &tile : mTiles)
114         {
115             tile.second.destroy();
116         }
117     }
118 
initialize()119     INLINE void initialize()
120     {
121         mWorkItemsProduced = 0;
122         mWorkItemsConsumed = 0;
123 
124         mDirtyTiles.clear();
125     }
126 
getDirtyTiles()127     INLINE std::vector<MacroTileQueue*>& getDirtyTiles() { return mDirtyTiles; }
128     void markTileComplete(uint32_t id);
129 
isWorkComplete()130     INLINE bool isWorkComplete()
131     {
132         return mWorkItemsProduced == mWorkItemsConsumed;
133     }
134 
135     void enqueue(uint32_t x, uint32_t y, BE_WORK *pWork);
136 
getTileIndices(uint32_t tileID,uint32_t & x,uint32_t & y)137     static INLINE void getTileIndices(uint32_t tileID, uint32_t &x, uint32_t &y)
138     {
139         y = tileID & 0xffff;
140         x = (tileID >> 16) & 0xffff;
141     }
142 
143 private:
144     CachingArena& mArena;
145     std::unordered_map<uint32_t, MacroTileQueue> mTiles;
146 
147     // Any tile that has work queued to it is a dirty tile.
148     std::vector<MacroTileQueue*> mDirtyTiles;
149 
OSALIGNLINE(long)150     OSALIGNLINE(long) mWorkItemsProduced { 0 };
OSALIGNLINE(volatile long)151     OSALIGNLINE(volatile long) mWorkItemsConsumed { 0 };
152 };
153 
154 typedef void(*PFN_DISPATCH)(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace);
155 
156 //////////////////////////////////////////////////////////////////////////
157 /// DispatchQueue - work queue for dispatch
158 //////////////////////////////////////////////////////////////////////////
159 class DispatchQueue
160 {
161 public:
DispatchQueue()162     DispatchQueue() {}
163 
164     //////////////////////////////////////////////////////////////////////////
165     /// @brief Setup the producer consumer counts.
initialize(uint32_t totalTasks,void * pTaskData,PFN_DISPATCH pfnDispatch)166     void initialize(uint32_t totalTasks, void* pTaskData, PFN_DISPATCH pfnDispatch)
167     {
168         // The available and outstanding counts start with total tasks.
169         // At the start there are N tasks available and outstanding.
170         // When both the available and outstanding counts have reached 0 then all work has completed.
171         // When a worker starts on a threadgroup then it decrements the available count.
172         // When a worker completes a threadgroup then it decrements the outstanding count.
173 
174         mTasksAvailable = totalTasks;
175         mTasksOutstanding = totalTasks;
176 
177         mpTaskData = pTaskData;
178         mPfnDispatch = pfnDispatch;
179     }
180 
181     //////////////////////////////////////////////////////////////////////////
182     /// @brief Returns number of tasks available for this dispatch.
getNumQueued()183     uint32_t getNumQueued()
184     {
185         return (mTasksAvailable > 0) ? mTasksAvailable : 0;
186     }
187 
188     //////////////////////////////////////////////////////////////////////////
189     /// @brief Atomically decrement the work available count. If the result
190     //         is greater than 0 then we can on the associated thread group.
191     //         Otherwise, there is no more work to do.
getWork(uint32_t & groupId)192     bool getWork(uint32_t& groupId)
193     {
194         long result = InterlockedDecrement(&mTasksAvailable);
195 
196         if (result >= 0)
197         {
198             groupId = result;
199             return true;
200         }
201 
202         return false;
203     }
204 
205     //////////////////////////////////////////////////////////////////////////
206     /// @brief Atomically decrement the outstanding count. A worker is notifying
207     ///        us that he just finished some work. Also, return true if we're
208     ///        the last worker to complete this dispatch.
finishedWork()209     bool finishedWork()
210     {
211         long result = InterlockedDecrement(&mTasksOutstanding);
212         SWR_ASSERT(result >= 0, "Should never oversubscribe work");
213 
214         return (result == 0) ? true : false;
215     }
216 
217     //////////////////////////////////////////////////////////////////////////
218     /// @brief Work is complete once both the available/outstanding counts have reached 0.
isWorkComplete()219     bool isWorkComplete()
220     {
221         return ((mTasksAvailable <= 0) &&
222                 (mTasksOutstanding <= 0));
223     }
224 
225     //////////////////////////////////////////////////////////////////////////
226     /// @brief Return pointer to task data.
GetTasksData()227     const void* GetTasksData()
228     {
229         return mpTaskData;
230     }
231 
232     //////////////////////////////////////////////////////////////////////////
233     /// @brief Dispatches a unit of work
dispatch(DRAW_CONTEXT * pDC,uint32_t workerId,uint32_t threadGroupId,void * & pSpillFillBuffer,void * & pScratchSpace)234     void dispatch(DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t threadGroupId, void*& pSpillFillBuffer, void*& pScratchSpace)
235     {
236         SWR_ASSERT(mPfnDispatch != nullptr);
237         mPfnDispatch(pDC, workerId, threadGroupId, pSpillFillBuffer, pScratchSpace);
238     }
239 
240     void* mpTaskData{ nullptr };        // The API thread will set this up and the callback task function will interpet this.
241     PFN_DISPATCH mPfnDispatch{ nullptr };      // Function to call per dispatch
242 
OSALIGNLINE(volatile long)243     OSALIGNLINE(volatile long) mTasksAvailable{ 0 };
OSALIGNLINE(volatile long)244     OSALIGNLINE(volatile long) mTasksOutstanding{ 0 };
245 };
246 
247 
/// State of a hot tile, stored in HOTTILE::state.
enum HOTTILE_STATE
{
    HOTTILE_INVALID  = 0,   ///< tile is in uninitialized state and should be loaded with surface contents before rendering
    HOTTILE_CLEAR    = 1,   ///< tile should be cleared
    HOTTILE_DIRTY    = 2,   ///< tile has been rendered to
    HOTTILE_RESOLVED = 3,   ///< tile has been stored to memory
};
255 
256 struct HOTTILE
257 {
258     uint8_t *pBuffer;
259     HOTTILE_STATE state;
260     DWORD clearData[4];                 // May need to change based on pfnClearTile implementation.  Reorder for alignment?
261     uint32_t numSamples;
262     uint32_t renderTargetArrayIndex;    // current render target array index loaded
263 };
264 
265 union HotTileSet
266 {
267     struct
268     {
269         HOTTILE Color[SWR_NUM_RENDERTARGETS];
270         HOTTILE Depth;
271         HOTTILE Stencil;
272     };
273     HOTTILE Attachment[SWR_NUM_ATTACHMENTS];
274 };
275 
276 class HotTileMgr
277 {
278 public:
HotTileMgr()279     HotTileMgr()
280     {
281         memset(mHotTiles, 0, sizeof(mHotTiles));
282 
283         // cache hottile size
284         for (uint32_t i = SWR_ATTACHMENT_COLOR0; i <= SWR_ATTACHMENT_COLOR7; ++i)
285         {
286             mHotTileSize[i] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_COLOR_HOT_TILE_FORMAT>::bpp / 8;
287         }
288         mHotTileSize[SWR_ATTACHMENT_DEPTH] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_DEPTH_HOT_TILE_FORMAT>::bpp / 8;
289         mHotTileSize[SWR_ATTACHMENT_STENCIL] = KNOB_MACROTILE_X_DIM * KNOB_MACROTILE_Y_DIM * FormatTraits<KNOB_STENCIL_HOT_TILE_FORMAT>::bpp / 8;
290     }
291 
~HotTileMgr()292     ~HotTileMgr()
293     {
294         for (int x = 0; x < KNOB_NUM_HOT_TILES_X; ++x)
295         {
296             for (int y = 0; y < KNOB_NUM_HOT_TILES_Y; ++y)
297             {
298                 for (int a = 0; a < SWR_NUM_ATTACHMENTS; ++a)
299                 {
300                     FreeHotTileMem(mHotTiles[x][y].Attachment[a].pBuffer);
301                 }
302             }
303         }
304     }
305 
306     void InitializeHotTiles(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t workerId, uint32_t macroID);
307 
308     HOTTILE *GetHotTile(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1,
309         uint32_t renderTargetArrayIndex = 0);
310 
311     HOTTILE *GetHotTileNoLoad(SWR_CONTEXT* pContext, DRAW_CONTEXT* pDC, uint32_t macroID, SWR_RENDERTARGET_ATTACHMENT attachment, bool create, uint32_t numSamples = 1);
312 
313     static void ClearColorHotTile(const HOTTILE* pHotTile);
314     static void ClearDepthHotTile(const HOTTILE* pHotTile);
315     static void ClearStencilHotTile(const HOTTILE* pHotTile);
316 
317 private:
318     HotTileSet mHotTiles[KNOB_NUM_HOT_TILES_X][KNOB_NUM_HOT_TILES_Y];
319     uint32_t mHotTileSize[SWR_NUM_ATTACHMENTS];
320 
AllocHotTileMem(size_t size,uint32_t align,uint32_t numaNode)321     void* AllocHotTileMem(size_t size, uint32_t align, uint32_t numaNode)
322     {
323         void* p = nullptr;
324 #if defined(_WIN32)
325         HANDLE hProcess = GetCurrentProcess();
326         p = VirtualAllocExNuma(hProcess, nullptr, size, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE, numaNode);
327 #else
328         p = AlignedMalloc(size, align);
329 #endif
330 
331         return p;
332     }
333 
FreeHotTileMem(void * pBuffer)334     void FreeHotTileMem(void* pBuffer)
335     {
336         if (pBuffer)
337         {
338 #if defined(_WIN32)
339             VirtualFree(pBuffer, 0, MEM_RELEASE);
340 #else
341             AlignedFree(pBuffer);
342 #endif
343         }
344     }
345 };
346 
347