1 /****************************************************************************
2 * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file archrast.cpp
24 *
25 * @brief Implementation for archrast.
26 *
27 ******************************************************************************/
28 #include <atomic>
29 
30 #include "common/os.h"
31 #include "archrast/archrast.h"
32 #include "archrast/eventmanager.h"
33 #include "gen_ar_eventhandlerfile.hpp"
34 
35 namespace ArchRast
36 {
37     //////////////////////////////////////////////////////////////////////////
38     /// @brief struct that keeps track of depth and stencil event information
39     struct DepthStencilStats
40     {
41         uint32_t earlyZTestPassCount = 0;
42         uint32_t earlyZTestFailCount = 0;
43         uint32_t lateZTestPassCount = 0;
44         uint32_t lateZTestFailCount = 0;
45         uint32_t earlyStencilTestPassCount = 0;
46         uint32_t earlyStencilTestFailCount = 0;
47         uint32_t lateStencilTestPassCount = 0;
48         uint32_t lateStencilTestFailCount = 0;
49     };
50 
51     struct CStats
52     {
53         uint32_t clippedVerts = 0;
54     };
55 
56     struct TEStats
57     {
58         uint32_t inputPrims = 0;
59         //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
60     };
61 
62     struct GSStats
63     {
64         uint32_t inputPrimCount;
65         uint32_t primGeneratedCount;
66         uint32_t vertsInput;
67     };
68 
69     //////////////////////////////////////////////////////////////////////////
70     /// @brief Event handler that saves stat events to event files. This
71     ///        handler filters out unwanted events.
72     class EventHandlerStatsFile : public EventHandlerFile
73     {
74     public:
EventHandlerStatsFile(uint32_t id)75         EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {}
76 
77         // These are events that we're not interested in saving in stats event files.
Handle(const Start & event)78         virtual void Handle(const Start& event) {}
Handle(const End & event)79         virtual void Handle(const End& event) {}
80 
Handle(const EarlyDepthStencilInfoSingleSample & event)81         virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
82         {
83             //earlyZ test compute
84             mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
85             mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
86 
87             //earlyStencil test compute
88             mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
89             mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
90             mNeedFlush = true;
91         }
92 
Handle(const EarlyDepthStencilInfoSampleRate & event)93         virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
94         {
95             //earlyZ test compute
96             mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
97             mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
98 
99             //earlyStencil test compute
100             mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
101             mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
102             mNeedFlush = true;
103         }
104 
Handle(const EarlyDepthStencilInfoNullPS & event)105         virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
106         {
107             //earlyZ test compute
108             mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
109             mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
110 
111             //earlyStencil test compute
112             mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
113             mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
114             mNeedFlush = true;
115         }
116 
Handle(const LateDepthStencilInfoSingleSample & event)117         virtual void Handle(const LateDepthStencilInfoSingleSample& event)
118         {
119             //lateZ test compute
120             mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
121             mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
122 
123             //lateStencil test compute
124             mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
125             mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
126             mNeedFlush = true;
127         }
128 
Handle(const LateDepthStencilInfoSampleRate & event)129         virtual void Handle(const LateDepthStencilInfoSampleRate& event)
130         {
131             //lateZ test compute
132             mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
133             mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
134 
135             //lateStencil test compute
136             mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
137             mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
138             mNeedFlush = true;
139         }
140 
Handle(const LateDepthStencilInfoNullPS & event)141         virtual void Handle(const LateDepthStencilInfoNullPS& event)
142         {
143             //lateZ test compute
144             mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
145             mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
146 
147             //lateStencil test compute
148             mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
149             mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
150             mNeedFlush = true;
151         }
152 
Handle(const EarlyDepthInfoPixelRate & event)153         virtual void Handle(const EarlyDepthInfoPixelRate& event)
154         {
155             //earlyZ test compute
156             mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
157             mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
158             mNeedFlush = true;
159         }
160 
161 
Handle(const LateDepthInfoPixelRate & event)162         virtual void Handle(const LateDepthInfoPixelRate& event)
163         {
164             //lateZ test compute
165             mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
166             mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
167             mNeedFlush = true;
168         }
169 
170 
171         // Flush cached events for this draw
FlushDraw(uint32_t drawId)172         virtual void FlushDraw(uint32_t drawId)
173         {
174             if (mNeedFlush == false) return;
175 
176             //singleSample
177             EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
178             EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
179             EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount));
180             EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount));
181 
182             //sampleRate
183             EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
184             EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
185             EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount));
186             EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount));
187 
188             //pixelRate
189             EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
190             EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
191 
192 
193             //NullPS
194             EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
195             EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
196 
197             //Reset Internal Counters
198             mDSSingleSample = {};
199             mDSSampleRate = {};
200             mDSPixelRate = {};
201             mDSNullPS = {};
202 
203             mNeedFlush = false;
204         }
205 
Handle(const FrontendDrawEndEvent & event)206         virtual void Handle(const FrontendDrawEndEvent& event)
207         {
208             //Clipper
209             EventHandlerFile::Handle(VertsClipped(event.data.drawId, mClipper.clippedVerts));
210 
211             //Tesselator
212             EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
213 
214             //Geometry Shader
215             EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
216             EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
217             EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
218 
219             //Reset Internal Counters
220             mClipper = {};
221             mTS = {};
222             mGS = {};
223         }
224 
Handle(const GSPrimInfo & event)225         virtual void Handle(const GSPrimInfo& event)
226         {
227             mGS.inputPrimCount += event.data.inputPrimCount;
228             mGS.primGeneratedCount += event.data.primGeneratedCount;
229             mGS.vertsInput += event.data.vertsInput;
230         }
231 
Handle(const ClipVertexCount & event)232         virtual void Handle(const ClipVertexCount& event)
233         {
234             mClipper.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim);
235         }
236 
Handle(const TessPrimCount & event)237         virtual void Handle(const TessPrimCount& event)
238         {
239             mTS.inputPrims += event.data.primCount;
240         }
241 
242     protected:
243         bool mNeedFlush;
244         // Per draw stats
245         DepthStencilStats mDSSingleSample = {};
246         DepthStencilStats mDSSampleRate = {};
247         DepthStencilStats mDSPixelRate = {};
248         DepthStencilStats mDSNullPS = {};
249         DepthStencilStats mDSOmZ = {};
250         CStats mClipper = {};
251         TEStats mTS = {};
252         GSStats mGS = {};
253 
254     };
255 
FromHandle(HANDLE hThreadContext)256     static EventManager* FromHandle(HANDLE hThreadContext)
257     {
258         return reinterpret_cast<EventManager*>(hThreadContext);
259     }
260 
261     // Construct an event manager and associate a handler with it.
CreateThreadContext(AR_THREAD type)262     HANDLE CreateThreadContext(AR_THREAD type)
263     {
264         // Can we assume single threaded here?
265         static std::atomic<uint32_t> counter(0);
266         uint32_t id = counter.fetch_add(1);
267 
268         EventManager* pManager = new EventManager();
269         EventHandlerFile* pHandler = new EventHandlerStatsFile(id);
270 
271         if (pManager && pHandler)
272         {
273             pManager->Attach(pHandler);
274 
275             if (type == AR_THREAD::API)
276             {
277                 pHandler->Handle(ThreadStartApiEvent());
278             }
279             else
280             {
281                 pHandler->Handle(ThreadStartWorkerEvent());
282             }
283             pHandler->MarkHeader();
284 
285             return pManager;
286         }
287 
288         SWR_INVALID("Failed to register thread.");
289         return nullptr;
290     }
291 
DestroyThreadContext(HANDLE hThreadContext)292     void DestroyThreadContext(HANDLE hThreadContext)
293     {
294         EventManager* pManager = FromHandle(hThreadContext);
295         SWR_ASSERT(pManager != nullptr);
296 
297         delete pManager;
298     }
299 
300     // Dispatch event for this thread.
Dispatch(HANDLE hThreadContext,const Event & event)301     void Dispatch(HANDLE hThreadContext, const Event& event)
302     {
303         EventManager* pManager = FromHandle(hThreadContext);
304         SWR_ASSERT(pManager != nullptr);
305 
306         pManager->Dispatch(event);
307     }
308 
309     // Flush for this thread.
FlushDraw(HANDLE hThreadContext,uint32_t drawId)310     void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
311     {
312         EventManager* pManager = FromHandle(hThreadContext);
313         SWR_ASSERT(pManager != nullptr);
314 
315         pManager->FlushDraw(drawId);
316     }
317 }
318