1 /****************************************************************************
2  * Copyright (C) 2016 Intel Corporation.   All Rights Reserved.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  * @file archrast.cpp
24  *
25  * @brief Implementation for archrast.
26  *
27  ******************************************************************************/
28 #include <sys/stat.h>
29 
30 #include <atomic>
31 #include <map>
32 
33 #include "common/os.h"
34 #include "archrast/archrast.h"
35 #include "archrast/eventmanager.h"
36 #include "gen_ar_event.hpp"
37 #include "gen_ar_eventhandlerfile.hpp"
38 
39 namespace ArchRast
40 {
41     //////////////////////////////////////////////////////////////////////////
42     /// @brief struct that keeps track of depth and stencil event information
43     struct DepthStencilStats
44     {
45         uint32_t earlyZTestPassCount       = 0;
46         uint32_t earlyZTestFailCount       = 0;
47         uint32_t lateZTestPassCount        = 0;
48         uint32_t lateZTestFailCount        = 0;
49         uint32_t earlyStencilTestPassCount = 0;
50         uint32_t earlyStencilTestFailCount = 0;
51         uint32_t lateStencilTestPassCount  = 0;
52         uint32_t lateStencilTestFailCount  = 0;
53     };
54 
55     struct CStats
56     {
57         uint32_t trivialRejectCount;
58         uint32_t trivialAcceptCount;
59         uint32_t mustClipCount;
60     };
61 
62     struct TEStats
63     {
64         uint32_t inputPrims = 0;
65         //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine.
66     };
67 
68     struct GSStateInfo
69     {
70         uint32_t inputPrimCount;
71         uint32_t primGeneratedCount;
72         uint32_t vertsInput;
73     };
74 
75     struct RastStats
76     {
77         uint32_t rasterTiles = 0;
78     };
79 
80     struct CullStats
81     {
82         uint32_t degeneratePrimCount = 0;
83         uint32_t backfacePrimCount   = 0;
84     };
85 
86     struct AlphaStats
87     {
88         uint32_t alphaTestCount  = 0;
89         uint32_t alphaBlendCount = 0;
90     };
91 
92 
93     //////////////////////////////////////////////////////////////////////////
94     /// @brief Event handler that handles API thread events. This is shared
95     ///        between the API and its caller (e.g. driver shim) but typically
96     ///        there is only a single API thread per context. So you can save
97     ///        information in the class to be used for other events.
98     class EventHandlerApiStats : public EventHandlerFile
99     {
100     public:
EventHandlerApiStats(uint32_t id)101         EventHandlerApiStats(uint32_t id) : EventHandlerFile(id)
102         {
103 #if defined(_WIN32)
104             // Attempt to copy the events.proto file to the ArchRast output dir. It's common for
105             // tools to place the events.proto file in the DEBUG_OUTPUT_DIR when launching AR. If it
106             // exists, this will attempt to copy it the first time we get here to package it with
107             // the stats. Otherwise, the user would need to specify the events.proto location when
108             // parsing the stats in post.
109             std::stringstream eventsProtoSrcFilename, eventsProtoDstFilename;
110             eventsProtoSrcFilename << KNOB_DEBUG_OUTPUT_DIR << "\\events.proto" << std::ends;
111             eventsProtoDstFilename << mOutputDir.substr(0, mOutputDir.size() - 1)
112                                    << "\\events.proto" << std::ends;
113 
114             // If event.proto already exists, we're done; else do the copy
115             struct stat buf; // Use a Posix stat for file existence check
116             if (!stat(eventsProtoDstFilename.str().c_str(), &buf) == 0)
117             {
118                 // Now check to make sure the events.proto source exists
119                 if (stat(eventsProtoSrcFilename.str().c_str(), &buf) == 0)
120                 {
121                     std::ifstream srcFile;
122                     srcFile.open(eventsProtoSrcFilename.str().c_str(), std::ios::binary);
123                     if (srcFile.is_open())
124                     {
125                         // Just do a binary buffer copy
126                         std::ofstream dstFile;
127                         dstFile.open(eventsProtoDstFilename.str().c_str(), std::ios::binary);
128                         dstFile << srcFile.rdbuf();
129                         dstFile.close();
130                     }
131                     srcFile.close();
132                 }
133             }
134 #endif
135         }
136 
Handle(const DrawInstancedEvent & event)137         virtual void Handle(const DrawInstancedEvent& event)
138         {
139             DrawInfoEvent e(event.data.drawId,
140                             ArchRast::Instanced,
141                             event.data.topology,
142                             event.data.numVertices,
143                             0,
144                             0,
145                             event.data.startVertex,
146                             event.data.numInstances,
147                             event.data.startInstance,
148                             event.data.tsEnable,
149                             event.data.gsEnable,
150                             event.data.soEnable,
151                             event.data.soTopology,
152                             event.data.splitId);
153 
154             EventHandlerFile::Handle(e);
155         }
156 
Handle(const DrawIndexedInstancedEvent & event)157         virtual void Handle(const DrawIndexedInstancedEvent& event)
158         {
159             DrawInfoEvent e(event.data.drawId,
160                             ArchRast::IndexedInstanced,
161                             event.data.topology,
162                             0,
163                             event.data.numIndices,
164                             event.data.indexOffset,
165                             event.data.baseVertex,
166                             event.data.numInstances,
167                             event.data.startInstance,
168                             event.data.tsEnable,
169                             event.data.gsEnable,
170                             event.data.soEnable,
171                             event.data.soTopology,
172                             event.data.splitId);
173 
174             EventHandlerFile::Handle(e);
175         }
176     };
177 
178     //////////////////////////////////////////////////////////////////////////
179     /// @brief Event handler that handles worker thread events. There is one
180     ///        event handler per thread. The python script will need to sum
181     ///        up counters across all of the threads.
182     class EventHandlerWorkerStats : public EventHandlerFile
183     {
184     public:
EventHandlerWorkerStats(uint32_t id)185         EventHandlerWorkerStats(uint32_t id) : EventHandlerFile(id), mNeedFlush(false)
186         {
187             memset(mShaderStats, 0, sizeof(mShaderStats));
188         }
189 
Handle(const EarlyDepthStencilInfoSingleSample & event)190         virtual void Handle(const EarlyDepthStencilInfoSingleSample& event)
191         {
192             // earlyZ test compute
193             mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
194             mDSSingleSample.earlyZTestFailCount +=
195                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
196 
197             // earlyStencil test compute
198             mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
199             mDSSingleSample.earlyStencilTestFailCount +=
200                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
201 
202             // earlyZ test single and multi sample
203             mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
204             mDSCombined.earlyZTestFailCount +=
205                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
206 
207             // earlyStencil test single and multi sample
208             mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
209             mDSCombined.earlyStencilTestFailCount +=
210                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
211 
212             mNeedFlush = true;
213         }
214 
Handle(const EarlyDepthStencilInfoSampleRate & event)215         virtual void Handle(const EarlyDepthStencilInfoSampleRate& event)
216         {
217             // earlyZ test compute
218             mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
219             mDSSampleRate.earlyZTestFailCount +=
220                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
221 
222             // earlyStencil test compute
223             mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
224             mDSSampleRate.earlyStencilTestFailCount +=
225                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
226 
227             // earlyZ test single and multi sample
228             mDSCombined.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
229             mDSCombined.earlyZTestFailCount +=
230                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
231 
232             // earlyStencil test single and multi sample
233             mDSCombined.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
234             mDSCombined.earlyStencilTestFailCount +=
235                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
236 
237             mNeedFlush = true;
238         }
239 
Handle(const EarlyDepthStencilInfoNullPS & event)240         virtual void Handle(const EarlyDepthStencilInfoNullPS& event)
241         {
242             // earlyZ test compute
243             mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
244             mDSNullPS.earlyZTestFailCount +=
245                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
246 
247             // earlyStencil test compute
248             mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
249             mDSNullPS.earlyStencilTestFailCount +=
250                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
251             mNeedFlush = true;
252         }
253 
Handle(const LateDepthStencilInfoSingleSample & event)254         virtual void Handle(const LateDepthStencilInfoSingleSample& event)
255         {
256             // lateZ test compute
257             mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
258             mDSSingleSample.lateZTestFailCount +=
259                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
260 
261             // lateStencil test compute
262             mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
263             mDSSingleSample.lateStencilTestFailCount +=
264                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
265 
266             // lateZ test single and multi sample
267             mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
268             mDSCombined.lateZTestFailCount +=
269                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
270 
271             // lateStencil test single and multi sample
272             mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
273             mDSCombined.lateStencilTestFailCount +=
274                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
275 
276             mNeedFlush = true;
277         }
278 
Handle(const LateDepthStencilInfoSampleRate & event)279         virtual void Handle(const LateDepthStencilInfoSampleRate& event)
280         {
281             // lateZ test compute
282             mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
283             mDSSampleRate.lateZTestFailCount +=
284                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
285 
286             // lateStencil test compute
287             mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
288             mDSSampleRate.lateStencilTestFailCount +=
289                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
290 
291             // lateZ test single and multi sample
292             mDSCombined.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
293             mDSCombined.lateZTestFailCount +=
294                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
295 
296             // lateStencil test single and multi sample
297             mDSCombined.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
298             mDSCombined.lateStencilTestFailCount +=
299                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
300 
301             mNeedFlush = true;
302         }
303 
Handle(const LateDepthStencilInfoNullPS & event)304         virtual void Handle(const LateDepthStencilInfoNullPS& event)
305         {
306             // lateZ test compute
307             mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask);
308             mDSNullPS.lateZTestFailCount +=
309                 _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask);
310 
311             // lateStencil test compute
312             mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask);
313             mDSNullPS.lateStencilTestFailCount +=
314                 _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask);
315             mNeedFlush = true;
316         }
317 
Handle(const EarlyDepthInfoPixelRate & event)318         virtual void Handle(const EarlyDepthInfoPixelRate& event)
319         {
320             // earlyZ test compute
321             mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount;
322             mDSPixelRate.earlyZTestFailCount +=
323                 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
324             mNeedFlush = true;
325         }
326 
327 
Handle(const LateDepthInfoPixelRate & event)328         virtual void Handle(const LateDepthInfoPixelRate& event)
329         {
330             // lateZ test compute
331             mDSPixelRate.lateZTestPassCount += event.data.depthPassCount;
332             mDSPixelRate.lateZTestFailCount +=
333                 (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount);
334             mNeedFlush = true;
335         }
336 
337 
Handle(const ClipInfoEvent & event)338         virtual void Handle(const ClipInfoEvent& event)
339         {
340             mClipper.mustClipCount += _mm_popcnt_u32(event.data.clipMask);
341             mClipper.trivialRejectCount +=
342                 event.data.numInvocations - _mm_popcnt_u32(event.data.validMask);
343             mClipper.trivialAcceptCount +=
344                 _mm_popcnt_u32(event.data.validMask & ~event.data.clipMask);
345         }
346 
UpdateStats(SWR_SHADER_STATS * pStatTotals,const SWR_SHADER_STATS * pStatUpdate)347         void UpdateStats(SWR_SHADER_STATS* pStatTotals, const SWR_SHADER_STATS* pStatUpdate)
348         {
349             pStatTotals->numInstExecuted += pStatUpdate->numInstExecuted;
350             pStatTotals->numSampleExecuted += pStatUpdate->numSampleExecuted;
351             pStatTotals->numSampleLExecuted += pStatUpdate->numSampleLExecuted;
352             pStatTotals->numSampleBExecuted += pStatUpdate->numSampleBExecuted;
353             pStatTotals->numSampleCExecuted += pStatUpdate->numSampleCExecuted;
354             pStatTotals->numSampleCLZExecuted += pStatUpdate->numSampleCLZExecuted;
355             pStatTotals->numSampleCDExecuted += pStatUpdate->numSampleCDExecuted;
356             pStatTotals->numGather4Executed += pStatUpdate->numGather4Executed;
357             pStatTotals->numGather4CExecuted += pStatUpdate->numGather4CExecuted;
358             pStatTotals->numGather4CPOExecuted += pStatUpdate->numGather4CPOExecuted;
359             pStatTotals->numGather4CPOCExecuted += pStatUpdate->numGather4CPOCExecuted;
360             pStatTotals->numLodExecuted += pStatUpdate->numLodExecuted;
361         }
362 
Handle(const VSStats & event)363         virtual void Handle(const VSStats& event)
364         {
365             SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
366             UpdateStats(&mShaderStats[SHADER_VERTEX], pStats);
367         }
368 
Handle(const GSStats & event)369         virtual void Handle(const GSStats& event)
370         {
371             SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
372             UpdateStats(&mShaderStats[SHADER_GEOMETRY], pStats);
373         }
374 
Handle(const DSStats & event)375         virtual void Handle(const DSStats& event)
376         {
377             SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
378             UpdateStats(&mShaderStats[SHADER_DOMAIN], pStats);
379         }
380 
Handle(const HSStats & event)381         virtual void Handle(const HSStats& event)
382         {
383             SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
384             UpdateStats(&mShaderStats[SHADER_HULL], pStats);
385         }
386 
Handle(const PSStats & event)387         virtual void Handle(const PSStats& event)
388         {
389             SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
390             UpdateStats(&mShaderStats[SHADER_PIXEL], pStats);
391             mNeedFlush = true;
392         }
393 
Handle(const CSStats & event)394         virtual void Handle(const CSStats& event)
395         {
396             SWR_SHADER_STATS* pStats = (SWR_SHADER_STATS*)event.data.hStats;
397             UpdateStats(&mShaderStats[SHADER_COMPUTE], pStats);
398             mNeedFlush = true;
399         }
400 
401         // Flush cached events for this draw
FlushDraw(uint32_t drawId)402         virtual void FlushDraw(uint32_t drawId)
403         {
404             if (mNeedFlush == false)
405                 return;
406 
407             EventHandlerFile::Handle(PSInfo(drawId,
408                                             mShaderStats[SHADER_PIXEL].numInstExecuted,
409                                             mShaderStats[SHADER_PIXEL].numSampleExecuted,
410                                             mShaderStats[SHADER_PIXEL].numSampleLExecuted,
411                                             mShaderStats[SHADER_PIXEL].numSampleBExecuted,
412                                             mShaderStats[SHADER_PIXEL].numSampleCExecuted,
413                                             mShaderStats[SHADER_PIXEL].numSampleCLZExecuted,
414                                             mShaderStats[SHADER_PIXEL].numSampleCDExecuted,
415                                             mShaderStats[SHADER_PIXEL].numGather4Executed,
416                                             mShaderStats[SHADER_PIXEL].numGather4CExecuted,
417                                             mShaderStats[SHADER_PIXEL].numGather4CPOExecuted,
418                                             mShaderStats[SHADER_PIXEL].numGather4CPOCExecuted,
419                                             mShaderStats[SHADER_PIXEL].numLodExecuted));
420             EventHandlerFile::Handle(CSInfo(drawId,
421                                             mShaderStats[SHADER_COMPUTE].numInstExecuted,
422                                             mShaderStats[SHADER_COMPUTE].numSampleExecuted,
423                                             mShaderStats[SHADER_COMPUTE].numSampleLExecuted,
424                                             mShaderStats[SHADER_COMPUTE].numSampleBExecuted,
425                                             mShaderStats[SHADER_COMPUTE].numSampleCExecuted,
426                                             mShaderStats[SHADER_COMPUTE].numSampleCLZExecuted,
427                                             mShaderStats[SHADER_COMPUTE].numSampleCDExecuted,
428                                             mShaderStats[SHADER_COMPUTE].numGather4Executed,
429                                             mShaderStats[SHADER_COMPUTE].numGather4CExecuted,
430                                             mShaderStats[SHADER_COMPUTE].numGather4CPOExecuted,
431                                             mShaderStats[SHADER_COMPUTE].numGather4CPOCExecuted,
432                                             mShaderStats[SHADER_COMPUTE].numLodExecuted));
433 
434             // singleSample
435             EventHandlerFile::Handle(EarlyZSingleSample(
436                 drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount));
437             EventHandlerFile::Handle(LateZSingleSample(
438                 drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount));
439             EventHandlerFile::Handle(
440                 EarlyStencilSingleSample(drawId,
441                                          mDSSingleSample.earlyStencilTestPassCount,
442                                          mDSSingleSample.earlyStencilTestFailCount));
443             EventHandlerFile::Handle(
444                 LateStencilSingleSample(drawId,
445                                         mDSSingleSample.lateStencilTestPassCount,
446                                         mDSSingleSample.lateStencilTestFailCount));
447 
448             // sampleRate
449             EventHandlerFile::Handle(EarlyZSampleRate(
450                 drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount));
451             EventHandlerFile::Handle(LateZSampleRate(
452                 drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount));
453             EventHandlerFile::Handle(
454                 EarlyStencilSampleRate(drawId,
455                                        mDSSampleRate.earlyStencilTestPassCount,
456                                        mDSSampleRate.earlyStencilTestFailCount));
457             EventHandlerFile::Handle(LateStencilSampleRate(drawId,
458                                                            mDSSampleRate.lateStencilTestPassCount,
459                                                            mDSSampleRate.lateStencilTestFailCount));
460 
461             // combined
462             EventHandlerFile::Handle(
463                 EarlyZ(drawId, mDSCombined.earlyZTestPassCount, mDSCombined.earlyZTestFailCount));
464             EventHandlerFile::Handle(
465                 LateZ(drawId, mDSCombined.lateZTestPassCount, mDSCombined.lateZTestFailCount));
466             EventHandlerFile::Handle(EarlyStencil(drawId,
467                                                   mDSCombined.earlyStencilTestPassCount,
468                                                   mDSCombined.earlyStencilTestFailCount));
469             EventHandlerFile::Handle(LateStencil(drawId,
470                                                  mDSCombined.lateStencilTestPassCount,
471                                                  mDSCombined.lateStencilTestFailCount));
472 
473             // pixelRate
474             EventHandlerFile::Handle(EarlyZPixelRate(
475                 drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount));
476             EventHandlerFile::Handle(LateZPixelRate(
477                 drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount));
478 
479 
480             // NullPS
481             EventHandlerFile::Handle(
482                 EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount));
483             EventHandlerFile::Handle(EarlyStencilNullPS(
484                 drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount));
485 
486             // Rasterized Subspans
487             EventHandlerFile::Handle(RasterTiles(drawId, rastStats.rasterTiles));
488 
489             // Alpha Subspans
490             EventHandlerFile::Handle(
491                 AlphaEvent(drawId, mAlphaStats.alphaTestCount, mAlphaStats.alphaBlendCount));
492 
493             // Primitive Culling
494             EventHandlerFile::Handle(
495                 CullEvent(drawId, mCullStats.backfacePrimCount, mCullStats.degeneratePrimCount));
496 
497             mDSSingleSample = {};
498             mDSSampleRate   = {};
499             mDSCombined     = {};
500             mDSPixelRate    = {};
501             mDSNullPS = {};
502 
503             rastStats   = {};
504             mCullStats  = {};
505             mAlphaStats = {};
506 
507             mShaderStats[SHADER_PIXEL]   = {};
508             mShaderStats[SHADER_COMPUTE] = {};
509 
510             mNeedFlush = false;
511         }
512 
Handle(const FrontendDrawEndEvent & event)513         virtual void Handle(const FrontendDrawEndEvent& event)
514         {
515             // Clipper
516             EventHandlerFile::Handle(ClipperEvent(event.data.drawId,
517                                                   mClipper.trivialRejectCount,
518                                                   mClipper.trivialAcceptCount,
519                                                   mClipper.mustClipCount));
520 
521             // Tesselator
522             EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims));
523 
524             // Geometry Shader
525             EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount));
526             EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount));
527             EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput));
528 
529             EventHandlerFile::Handle(VSInfo(event.data.drawId,
530                                             mShaderStats[SHADER_VERTEX].numInstExecuted,
531                                             mShaderStats[SHADER_VERTEX].numSampleExecuted,
532                                             mShaderStats[SHADER_VERTEX].numSampleLExecuted,
533                                             mShaderStats[SHADER_VERTEX].numSampleBExecuted,
534                                             mShaderStats[SHADER_VERTEX].numSampleCExecuted,
535                                             mShaderStats[SHADER_VERTEX].numSampleCLZExecuted,
536                                             mShaderStats[SHADER_VERTEX].numSampleCDExecuted,
537                                             mShaderStats[SHADER_VERTEX].numGather4Executed,
538                                             mShaderStats[SHADER_VERTEX].numGather4CExecuted,
539                                             mShaderStats[SHADER_VERTEX].numGather4CPOExecuted,
540                                             mShaderStats[SHADER_VERTEX].numGather4CPOCExecuted,
541                                             mShaderStats[SHADER_VERTEX].numLodExecuted));
542             EventHandlerFile::Handle(HSInfo(event.data.drawId,
543                                             mShaderStats[SHADER_HULL].numInstExecuted,
544                                             mShaderStats[SHADER_HULL].numSampleExecuted,
545                                             mShaderStats[SHADER_HULL].numSampleLExecuted,
546                                             mShaderStats[SHADER_HULL].numSampleBExecuted,
547                                             mShaderStats[SHADER_HULL].numSampleCExecuted,
548                                             mShaderStats[SHADER_HULL].numSampleCLZExecuted,
549                                             mShaderStats[SHADER_HULL].numSampleCDExecuted,
550                                             mShaderStats[SHADER_HULL].numGather4Executed,
551                                             mShaderStats[SHADER_HULL].numGather4CExecuted,
552                                             mShaderStats[SHADER_HULL].numGather4CPOExecuted,
553                                             mShaderStats[SHADER_HULL].numGather4CPOCExecuted,
554                                             mShaderStats[SHADER_HULL].numLodExecuted));
555             EventHandlerFile::Handle(DSInfo(event.data.drawId,
556                                             mShaderStats[SHADER_DOMAIN].numInstExecuted,
557                                             mShaderStats[SHADER_DOMAIN].numSampleExecuted,
558                                             mShaderStats[SHADER_DOMAIN].numSampleLExecuted,
559                                             mShaderStats[SHADER_DOMAIN].numSampleBExecuted,
560                                             mShaderStats[SHADER_DOMAIN].numSampleCExecuted,
561                                             mShaderStats[SHADER_DOMAIN].numSampleCLZExecuted,
562                                             mShaderStats[SHADER_DOMAIN].numSampleCDExecuted,
563                                             mShaderStats[SHADER_DOMAIN].numGather4Executed,
564                                             mShaderStats[SHADER_DOMAIN].numGather4CExecuted,
565                                             mShaderStats[SHADER_DOMAIN].numGather4CPOExecuted,
566                                             mShaderStats[SHADER_DOMAIN].numGather4CPOCExecuted,
567                                             mShaderStats[SHADER_DOMAIN].numLodExecuted));
568             EventHandlerFile::Handle(GSInfo(event.data.drawId,
569                                             mShaderStats[SHADER_GEOMETRY].numInstExecuted,
570                                             mShaderStats[SHADER_GEOMETRY].numSampleExecuted,
571                                             mShaderStats[SHADER_GEOMETRY].numSampleLExecuted,
572                                             mShaderStats[SHADER_GEOMETRY].numSampleBExecuted,
573                                             mShaderStats[SHADER_GEOMETRY].numSampleCExecuted,
574                                             mShaderStats[SHADER_GEOMETRY].numSampleCLZExecuted,
575                                             mShaderStats[SHADER_GEOMETRY].numSampleCDExecuted,
576                                             mShaderStats[SHADER_GEOMETRY].numGather4Executed,
577                                             mShaderStats[SHADER_GEOMETRY].numGather4CExecuted,
578                                             mShaderStats[SHADER_GEOMETRY].numGather4CPOExecuted,
579                                             mShaderStats[SHADER_GEOMETRY].numGather4CPOCExecuted,
580                                             mShaderStats[SHADER_GEOMETRY].numLodExecuted));
581 
582             mShaderStats[SHADER_VERTEX]   = {};
583             mShaderStats[SHADER_HULL]     = {};
584             mShaderStats[SHADER_DOMAIN]   = {};
585             mShaderStats[SHADER_GEOMETRY] = {};
586 
587             // Reset Internal Counters
588             mClipper = {};
589             mTS      = {};
590             mGS      = {};
591         }
592 
Handle(const GSPrimInfo & event)593         virtual void Handle(const GSPrimInfo& event)
594         {
595             mGS.inputPrimCount += event.data.inputPrimCount;
596             mGS.primGeneratedCount += event.data.primGeneratedCount;
597             mGS.vertsInput += event.data.vertsInput;
598         }
599 
Handle(const TessPrimCount & event)600         virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; }
601 
Handle(const RasterTileCount & event)602         virtual void Handle(const RasterTileCount& event)
603         {
604             rastStats.rasterTiles += event.data.rasterTiles;
605         }
606 
Handle(const CullInfoEvent & event)607         virtual void Handle(const CullInfoEvent& event)
608         {
609             mCullStats.degeneratePrimCount += _mm_popcnt_u32(
610                 event.data.validMask ^ (event.data.validMask & ~event.data.degeneratePrimMask));
611             mCullStats.backfacePrimCount += _mm_popcnt_u32(
612                 event.data.validMask ^ (event.data.validMask & ~event.data.backfacePrimMask));
613         }
614 
Handle(const AlphaInfoEvent & event)615         virtual void Handle(const AlphaInfoEvent& event)
616         {
617             mAlphaStats.alphaTestCount += event.data.alphaTestEnable;
618             mAlphaStats.alphaBlendCount += event.data.alphaBlendEnable;
619         }
620 
621     protected:
622         bool mNeedFlush;
623         // Per draw stats
624         DepthStencilStats mDSSingleSample = {};
625         DepthStencilStats mDSSampleRate   = {};
626         DepthStencilStats mDSPixelRate    = {};
627         DepthStencilStats mDSCombined     = {};
628         DepthStencilStats mDSNullPS       = {};
629         DepthStencilStats mDSOmZ          = {};
630         CStats            mClipper        = {};
631         TEStats           mTS             = {};
632         GSStateInfo       mGS             = {};
633         RastStats         rastStats       = {};
634         CullStats         mCullStats      = {};
635         AlphaStats        mAlphaStats     = {};
636 
637         SWR_SHADER_STATS mShaderStats[NUM_SHADER_TYPES];
638 
639     };
640 
FromHandle(HANDLE hThreadContext)641     static EventManager* FromHandle(HANDLE hThreadContext)
642     {
643         return reinterpret_cast<EventManager*>(hThreadContext);
644     }
645 
646     // Construct an event manager and associate a handler with it.
CreateThreadContext(AR_THREAD type)647     HANDLE CreateThreadContext(AR_THREAD type)
648     {
649         // Can we assume single threaded here?
650         static std::atomic<uint32_t> counter(0);
651         uint32_t                     id = counter.fetch_add(1);
652 
653         EventManager* pManager = new EventManager();
654 
655         if (pManager)
656         {
657             EventHandlerFile* pHandler = nullptr;
658 
659             if (type == AR_THREAD::API)
660             {
661                 pHandler = new EventHandlerApiStats(id);
662                 pManager->Attach(pHandler);
663                 pHandler->Handle(ThreadStartApiEvent());
664             }
665             else
666             {
667                 pHandler = new EventHandlerWorkerStats(id);
668                 pManager->Attach(pHandler);
669                 pHandler->Handle(ThreadStartWorkerEvent());
670             }
671 
672             pHandler->MarkHeader();
673 
674             return pManager;
675         }
676 
677         SWR_INVALID("Failed to register thread.");
678         return nullptr;
679     }
680 
DestroyThreadContext(HANDLE hThreadContext)681     void DestroyThreadContext(HANDLE hThreadContext)
682     {
683         EventManager* pManager = FromHandle(hThreadContext);
684         SWR_ASSERT(pManager != nullptr);
685 
686         delete pManager;
687     }
688 
689     // Dispatch event for this thread.
Dispatch(HANDLE hThreadContext,const Event & event)690     void Dispatch(HANDLE hThreadContext, const Event& event)
691     {
692         if (event.IsEnabled())
693         {
694             EventManager* pManager = reinterpret_cast<EventManager*>(hThreadContext);
695             SWR_ASSERT(pManager != nullptr);
696             pManager->Dispatch(event);
697         }
698     }
699 
700     // Flush for this thread.
FlushDraw(HANDLE hThreadContext,uint32_t drawId)701     void FlushDraw(HANDLE hThreadContext, uint32_t drawId)
702     {
703         EventManager* pManager = FromHandle(hThreadContext);
704         SWR_ASSERT(pManager != nullptr);
705 
706         pManager->FlushDraw(drawId);
707     }
708 } // namespace ArchRast
709