/**************************************************************************** * Copyright (C) 2016 Intel Corporation. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. * * @file archrast.cpp * * @brief Implementation for archrast. * ******************************************************************************/ #include #include "common/os.h" #include "archrast/archrast.h" #include "archrast/eventmanager.h" #include "gen_ar_eventhandlerfile.hpp" namespace ArchRast { ////////////////////////////////////////////////////////////////////////// /// @brief struct that keeps track of depth and stencil event information struct DepthStencilStats { uint32_t earlyZTestPassCount = 0; uint32_t earlyZTestFailCount = 0; uint32_t lateZTestPassCount = 0; uint32_t lateZTestFailCount = 0; uint32_t earlyStencilTestPassCount = 0; uint32_t earlyStencilTestFailCount = 0; uint32_t lateStencilTestPassCount = 0; uint32_t lateStencilTestFailCount = 0; }; struct CStats { uint32_t clippedVerts = 0; }; struct TEStats { uint32_t inputPrims = 0; //@todo:: Change this to numPatches. Assumed: 1 patch per prim. If holds, its fine. }; struct GSStats { uint32_t inputPrimCount; uint32_t primGeneratedCount; uint32_t vertsInput; }; ////////////////////////////////////////////////////////////////////////// /// @brief Event handler that saves stat events to event files. This /// handler filters out unwanted events. class EventHandlerStatsFile : public EventHandlerFile { public: EventHandlerStatsFile(uint32_t id) : EventHandlerFile(id), mNeedFlush(false) {} // These are events that we're not interested in saving in stats event files. virtual void Handle(const Start& event) {} virtual void Handle(const End& event) {} virtual void Handle(const EarlyDepthStencilInfoSingleSample& event) { //earlyZ test compute mDSSingleSample.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); mDSSingleSample.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); //earlyStencil test compute mDSSingleSample.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); mDSSingleSample.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); mNeedFlush = true; } virtual void Handle(const EarlyDepthStencilInfoSampleRate& event) { //earlyZ test compute mDSSampleRate.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); mDSSampleRate.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); //earlyStencil test compute mDSSampleRate.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); mDSSampleRate.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); mNeedFlush = true; } virtual void Handle(const EarlyDepthStencilInfoNullPS& event) { //earlyZ test compute mDSNullPS.earlyZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); mDSNullPS.earlyZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); //earlyStencil test compute mDSNullPS.earlyStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); mDSNullPS.earlyStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); mNeedFlush = true; } virtual void Handle(const LateDepthStencilInfoSingleSample& event) { //lateZ test compute mDSSingleSample.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); mDSSingleSample.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); //lateStencil test compute mDSSingleSample.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); mDSSingleSample.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); mNeedFlush = true; } virtual void Handle(const LateDepthStencilInfoSampleRate& event) { //lateZ test compute mDSSampleRate.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); mDSSampleRate.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); //lateStencil test compute mDSSampleRate.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); mDSSampleRate.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); mNeedFlush = true; } virtual void Handle(const LateDepthStencilInfoNullPS& event) { //lateZ test compute mDSNullPS.lateZTestPassCount += _mm_popcnt_u32(event.data.depthPassMask); mDSNullPS.lateZTestFailCount += _mm_popcnt_u32((!event.data.depthPassMask) & event.data.coverageMask); //lateStencil test compute mDSNullPS.lateStencilTestPassCount += _mm_popcnt_u32(event.data.stencilPassMask); mDSNullPS.lateStencilTestFailCount += _mm_popcnt_u32((!event.data.stencilPassMask) & event.data.coverageMask); mNeedFlush = true; } virtual void Handle(const EarlyDepthInfoPixelRate& event) { //earlyZ test compute mDSPixelRate.earlyZTestPassCount += event.data.depthPassCount; mDSPixelRate.earlyZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); mNeedFlush = true; } virtual void Handle(const LateDepthInfoPixelRate& event) { //lateZ test compute mDSPixelRate.lateZTestPassCount += event.data.depthPassCount; mDSPixelRate.lateZTestFailCount += (_mm_popcnt_u32(event.data.activeLanes) - event.data.depthPassCount); mNeedFlush = true; } // Flush cached events for this draw virtual void FlushDraw(uint32_t drawId) { if (mNeedFlush == false) return; //singleSample EventHandlerFile::Handle(EarlyZSingleSample(drawId, mDSSingleSample.earlyZTestPassCount, mDSSingleSample.earlyZTestFailCount)); EventHandlerFile::Handle(LateZSingleSample(drawId, mDSSingleSample.lateZTestPassCount, mDSSingleSample.lateZTestFailCount)); EventHandlerFile::Handle(EarlyStencilSingleSample(drawId, mDSSingleSample.earlyStencilTestPassCount, mDSSingleSample.earlyStencilTestFailCount)); EventHandlerFile::Handle(LateStencilSingleSample(drawId, mDSSingleSample.lateStencilTestPassCount, mDSSingleSample.lateStencilTestFailCount)); //sampleRate EventHandlerFile::Handle(EarlyZSampleRate(drawId, mDSSampleRate.earlyZTestPassCount, mDSSampleRate.earlyZTestFailCount)); EventHandlerFile::Handle(LateZSampleRate(drawId, mDSSampleRate.lateZTestPassCount, mDSSampleRate.lateZTestFailCount)); EventHandlerFile::Handle(EarlyStencilSampleRate(drawId, mDSSampleRate.earlyStencilTestPassCount, mDSSampleRate.earlyStencilTestFailCount)); EventHandlerFile::Handle(LateStencilSampleRate(drawId, mDSSampleRate.lateStencilTestPassCount, mDSSampleRate.lateStencilTestFailCount)); //pixelRate EventHandlerFile::Handle(EarlyZPixelRate(drawId, mDSPixelRate.earlyZTestPassCount, mDSPixelRate.earlyZTestFailCount)); EventHandlerFile::Handle(LateZPixelRate(drawId, mDSPixelRate.lateZTestPassCount, mDSPixelRate.lateZTestFailCount)); //NullPS EventHandlerFile::Handle(EarlyZNullPS(drawId, mDSNullPS.earlyZTestPassCount, mDSNullPS.earlyZTestFailCount)); EventHandlerFile::Handle(EarlyStencilNullPS(drawId, mDSNullPS.earlyStencilTestPassCount, mDSNullPS.earlyStencilTestFailCount)); //Reset Internal Counters mDSSingleSample = {}; mDSSampleRate = {}; mDSPixelRate = {}; mDSNullPS = {}; mNeedFlush = false; } virtual void Handle(const FrontendDrawEndEvent& event) { //Clipper EventHandlerFile::Handle(VertsClipped(event.data.drawId, mClipper.clippedVerts)); //Tesselator EventHandlerFile::Handle(TessPrims(event.data.drawId, mTS.inputPrims)); //Geometry Shader EventHandlerFile::Handle(GSInputPrims(event.data.drawId, mGS.inputPrimCount)); EventHandlerFile::Handle(GSPrimsGen(event.data.drawId, mGS.primGeneratedCount)); EventHandlerFile::Handle(GSVertsInput(event.data.drawId, mGS.vertsInput)); //Reset Internal Counters mClipper = {}; mTS = {}; mGS = {}; } virtual void Handle(const GSPrimInfo& event) { mGS.inputPrimCount += event.data.inputPrimCount; mGS.primGeneratedCount += event.data.primGeneratedCount; mGS.vertsInput += event.data.vertsInput; } virtual void Handle(const ClipVertexCount& event) { mClipper.clippedVerts += (_mm_popcnt_u32(event.data.primMask) * event.data.vertsPerPrim); } virtual void Handle(const TessPrimCount& event) { mTS.inputPrims += event.data.primCount; } protected: bool mNeedFlush; // Per draw stats DepthStencilStats mDSSingleSample = {}; DepthStencilStats mDSSampleRate = {}; DepthStencilStats mDSPixelRate = {}; DepthStencilStats mDSNullPS = {}; DepthStencilStats mDSOmZ = {}; CStats mClipper = {}; TEStats mTS = {}; GSStats mGS = {}; }; static EventManager* FromHandle(HANDLE hThreadContext) { return reinterpret_cast(hThreadContext); } // Construct an event manager and associate a handler with it. HANDLE CreateThreadContext(AR_THREAD type) { // Can we assume single threaded here? static std::atomic counter(0); uint32_t id = counter.fetch_add(1); EventManager* pManager = new EventManager(); EventHandlerFile* pHandler = new EventHandlerStatsFile(id); if (pManager && pHandler) { pManager->Attach(pHandler); if (type == AR_THREAD::API) { pHandler->Handle(ThreadStartApiEvent()); } else { pHandler->Handle(ThreadStartWorkerEvent()); } pHandler->MarkHeader(); return pManager; } SWR_INVALID("Failed to register thread."); return nullptr; } void DestroyThreadContext(HANDLE hThreadContext) { EventManager* pManager = FromHandle(hThreadContext); SWR_ASSERT(pManager != nullptr); delete pManager; } // Dispatch event for this thread. void Dispatch(HANDLE hThreadContext, const Event& event) { EventManager* pManager = FromHandle(hThreadContext); SWR_ASSERT(pManager != nullptr); pManager->Dispatch(event); } // Flush for this thread. void FlushDraw(HANDLE hThreadContext, uint32_t drawId) { EventManager* pManager = FromHandle(hThreadContext); SWR_ASSERT(pManager != nullptr); pManager->FlushDraw(drawId); } }