/*------------------------------------------------------------------------- * drawElements Quality Program OpenGL ES 3.0 Module * ------------------------------------------------- * * Copyright 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * *//*! * \file * \brief Depth buffer performance tests. *//*--------------------------------------------------------------------*/ #include "es3pDepthTests.hpp" #include "glsCalibration.hpp" #include "gluShaderProgram.hpp" #include "gluObjectWrapper.hpp" #include "gluPixelTransfer.hpp" #include "glwFunctions.hpp" #include "glwEnums.hpp" #include "tcuTestLog.hpp" #include "tcuStringTemplate.hpp" #include "tcuCPUWarmup.hpp" #include "tcuCommandLine.hpp" #include "tcuResultCollector.hpp" #include "deClock.h" #include "deString.h" #include "deMath.h" #include "deStringUtil.hpp" #include "deRandom.hpp" #include "deUniquePtr.hpp" #include #include namespace deqp { namespace gles3 { namespace Performance { namespace { using namespace glw; using de::MovePtr; using tcu::TestContext; using tcu::TestLog; using tcu::Vec4; using tcu::Vec3; using tcu::Vec2; using glu::RenderContext; using glu::ProgramSources; using glu::ShaderSource; using std::vector; using std::string; using std::map; struct Sample { deInt64 nullTime; deInt64 baseTime; deInt64 testTime; int order; int workload; }; struct SampleParams { int step; int measurement; SampleParams(int step_, int measurement_) : step(step_), measurement(measurement_) {} }; typedef vector Geometry; struct ObjectData { ProgramSources shader; Geometry geometry; ObjectData (const ProgramSources& shader_, const Geometry& geometry_) : shader(shader_), geometry(geometry_) {} }; class RenderData { public: RenderData (const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log); ~RenderData (void) {}; const glu::ShaderProgram m_program; const glu::VertexArray m_vao; const glu::Buffer m_vbo; const int m_numVertices; }; RenderData::RenderData (const ObjectData& object, const glu::RenderContext& renderCtx, TestLog& log) : m_program (renderCtx, object.shader) , m_vao (renderCtx.getFunctions()) , m_vbo (renderCtx.getFunctions()) , m_numVertices (int(object.geometry.size())/4) { const glw::Functions& gl = renderCtx.getFunctions(); if (!m_program.isOk()) log << m_program; gl.bindBuffer(GL_ARRAY_BUFFER, *m_vbo); gl.bufferData(GL_ARRAY_BUFFER, object.geometry.size() * sizeof(float), &object.geometry[0], GL_STATIC_DRAW); gl.bindAttribLocation(m_program.getProgram(), 0, "a_position"); gl.bindVertexArray(*m_vao); gl.vertexAttribPointer(0, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL); gl.enableVertexAttribArray(0); gl.bindVertexArray(0); } namespace Utils { vector getFullscreenQuad (float depth) { const float data[] = { +1.0f, +1.0f, depth, 0.0f, // .w is gl_VertexId%3 since Nexus 4&5 can't handle that on their own +1.0f, -1.0f, depth, 1.0f, -1.0f, -1.0f, depth, 2.0f, -1.0f, -1.0f, depth, 0.0f, -1.0f, +1.0f, depth, 1.0f, +1.0f, +1.0f, depth, 2.0f, }; return vector(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); } vector getFullscreenQuadWithGradient (float depth0, float depth1) { const float data[] = { +1.0f, +1.0f, depth0, 0.0f, +1.0f, -1.0f, depth0, 1.0f, -1.0f, -1.0f, depth1, 2.0f, -1.0f, -1.0f, depth1, 0.0f, -1.0f, +1.0f, depth1, 1.0f, +1.0f, +1.0f, depth0, 2.0f, }; return vector(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); } vector getPartScreenQuad (float coverage, float depth) { const float xMax = -1.0f + 2.0f*coverage; const float data[] = { xMax, +1.0f, depth, 0.0f, xMax, -1.0f, depth, 1.0f, -1.0f, -1.0f, depth, 2.0f, -1.0f, -1.0f, depth, 0.0f, -1.0f, +1.0f, depth, 1.0f, xMax, +1.0f, depth, 2.0f, }; return vector(DE_ARRAY_BEGIN(data), DE_ARRAY_END(data)); } // Axis aligned grid. Depth of vertices is baseDepth +/- depthNoise vector getFullScreenGrid (int resolution, deUint32 seed, float baseDepth, float depthNoise, float xyNoise) { const int gridsize = resolution+1; vector vertices (gridsize*gridsize); vector retval; de::Random rng (seed); for (int y = 0; y < gridsize; y++) for (int x = 0; x < gridsize; x++) { const bool isEdge = x == 0 || y == 0 || x == resolution || y == resolution; const float x_ = float(x)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise)); const float y_ = float(y)/float(resolution)*2.0f - 1.0f + (isEdge ? 0.0f : rng.getFloat(-xyNoise, +xyNoise)); const float z_ = baseDepth + rng.getFloat(-depthNoise, +depthNoise); vertices[y*gridsize + x] = Vec3(x_, y_, z_); } retval.reserve(resolution*resolution*6); for (int y = 0; y < resolution; y++) for (int x = 0; x < resolution; x++) { const Vec3& p0 = vertices[(y+0)*gridsize + (x+0)]; const Vec3& p1 = vertices[(y+0)*gridsize + (x+1)]; const Vec3& p2 = vertices[(y+1)*gridsize + (x+0)]; const Vec3& p3 = vertices[(y+1)*gridsize + (x+1)]; const float temp[6*4] = { p0.x(), p0.y(), p0.z(), 0.0f, p2.x(), p2.y(), p2.z(), 1.0f, p1.x(), p1.y(), p1.z(), 2.0f, p3.x(), p3.y(), p3.z(), 0.0f, p1.x(), p1.y(), p1.z(), 1.0f, p2.x(), p2.y(), p2.z(), 2.0f, }; retval.insert(retval.end(), DE_ARRAY_BEGIN(temp), DE_ARRAY_END(temp)); } return retval; } // Outputs barycentric coordinates as v_bcoords. Otherwise a passthrough shader string getBaseVertexShader (void) { return "#version 300 es\n" "in highp vec4 a_position;\n" "out mediump vec3 v_bcoords;\n" "void main()\n" "{\n" " v_bcoords = vec3(0, 0, 0);\n" " v_bcoords[int(a_position.w)] = 1.0;\n" " gl_Position = vec4(a_position.xyz, 1.0);\n" "}\n"; } // Adds noise to coordinates based on InstanceID Outputs barycentric coordinates as v_bcoords string getInstanceNoiseVertexShader (void) { return "#version 300 es\n" "in highp vec4 a_position;\n" "out mediump vec3 v_bcoords;\n" "void main()\n" "{\n" " v_bcoords = vec3(0, 0, 0);\n" " v_bcoords[int(a_position.w)] = 1.0;\n" " vec3 noise = vec3(sin(float(gl_InstanceID)*1.05), sin(float(gl_InstanceID)*1.23), sin(float(gl_InstanceID)*1.71));\n" " gl_Position = vec4(a_position.xyz + noise * 0.005, 1.0);\n" "}\n"; } // Renders green triangles with edges highlighted. Exact shade depends on depth. string getDepthAsGreenFragmentShader (void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(d,1,d,1);\n" " else\n" " fragColor = vec4(0,d,0,1);\n" "}\n"; } // Renders green triangles with edges highlighted. Exact shade depends on depth. string getDepthAsRedFragmentShader (void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(1,d,d,1);\n" " else\n" " fragColor = vec4(d,0,0,1);\n" "}\n"; } // Basic time waster. Renders red triangles with edges highlighted. Exact shade depends on depth. string getArithmeticWorkloadFragmentShader (void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "uniform mediump int u_iterations;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " for (int i = 0; i params; params["GRIDRENDER_SIZE"] = de::toString(gridsize); return tcu::StringTemplate(fragSrc).specialize(params); } // A static increment to frag depth string getStaticFragDepthFragmentShader (void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " gl_FragDepth = gl_FragCoord.z + 0.1;\n" " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(d,1,d,1);\n" " else\n" " fragColor = vec4(0,d,0,1);\n" "}\n"; } // A trivial dynamic change to frag depth string getDynamicFragDepthFragmentShader (void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " gl_FragDepth = gl_FragCoord.z + (v_bcoords.x + v_bcoords.y + v_bcoords.z)*0.05;\n" // Sum of v_bcoords components is allways 1 " if (v_bcoords.x < 0.02 || v_bcoords.y < 0.02 || v_bcoords.z < 0.02)\n" " fragColor = vec4(d,1,d,1);\n" " else\n" " fragColor = vec4(0,d,0,1);\n" "}\n"; } // A static increment to frag depth string getStaticFragDepthArithmeticWorkloadFragmentShader (void) { return "#version 300 es\n" "in mediump vec3 v_bcoords;\n" "out mediump vec4 fragColor;\n" "uniform mediump int u_iterations;\n" "void main()\n" "{\n" " mediump float d = gl_FragCoord.z;\n" " gl_FragDepth = gl_FragCoord.z + 0.1;\n" " for (int i = 0; i& samples, const string& name, const string& desc); void logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg); virtual void logAnalysis (const vector& samples) = 0; virtual void logDescription (void) = 0; virtual ObjectData genOccluderGeometry (void) const = 0; virtual ObjectData genOccludedGeometry (void) const = 0; virtual int calibrate (void) const = 0; virtual Sample renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const = 0; void render (const RenderData& data) const; void render (const RenderData& data, int instances) const; const RenderContext& m_renderCtx; tcu::ResultCollector m_results; enum {ITERATION_STEPS = 10, ITERATION_SAMPLES = 16}; }; BaseCase::BaseCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : TestCase (testCtx, tcu::NODETYPE_PERFORMANCE, name, desc) , m_renderCtx (renderCtx) { } BaseCase::IterateResult BaseCase::iterate (void) { typedef de::MovePtr RenderDataP; const glw::Functions& gl = m_renderCtx.getFunctions(); TestLog& log = m_testCtx.getLog(); const glu::Framebuffer framebuffer (gl); const glu::Renderbuffer renderbuffer (gl); const glu::Renderbuffer depthbuffer (gl); vector results; vector params; RenderDataP occluderData; RenderDataP occludedData; tcu::TextureLevel resultTex (tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8), RENDER_SIZE, RENDER_SIZE); int maxWorkload = 0; de::Random rng (deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed()); logDescription(); gl.bindRenderbuffer(GL_RENDERBUFFER, *renderbuffer); gl.renderbufferStorage(GL_RENDERBUFFER, GL_RGBA8, RENDER_SIZE, RENDER_SIZE); gl.bindRenderbuffer(GL_RENDERBUFFER, *depthbuffer); gl.renderbufferStorage(GL_RENDERBUFFER, GL_DEPTH24_STENCIL8, RENDER_SIZE, RENDER_SIZE); gl.bindFramebuffer(GL_FRAMEBUFFER, *framebuffer); gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, *renderbuffer); gl.framebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, *depthbuffer); gl.viewport(0, 0, RENDER_SIZE, RENDER_SIZE); gl.clearColor(0.125f, 0.25f, 0.5f, 1.0f); maxWorkload = calibrate(); // Setup data occluderData = RenderDataP(new RenderData (genOccluderGeometry(), m_renderCtx, log)); occludedData = RenderDataP(new RenderData (genOccludedGeometry(), m_renderCtx, log)); TCU_CHECK(occluderData->m_program.isOk()); TCU_CHECK(occludedData->m_program.isOk()); // Force initialization of GPU resources gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(*occluderData); render(*occludedData); glu::readPixels(m_renderCtx, 0, 0, resultTex.getAccess()); logGeometry(resultTex.getAccess(), occluderData->m_program, occludedData->m_program); params.reserve(ITERATION_STEPS*ITERATION_SAMPLES); // Setup parameters for (int step = 0; step < ITERATION_STEPS; step++) { const int workload = maxWorkload*step/ITERATION_STEPS; for (int count = 0; count < ITERATION_SAMPLES; count++) params.push_back(workload); } rng.shuffle(params.begin(), params.end()); // Render samples for (size_t ndx = 0; ndx < params.size(); ndx++) { const int workload = params[ndx]; Sample sample = renderSample(*occluderData, *occludedData, workload); sample.workload = workload; sample.order = int(ndx); results.push_back(sample); } logSamples(results, "Samples", "Samples"); logAnalysis(results); m_results.setTestContextResult(m_testCtx); return STOP; } void BaseCase::logSamples (const vector& samples, const string& name, const string& desc) { TestLog& log = m_testCtx.getLog(); bool testOnly = true; for (size_t ndx = 0; ndx < samples.size(); ndx++) { if (samples[ndx].baseTime != 0 || samples[ndx].nullTime != 0) { testOnly = false; break; } } log << TestLog::SampleList(name, desc); if (testOnly) { log << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("Order", "Order of sample", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("TestTime", "Test render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::EndSampleInfo; for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++) { const Sample& sample = samples[sampleNdx]; log << TestLog::Sample << sample.workload << sample.order << sample.testTime << TestLog::EndSample; } } else { log << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("Order", "Order of sample", "", QP_SAMPLE_VALUE_TAG_PREDICTOR) << TestLog::ValueInfo("TestTime", "Test render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::ValueInfo("NullTime", "Read pixels time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::ValueInfo("BaseTime", "Base render time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE) << TestLog::EndSampleInfo; for (size_t sampleNdx = 0; sampleNdx < samples.size(); sampleNdx++) { const Sample& sample = samples[sampleNdx]; log << TestLog::Sample << sample.workload << sample.order << sample.testTime << sample.nullTime << sample.baseTime << TestLog::EndSample; } } log << TestLog::EndSampleList; } void BaseCase::logGeometry (const tcu::ConstPixelBufferAccess& sample, const glu::ShaderProgram& occluderProg, const glu::ShaderProgram& occludedProg) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Geometry", "Geometry"); log << TestLog::Message << "Occluding geometry is green with shade dependent on depth (rgb == 0, depth, 0)" << TestLog::EndMessage; log << TestLog::Message << "Occluded geometry is red with shade dependent on depth (rgb == depth, 0, 0)" << TestLog::EndMessage; log << TestLog::Message << "Primitive edges are a lighter shade of red/green" << TestLog::EndMessage; log << TestLog::Image("Test Geometry", "Test Geometry", sample); log << TestLog::EndSection; log << TestLog::Section("Occluder", "Occluder"); log << occluderProg; log << TestLog::EndSection; log << TestLog::Section("Occluded", "Occluded"); log << occludedProg; log << TestLog::EndSection; } void BaseCase::render (const RenderData& data) const { const glw::Functions& gl = m_renderCtx.getFunctions(); gl.useProgram(data.m_program.getProgram()); gl.bindVertexArray(*data.m_vao); gl.drawArrays(GL_TRIANGLES, 0, data.m_numVertices); gl.bindVertexArray(0); } void BaseCase::render (const RenderData& data, int instances) const { const glw::Functions& gl = m_renderCtx.getFunctions(); gl.useProgram(data.m_program.getProgram()); gl.bindVertexArray(*data.m_vao); gl.drawArraysInstanced(GL_TRIANGLES, 0, data.m_numVertices, instances); gl.bindVertexArray(0); } // Render occluder once, then repeatedly render occluded geometry. Sample with multiple repetition counts & establish time per call with linear regression class RenderCountCase : public BaseCase { public: RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); ~RenderCountCase (void) {} protected: virtual void logAnalysis (const vector& samples); private: virtual int calibrate (void) const; virtual Sample renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const; }; RenderCountCase::RenderCountCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : BaseCase (testCtx, renderCtx, name, desc) { } void RenderCountCase::logAnalysis (const vector& samples) { using namespace gls; TestLog& log = m_testCtx.getLog(); int maxWorkload = 0; vector testSamples (samples.size()); for (size_t ndx = 0; ndx < samples.size(); ndx++) { const Sample& sample = samples[ndx]; testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime); maxWorkload = de::max(maxWorkload, sample.workload); } { const float confidence = 0.60f; const LineParametersWithConfidence testParam = theilSenSiegelLinearRegression(testSamples, confidence); const float usPerCall = testParam.coefficient; const float pxPerCall = RENDER_SIZE*RENDER_SIZE; const float pxPerUs = pxPerCall/usPerCall; const float mpxPerS = pxPerUs; log << TestLog::Section("Linear Regression", "Linear Regression"); log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage; log << TestLog::Message << "Render time for scene with depth test was\n\t" << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +" << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::EndSection; log << TestLog::Section("Result", "Result"); if (testParam.coefficientConfidenceLower < 0.0f) { log << TestLog::Message << "Coefficient confidence bounds include values below 0.0, the operation likely has neglible per-pixel cost" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Pass"); } else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25) { log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); } else { log << TestLog::Message << "Culled hidden pixels @ " << mpxPerS << "Mpx/s" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(mpxPerS, 2)); } log << TestLog::EndSection; } } Sample RenderCountCase::renderSample (const RenderData& occluder, const RenderData& occluded, int callcount) const { const glw::Functions& gl = m_renderCtx.getFunctions(); Sample sample; deUint64 now = 0; deUint64 prev = 0; deUint8 buffer[4]; // Stabilize { gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); } prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(occluder); render(occluded, callcount); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.testTime = now - prev; sample.baseTime = 0; sample.nullTime = 0; sample.workload = callcount; return sample; } int RenderCountCase::calibrate (void) const { using namespace gls; const glw::Functions& gl = m_renderCtx.getFunctions(); TestLog& log = m_testCtx.getLog(); const RenderData occluderGeometry (genOccluderGeometry(), m_renderCtx, log); const RenderData occludedGeometry (genOccludedGeometry(), m_renderCtx, log); TheilSenCalibrator calibrator (CalibratorParameters(20, // Initial workload 10, // Max iteration frames 20.0f, // Iteration shortcut threshold ms 20, // Max iterations 33.0f, // Target frame time 40.0f, // Frame time cap 1000.0f // Target measurement duration )); while (true) { switch(calibrator.getState()) { case TheilSenCalibrator::STATE_FINISHED: logCalibrationInfo(m_testCtx.getLog(), calibrator); return calibrator.getCallCount(); case TheilSenCalibrator::STATE_MEASURE: { deUint8 buffer[4]; deInt64 now; deInt64 prev; prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(occluderGeometry); render(occludedGeometry, calibrator.getCallCount()); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); calibrator.recordIteration(now - prev); break; } case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS: calibrator.recomputeParameters(); break; default: DE_ASSERT(false); return 1; } } } // Compares time/workload gradients of same geometry with and without depth testing class RelativeChangeCase : public BaseCase { public: RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); virtual ~RelativeChangeCase (void) {} protected: Sample renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const; virtual void logAnalysis (const vector& samples); private: int calibrate (void) const; }; RelativeChangeCase::RelativeChangeCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : BaseCase (testCtx, renderCtx, name, desc) { } int RelativeChangeCase::calibrate (void) const { using namespace gls; const glw::Functions& gl = m_renderCtx.getFunctions(); TestLog& log = m_testCtx.getLog(); const RenderData geom (genOccludedGeometry(), m_renderCtx, log); TheilSenCalibrator calibrator(CalibratorParameters( 20, // Initial workload 10, // Max iteration frames 20.0f, // Iteration shortcut threshold ms 20, // Max iterations 10.0f, // Target frame time 15.0f, // Frame time cap 1000.0f // Target measurement duration )); while (true) { switch(calibrator.getState()) { case TheilSenCalibrator::STATE_FINISHED: logCalibrationInfo(m_testCtx.getLog(), calibrator); return calibrator.getCallCount(); case TheilSenCalibrator::STATE_MEASURE: { deUint8 buffer[4]; const GLuint program = geom.m_program.getProgram(); gl.useProgram(program); gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), calibrator.getCallCount()); const deInt64 prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(geom); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); const deInt64 now = deGetMicroseconds(); calibrator.recordIteration(now - prev); break; } case TheilSenCalibrator::STATE_RECOMPUTE_PARAMS: calibrator.recomputeParameters(); break; default: DE_ASSERT(false); return 1; } } } Sample RelativeChangeCase::renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const { const glw::Functions& gl = m_renderCtx.getFunctions(); const GLuint program = occluded.m_program.getProgram(); Sample sample; deUint64 now = 0; deUint64 prev = 0; deUint8 buffer[4]; gl.useProgram(program); gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload); // Warmup (this workload seems to reduce variation in following workloads) { gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); } // Null time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.nullTime = now - prev; } // Test time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(occluder); render(occluded); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.testTime = now - prev; } // Base time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(occluder); render(occluded); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.baseTime = now - prev; } sample.workload = 0; return sample; } void RelativeChangeCase::logAnalysis (const vector& samples) { using namespace gls; TestLog& log = m_testCtx.getLog(); int maxWorkload = 0; vector nullSamples (samples.size()); vector baseSamples (samples.size()); vector testSamples (samples.size()); for (size_t ndx = 0; ndx < samples.size(); ndx++) { const Sample& sample = samples[ndx]; nullSamples[ndx] = Vec2((float)sample.workload, (float)sample.nullTime); baseSamples[ndx] = Vec2((float)sample.workload, (float)sample.baseTime); testSamples[ndx] = Vec2((float)sample.workload, (float)sample.testTime); maxWorkload = de::max(maxWorkload, sample.workload); } { const float confidence = 0.60f; const LineParametersWithConfidence nullParam = theilSenSiegelLinearRegression(nullSamples, confidence); const LineParametersWithConfidence baseParam = theilSenSiegelLinearRegression(baseSamples, confidence); const LineParametersWithConfidence testParam = theilSenSiegelLinearRegression(testSamples, confidence); if (!de::inRange(0.0f, nullParam.coefficientConfidenceLower, nullParam.coefficientConfidenceUpper)) { m_results.addResult(QP_TEST_RESULT_FAIL, "Constant operation sequence duration not constant"); log << TestLog::Message << "Constant operation sequence timing may vary as a function of workload. Result quality extremely low" << TestLog::EndMessage; } if (de::inRange(0.0f, baseParam.coefficientConfidenceLower, baseParam.coefficientConfidenceUpper)) { m_results.addResult(QP_TEST_RESULT_FAIL, "Workload has no effect on duration"); log << TestLog::Message << "Workload factor has no effect on duration of sample (smart optimizer?)" << TestLog::EndMessage; } log << TestLog::Section("Linear Regression", "Linear Regression"); log << TestLog::Message << "Offset & coefficient presented as [confidence interval min, estimate, confidence interval max]. Reported confidence interval for this test is " << confidence << TestLog::EndMessage; log << TestLog::Message << "Render time for empty scene was\n\t" << "[" << nullParam.offsetConfidenceLower << ", " << nullParam.offset << ", " << nullParam.offsetConfidenceUpper << "]us +" << "[" << nullParam.coefficientConfidenceLower << ", " << nullParam.coefficient << ", " << nullParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::Message << "Render time for scene without depth test was\n\t" << "[" << baseParam.offsetConfidenceLower << ", " << baseParam.offset << ", " << baseParam.offsetConfidenceUpper << "]us +" << "[" << baseParam.coefficientConfidenceLower << ", " << baseParam.coefficient << ", " << baseParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::Message << "Render time for scene with depth test was\n\t" << "[" << testParam.offsetConfidenceLower << ", " << testParam.offset << ", " << testParam.offsetConfidenceUpper << "]us +" << "[" << testParam.coefficientConfidenceLower << ", " << testParam.coefficient << ", " << testParam.coefficientConfidenceUpper << "]" << "us/workload" << TestLog::EndMessage; log << TestLog::EndSection; if (de::inRange(0.0f, testParam.coefficientConfidenceLower, testParam.coefficientConfidenceUpper)) { log << TestLog::Message << "Test duration not dependent on culled workload" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "0.0"); } else if (testParam.coefficientConfidenceLower < testParam.coefficientConfidenceUpper*0.25) { log << TestLog::Message << "Coefficient confidence range is extremely large, cannot give reliable result" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); } else if (baseParam.coefficientConfidenceLower < baseParam.coefficientConfidenceUpper*0.25) { log << TestLog::Message << "Coefficient confidence range for base render time is extremely large, cannot give reliable result" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, "Result confidence extremely low"); } else { log << TestLog::Message << "Test duration is dependent on culled workload" << TestLog::EndMessage; m_results.addResult(QP_TEST_RESULT_PASS, de::floatToString(de::abs(testParam.coefficient)/de::abs(baseParam.coefficient), 2)); } } } // Speed of trivial culling class BaseCostCase : public RenderCountCase { public: BaseCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RenderCountCase (testCtx, renderCtx, name, desc) {} ~BaseCostCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::variableQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Gradient class GradientCostCase : public RenderCountCase { public: GradientCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float gradientDistance) : RenderCountCase (testCtx, renderCtx, name, desc) , m_gradientDistance (gradientDistance) { } ~GradientCostCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuadWithGradient(0.0f, 1.0f - m_gradientDistance); } virtual ObjectData genOccludedGeometry (void) const { return ObjectData(glu::makeVtxFragSources(Utils::getInstanceNoiseVertexShader(), Utils::getDepthAsRedFragmentShader()), Utils::getFullscreenQuadWithGradient(m_gradientDistance, 1.0f)); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The quads are tilted so that the left edge of the occluded quad has a depth of 1.0 and the right edge of the occluding quad has a depth of 0.0." << TestLog::EndMessage; log << TestLog::Message << "The quads are spaced to have a depth difference of " << m_gradientDistance << " at all points." << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; log << TestLog::EndSection; } const float m_gradientDistance; }; // Constant offset to frag depth in occluder class OccluderStaticFragDepthCostCase : public RenderCountCase { public: OccluderStaticFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccluderStaticFragDepthCostCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::fastQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluder class OccluderDynamicFragDepthCostCase : public RenderCountCase { public: OccluderDynamicFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccluderDynamicFragDepthCostCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::fastQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Constant offset to frag depth in occluder class OccludedStaticFragDepthCostCase : public RenderCountCase { public: OccludedStaticFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccludedStaticFragDepthCostCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluder class OccludedDynamicFragDepthCostCase : public RenderCountCase { public: OccludedDynamicFragDepthCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RenderCountCase(testCtx, renderCtx, name, desc) { } ~OccludedDynamicFragDepthCostCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) is rendered once, the second (occluded) is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Culling speed with slightly less trivial geometry class OccludingGeometryComplexityCostCase : public RenderCountCase { public: OccludingGeometryComplexityCostCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, int resolution, float xyNoise, float zNoise) : RenderCountCase (testCtx, renderCtx, name, desc) , m_resolution (resolution) , m_xyNoise (xyNoise) , m_zNoise (zNoise) { } ~OccludingGeometryComplexityCostCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return ObjectData(Utils::getBaseShader(), Utils::getFullScreenGrid(m_resolution, deInt32Hash(deStringHash(getName())) ^ m_testCtx.getCommandLine().getBaseSeed(), 0.2f, m_zNoise, m_xyNoise)); } virtual ObjectData genOccludedGeometry (void) const { return Utils::variableQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing hidden fragment culling speed" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of an occluding grid and an occluded fullsceen quad. The occluding geometry is rendered once, the occluded one is rendered repeatedly" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of times the occluded quad is rendered" << TestLog::EndMessage; log << TestLog::Message << "The time per culled pixel is estimated from the rate of change of rendering time as a function of workload" << TestLog::EndMessage; log << TestLog::EndSection; } const int m_resolution; const float m_xyNoise; const float m_zNoise; }; // Cases with varying workloads in the fragment shader class FragmentWorkloadCullCase : public RelativeChangeCase { public: FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); virtual ~FragmentWorkloadCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } virtual void logDescription (void); }; FragmentWorkloadCullCase::FragmentWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RelativeChangeCase (testCtx, renderCtx, name, desc) { } void FragmentWorkloadCullCase::logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader," "the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } // Additional workload consists of texture lookups class FragmentTextureWorkloadCullCase : public FragmentWorkloadCullCase { public: FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc); virtual ~FragmentTextureWorkloadCullCase (void) {} virtual void init (void); virtual void deinit (void); private: typedef MovePtr TexPtr; virtual ObjectData genOccludedGeometry (void) const { return ObjectData(Utils::getTextureWorkloadShader(), Utils::getFullscreenQuad(0.8f)); } TexPtr m_texture; }; FragmentTextureWorkloadCullCase::FragmentTextureWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) { } void FragmentTextureWorkloadCullCase::init (void) { const glw::Functions& gl = m_renderCtx.getFunctions(); const int size = 128; const vector data (size*size*4, 255); m_texture = MovePtr(new glu::Texture(gl)); gl.bindTexture(GL_TEXTURE_2D, m_texture); gl.texImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, size, size, 0, GL_RGBA, GL_UNSIGNED_BYTE, &data[0]); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); } void FragmentTextureWorkloadCullCase::deinit (void) { m_texture.clear(); } // Additional workload consists of arithmetic class FragmentArithmeticWorkloadCullCase : public FragmentWorkloadCullCase { public: FragmentArithmeticWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) { } virtual ~FragmentArithmeticWorkloadCullCase (void) {} private: virtual ObjectData genOccludedGeometry (void) const { return ObjectData(Utils::getArithmeticWorkloadShader(), Utils::getFullscreenQuad(0.8f)); } }; // Contains dynamicly unused discard after a series of calculations class FragmentDiscardArithmeticWorkloadCullCase : public FragmentWorkloadCullCase { public: FragmentDiscardArithmeticWorkloadCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : FragmentWorkloadCullCase (testCtx, renderCtx, name, desc) { } virtual ~FragmentDiscardArithmeticWorkloadCullCase (void) {} private: virtual ObjectData genOccludedGeometry (void) const { return ObjectData(Utils::getArithmeticWorkloadDiscardShader(), Utils::getFullscreenQuad(0.8f)); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of culled fragment workload on render time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad uses a trivial shader," "the second (occluded) contains significant fragment shader work and a discard that is never triggers but has a dynamic condition" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Discards fragments from the occluder in a grid pattern class PartialOccluderDiscardCullCase : public RelativeChangeCase { public: PartialOccluderDiscardCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, int gridsize) : RelativeChangeCase (testCtx, renderCtx, name, desc) , m_gridsize (gridsize) { } virtual ~PartialOccluderDiscardCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getGridDiscardShader(m_gridsize), 0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of partially discarded occluder on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullsceen quads. The first (occluding) quad discards half the " "fragments in a grid pattern, the second (partially occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in depth testing halving the render time" << TestLog::EndMessage; log << TestLog::EndSection; } const int m_gridsize; }; // Trivial occluder covering part of screen class PartialOccluderCullCase : public RelativeChangeCase { public: PartialOccluderCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc, float coverage) : RelativeChangeCase (testCtx, renderCtx, name, desc) , m_coverage (coverage) { } ~PartialOccluderCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return ObjectData(Utils::getBaseShader(), Utils::getPartScreenQuad(m_coverage, 0.2f)); } virtual ObjectData genOccludedGeometry (void) const {return Utils::slowQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of partial occluder on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two quads. The first (occluding) quad covers " << m_coverage*100.0f << "% of the screen, while the second (partially occluded, fullscreen) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in render time increasing proportionally with unoccluded area" << TestLog::EndMessage; log << TestLog::EndSection; } const float m_coverage; }; // Constant offset to frag depth in occluder class StaticOccluderFragDepthCullCase : public RelativeChangeCase { public: StaticOccluderFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~StaticOccluderFragDepthCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluder class DynamicOccluderFragDepthCullCase : public RelativeChangeCase { public: DynamicOccluderFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~DynamicOccluderFragDepthCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthFragmentShader(), 0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on culling efficiency" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluder quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Constant offset to frag depth in occluded class StaticOccludedFragDepthCullCase : public RelativeChangeCase { public: StaticOccludedFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~StaticOccludedFragDepthCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getStaticFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a static offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluded class DynamicOccludedFragDepthCullCase : public RelativeChangeCase { public: DynamicOccludedFragDepthCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~DynamicOccludedFragDepthCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::quadWith(Utils::getDynamicFragDepthArithmeticWorkloadFragmentShader(), 0.2f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of non-default frag depth on rendering time" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The first (occluding) quad is trivial, while the second (occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The occluded quad has a dynamic offset applied to gl_FragDepth" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } }; // Dynamic offset to frag depth in occluded class ReversedDepthOrderCullCase : public RelativeChangeCase { public: ReversedDepthOrderCullCase (TestContext& testCtx, const RenderContext& renderCtx, const char* name, const char* desc) : RelativeChangeCase(testCtx, renderCtx, name, desc) { } ~ReversedDepthOrderCullCase (void) {} private: virtual ObjectData genOccluderGeometry (void) const { return Utils::fastQuad(0.2f); } virtual ObjectData genOccludedGeometry (void) const { return Utils::slowQuad(0.8f); } virtual void logDescription (void) { TestLog& log = m_testCtx.getLog(); log << TestLog::Section("Description", "Test description"); log << TestLog::Message << "Testing effects of of back first rendering order on culling efficiency" << TestLog::EndMessage; log << TestLog::Message << "Geometry consists of two fullscreen quads. The second (occluding) quad is trivial, while the first (occluded) contains significant fragment shader work" << TestLog::EndMessage; log << TestLog::Message << "Workload indicates the number of iterations of dummy work done in the occluded quad's fragment shader" << TestLog::EndMessage; log << TestLog::Message << "The ratio of rendering times of this scene with/without depth testing are compared" << TestLog::EndMessage; log << TestLog::Message << "Successfull early Z-testing should result in no correlation between workload and render time" << TestLog::EndMessage; log << TestLog::EndSection; } // Rendering order of occluder & occluded is reversed, otherwise identical to parent version Sample renderSample (const RenderData& occluder, const RenderData& occluded, int workload) const { const glw::Functions& gl = m_renderCtx.getFunctions(); const GLuint program = occluded.m_program.getProgram(); Sample sample; deUint64 now = 0; deUint64 prev = 0; deUint8 buffer[4]; gl.useProgram(program); gl.uniform1i(gl.getUniformLocation(program, "u_iterations"), workload); // Warmup (this workload seems to reduce variation in following workloads) { gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); } // Null time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.nullTime = now - prev; } // Test time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.enable(GL_DEPTH_TEST); render(occluded); render(occluder); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.testTime = now - prev; } // Base time { prev = deGetMicroseconds(); gl.clear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT); gl.disable(GL_DEPTH_TEST); render(occluded); render(occluder); gl.readPixels(0, 0, 1, 1, GL_RGBA, GL_UNSIGNED_BYTE, buffer); now = deGetMicroseconds(); sample.baseTime = now - prev; } sample.workload = 0; return sample; } }; } // Anonymous DepthTests::DepthTests (Context& context) : TestCaseGroup (context, "depth", "Depth culling performance") { } void DepthTests::init (void) { TestContext& testCtx = m_context.getTestContext(); const RenderContext& renderCtx = m_context.getRenderContext(); { tcu::TestCaseGroup* const cullEfficiencyGroup = new tcu::TestCaseGroup(m_testCtx, "cull_efficiency", "Fragment cull efficiency"); addChild(cullEfficiencyGroup); { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "workload", "Workload"); cullEfficiencyGroup->addChild(group); group->addChild(new FragmentTextureWorkloadCullCase( testCtx, renderCtx, "workload_texture", "Fragment shader with texture lookup workload")); group->addChild(new FragmentArithmeticWorkloadCullCase( testCtx, renderCtx, "workload_arithmetic", "Fragment shader with arithmetic workload")); group->addChild(new FragmentDiscardArithmeticWorkloadCullCase( testCtx, renderCtx, "workload_arithmetic_discard", "Fragment shader that may discard with arithmetic workload")); } { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_discard", "Discard"); cullEfficiencyGroup->addChild(group); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_256", "Parts of occluder geometry discarded", 256)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_128", "Parts of occluder geometry discarded", 128)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_64", "Parts of occluder geometry discarded", 64)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_32", "Parts of occluder geometry discarded", 32)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_16", "Parts of occluder geometry discarded", 16)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_8", "Parts of occluder geometry discarded", 8)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_4", "Parts of occluder geometry discarded", 4)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_2", "Parts of occluder geometry discarded", 2)); group->addChild(new PartialOccluderDiscardCullCase(testCtx, renderCtx, "grid_1", "Parts of occluder geometry discarded", 1)); } { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "partial_coverage", "Partial Coverage"); cullEfficiencyGroup->addChild(group); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "100", "Occluder covering only part of occluded geometry", 1.00f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "099", "Occluder covering only part of occluded geometry", 0.99f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "095", "Occluder covering only part of occluded geometry", 0.95f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "090", "Occluder covering only part of occluded geometry", 0.90f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "080", "Occluder covering only part of occluded geometry", 0.80f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "070", "Occluder covering only part of occluded geometry", 0.70f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "050", "Occluder covering only part of occluded geometry", 0.50f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "025", "Occluder covering only part of occluded geometry", 0.25f)); group->addChild(new PartialOccluderCullCase(testCtx, renderCtx, "010", "Occluder covering only part of occluded geometry", 0.10f)); } { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Partial Coverage"); cullEfficiencyGroup->addChild(group); group->addChild(new StaticOccluderFragDepthCullCase( testCtx, renderCtx, "occluder_static", "")); group->addChild(new DynamicOccluderFragDepthCullCase(testCtx, renderCtx, "occluder_dynamic", "")); group->addChild(new StaticOccludedFragDepthCullCase( testCtx, renderCtx, "occluded_static", "")); group->addChild(new DynamicOccludedFragDepthCullCase(testCtx, renderCtx, "occluded_dynamic", "")); } { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "order", "Rendering order"); cullEfficiencyGroup->addChild(group); group->addChild(new ReversedDepthOrderCullCase(testCtx, renderCtx, "reversed", "Back to front rendering order")); } } { tcu::TestCaseGroup* const testCostGroup = new tcu::TestCaseGroup(m_testCtx, "culled_pixel_cost", "Fragment cull efficiency"); addChild(testCostGroup); { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "gradient", "Gradients with small depth differences"); testCostGroup->addChild(group); group->addChild(new BaseCostCase(testCtx, renderCtx, "flat", "")); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_050", "", 0.50f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_010", "", 0.10f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_005", "", 0.05f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_002", "", 0.02f)); group->addChild(new GradientCostCase(testCtx, renderCtx, "gradient_001", "", 0.01f)); } { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "occluder_geometry", "Occluders with varying geometry complexity"); testCostGroup->addChild(group); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_5", "", 5, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_15", "", 15, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_25", "", 25, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_50", "", 50, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_uniform_grid_100", "", 100, 0.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_5", "", 5, 1.0f/5.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_15", "", 15, 1.0f/15.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_25", "", 25, 1.0f/25.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_50", "", 50, 1.0f/50.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "flat_noisy_grid_100", "", 100, 1.0f/100.0f, 0.0f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_5", "", 5, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_15", "", 15, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_25", "", 25, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_50", "", 50, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_uniform_grid_100", "", 100, 0.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_5", "", 5, 1.0f/5.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_15", "", 15, 1.0f/15.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_25", "", 25, 1.0f/25.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_50", "", 50, 1.0f/50.0f, 0.2f)); group->addChild(new OccludingGeometryComplexityCostCase(testCtx, renderCtx, "uneven_noisy_grid_100", "", 100, 1.0f/100.0f, 0.2f)); } { tcu::TestCaseGroup* const group = new tcu::TestCaseGroup(m_testCtx, "frag_depth", "Modifying gl_FragDepth"); testCostGroup->addChild(group); group->addChild(new OccluderStaticFragDepthCostCase( testCtx, renderCtx, "occluder_static", "")); group->addChild(new OccluderDynamicFragDepthCostCase(testCtx, renderCtx, "occluder_dynamic", "")); group->addChild(new OccludedStaticFragDepthCostCase( testCtx, renderCtx, "occluded_static", "")); group->addChild(new OccludedDynamicFragDepthCostCase(testCtx, renderCtx, "occluded_dynamic", "")); } } } } // Performance } // gles3 } // deqp