1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program OpenGL ES 3.0 Module
3 * -------------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Shader operator performance tests.
22 *//*--------------------------------------------------------------------*/
23
24 #include "es3pShaderOperatorTests.hpp"
25 #include "glsCalibration.hpp"
26 #include "gluShaderUtil.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "gluPixelTransfer.hpp"
29 #include "tcuTestLog.hpp"
30 #include "tcuRenderTarget.hpp"
31 #include "tcuCommandLine.hpp"
32 #include "tcuSurface.hpp"
33 #include "deStringUtil.hpp"
34 #include "deSharedPtr.hpp"
35 #include "deClock.h"
36 #include "deMath.h"
37
38 #include "glwEnums.hpp"
39 #include "glwFunctions.hpp"
40
41 #include <map>
42 #include <algorithm>
43 #include <limits>
44 #include <set>
45
46 namespace deqp
47 {
48 namespace gles3
49 {
50 namespace Performance
51 {
52
53 using namespace gls;
54 using namespace glu;
55 using tcu::Vec2;
56 using tcu::Vec4;
57 using tcu::TestLog;
58 using de::SharedPtr;
59
60 using std::string;
61 using std::vector;
62
63 #define MEASUREMENT_FAIL() throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__)
64
65 // Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument.
66 static const int DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD = 3;
67 // How many different workload sizes are used by OperatorPerformanceCase.
68 static const int NUM_WORKLOADS = 8;
69 // Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached.
70 static const int MAX_WORKLOAD_SIZE = 1<<29;
71
72 // BinaryOpCase-specific constants for shader generation.
73 static const int BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
74 static const int BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT = 2;
75 static const int BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT = 4;
76
77 // FunctionCase-specific constants for shader generation.
78 static const int FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
79
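// Swizzle strings of lengths 1..4; a row is picked per input index (i % 4) and the column by component count, to vary which components are read when swizzling is enabled.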
80 static const char* const s_swizzles[][4] =
81 {
82 { "x", "yx", "yzx", "wzyx" },
83 { "y", "zy", "wyz", "xwzy" },
84 { "z", "wy", "zxy", "yzwx" },
85 { "w", "xw", "yxw", "zyxw" }
86 };
87
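// Component-wise arithmetic mean of a set of vectors.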
88 template <int N>
89 static tcu::Vector<float, N> mean (const vector<tcu::Vector<float, N> >& data)
90 {
91 tcu::Vector<float, N> sum(0.0f);
92 for (int i = 0; i < (int)data.size(); i++)
93 sum += data[i];
94 return sum / tcu::Vector<float, N>((float)data.size());
95 }
96
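// Helpers for setting scalar/vector/matrix uniforms when the component count is only known at run time.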
97 static void uniformNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
98 {
99 switch (n)
100 {
101 case 1: gl.uniform1fv(location, count, data); break;
102 case 2: gl.uniform2fv(location, count, data); break;
103 case 3: gl.uniform3fv(location, count, data); break;
104 case 4: gl.uniform4fv(location, count, data); break;
105 default: DE_ASSERT(false);
106 }
107 }
108
109 static void uniformNiv (const glw::Functions& gl, int n, int location, int count, const int* data)
110 {
111 switch (n)
112 {
113 case 1: gl.uniform1iv(location, count, data); break;
114 case 2: gl.uniform2iv(location, count, data); break;
115 case 3: gl.uniform3iv(location, count, data); break;
116 case 4: gl.uniform4iv(location, count, data); break;
117 default: DE_ASSERT(false);
118 }
119 }
120
121 static void uniformMatrixNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
122 {
123 switch (n)
124 {
125 case 2: gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]); break;
126 case 3: gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]); break;
127 case 4: gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]); break;
128 default: DE_ASSERT(false);
129 }
130 }
131
132 static glu::DataType getDataTypeFloatOrVec (int size)
133 {
134 return size == 1 ? glu::TYPE_FLOAT : glu::getDataTypeFloatVec(size);
135 }
136
137 static int getIterationCountOrDefault (const tcu::CommandLine& cmdLine, int def)
138 {
139 const int cmdLineVal = cmdLine.getTestIterationCount();
140 return cmdLineVal > 0 ? cmdLineVal : def;
141 }
142
143 static string lineParamsString (const LineParameters& params)
144 {
145 return "y = " + de::toString(params.offset) + " + " + de::toString(params.coefficient) + "*x";
146 }
147
148 namespace
149 {
150
151 /*--------------------------------------------------------------------*//*!
152 * \brief Abstract class for measuring shader operator performance.
153 *
154 * This class draws multiple times with different workload sizes (set
155 * via a uniform, by subclass). Time for each frame is measured, and the
156 * slope of the workload size vs frame time data is estimated. This slope
157 * tells us the estimated increase in frame time caused by a workload
158 * increase of 1 unit (what 1 workload unit means is up to subclass).
159 *
160 * Generally, the shaders contain not just the operation we're interested
161 * in (e.g. addition) but also some other stuff (e.g. loop overhead). To
162 * eliminate this cost, we actually do the stuff described in the above
163 * paragraph with multiple programs (usually two), which contain different
164 * kinds of workload (e.g. different loop contents). Then we can (in
165 * theory) compute the cost of just one operation in a subclass-dependent
166 * manner.
167 *
168 * At this point, the result tells us the increase in frame time caused
169 * by the addition of one operation. Dividing this by the number of
170 * draw calls in a frame, and further by the number of vertices or
171 * fragments in a draw call, we get the time cost of one operation.
172 *
173 * In reality, there sometimes isn't just a trivial linear dependence
174 * between workload size and frame time. Instead, there tends to be some
175 * amount of initial "free" operations. That is, it may be that all
176 * workload sizes below some positive integer C yield the same frame time,
177 * and only workload sizes beyond C increase the frame time in a supposedly
178 * linear manner. Graphically, this means that there graph consists of two
179 * parts: a horizontal left part, and a linearly increasing right part; the
180 * right part starts where the left parts ends. The principal task of these
181 * tests is to look at the slope of the increasing right part. Additionally
182 * an estimate for the amount of initial free operations is calculated.
183 * Note that it is also normal to get graphs where the horizontal left part
184 * is of zero width, i.e. there are no free operations.
185 *//*--------------------------------------------------------------------*/
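// In the terms used below: with D draw calls per frame and R vertices or fragments
// per draw call, the reported per-operation cost is
//   usecsPerSingleOp = usecsPerFramePerOp / (D * R)
// (see the STATE_REPORTING branch of iterate() for the actual computation).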
186 class OperatorPerformanceCase : public tcu::TestCase
187 {
188 public:
189 enum CaseType
190 {
191 CASETYPE_VERTEX = 0,
192 CASETYPE_FRAGMENT,
193
194 CASETYPE_LAST
195 };
196
197 struct InitialCalibration
198 {
199 int initialNumCalls;
200 InitialCalibration (void) : initialNumCalls(1) {}
201 };
202
203 typedef SharedPtr<InitialCalibration> InitialCalibrationStorage;
204
205 OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
206 CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage);
207 ~OperatorPerformanceCase (void);
208
209 void init (void);
210 void deinit (void);
211
212 IterateResult iterate (void);
213
214 struct AttribSpec
215 {
216 AttribSpec (const char* name_, const tcu::Vec4& p00_, const tcu::Vec4& p01_, const tcu::Vec4& p10_, const tcu::Vec4& p11_)
217 : name (name_)
218 , p00 (p00_)
219 , p01 (p01_)
220 , p10 (p10_)
221 , p11 (p11_)
222 {
223 }
224
225 AttribSpec (void) {}
226
227 std::string name;
228 tcu::Vec4 p00; //!< Bottom left.
229 tcu::Vec4 p01; //!< Bottom right.
230 tcu::Vec4 p10; //!< Top left.
231 tcu::Vec4 p11; //!< Top right.
232 };
233
234 protected:
235 struct ProgramContext
236 {
237 string vertShaderSource;
238 string fragShaderSource;
239 vector<AttribSpec> attributes;
240
241 string description;
242
243 ProgramContext (void) {}
244 ProgramContext (const string& vs, const string& fs, const vector<AttribSpec>& attrs, const string& desc)
245 : vertShaderSource(vs), fragShaderSource(fs), attributes(attrs), description(desc) {}
246 };
247
248 virtual vector<ProgramContext> generateProgramData (void) const = 0;
249 //! Sets program-specific uniforms that don't depend on the workload size.
250 virtual void setGeneralUniforms (deUint32 program) const = 0;
251 //! Sets the uniform(s) that specifies the workload size in the shader.
252 virtual void setWorkloadSizeUniform (deUint32 program, int workload) const = 0;
253 //! Computes the cost of a single operation, given the workload costs per program.
254 virtual float computeSingleOperationTime (const vector<float>& perProgramWorkloadCosts) const = 0;
255 //! Logs a human-readable description of what computeSingleOperationTime does.
256 virtual void logSingleOperationCalculationInfo (void) const = 0;
257
258 glu::RenderContext& m_renderCtx;
259
260 CaseType m_caseType;
261
262 private:
263 enum State
264 {
265 STATE_CALIBRATING = 0, //!< Calibrate draw call count, using first program in m_programs, with workload size 1.
266 STATE_FIND_HIGH_WORKLOAD, //!< Find an appropriate lower bound for the highest workload size we intend to use (one with high-enough frame time compared to workload size 1) for each program.
267 STATE_MEASURING, //!< Do actual measurements, for each program in m_programs.
268 STATE_REPORTING, //!< Measurements are done; calculate results and log.
269 STATE_FINISHED, //!< All done.
270
271 STATE_LAST
272 };
273
274 struct WorkloadRecord
275 {
276 int workloadSize;
277 vector<float> frameTimes; //!< In microseconds.
278
279 WorkloadRecord (int workloadSize_) : workloadSize(workloadSize_) {}
280 bool operator< (const WorkloadRecord& other) const { return this->workloadSize < other.workloadSize; }
281 void addFrameTime (float time) { frameTimes.push_back(time); }
282 float getMedianTime (void) const
283 {
284 vector<float> times = frameTimes;
285 std::sort(times.begin(), times.end());
286 return times.size() % 2 == 0 ?
287 (times[times.size()/2-1] + times[times.size()/2])*0.5f :
288 times[times.size()/2];
289 }
290 };
291
292 void prepareProgram (int progNdx); //!< Sets attributes and uniforms for m_programs[progNdx].
293 void prepareWorkload (int progNdx, int workload); //!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation.
294 void prepareNextRound (void); //!< Increases workload and/or updates m_state.
295 void render (int numDrawCalls);
296 deUint64 renderAndMeasure (int numDrawCalls);
297 void adjustAndLogGridAndViewport (void); //!< Log grid and viewport sizes, after possibly reducing them to reduce draw time.
298
299 vector<Vec2> getWorkloadMedianDataPoints (int progNdx) const; //!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ]
300
301 const int m_numMeasurementsPerWorkload;
302 const int m_numWorkloads; //!< How many different workload sizes are used for measurement for each program.
303
304 int m_workloadNdx; //!< Runs from 0 to m_numWorkloads-1.
305
306 int m_workloadMeasurementNdx;
307 vector<vector<WorkloadRecord> > m_workloadRecordsFindHigh; //!< The measurements done during STATE_FIND_HIGH_WORKLOAD.
308 vector<vector<WorkloadRecord> > m_workloadRecords; //!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into index specified by m_measureProgramNdx.
309
310 State m_state;
311 int m_measureProgramNdx; //!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured.
312
313 vector<int> m_highWorkloadSizes; //!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program.
314
315 TheilSenCalibrator m_calibrator;
316 InitialCalibrationStorage m_initialCalibrationStorage;
317
318 int m_viewportWidth;
319 int m_viewportHeight;
320 int m_gridSizeX;
321 int m_gridSizeY;
322
323 vector<ProgramContext> m_programData;
324 vector<SharedPtr<ShaderProgram> > m_programs;
325
326 std::vector<deUint32> m_attribBuffers;
327 };
328
329 static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y)
330 {
331 return v0 + (v2-v0)*x + (v1-v0)*y;
332 }
333
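// Interpolate over a unit quad whose corner values are packed into a Vec4 as (p00, p01, p10, p11); the quad is split into two triangles along the x+y == 1 diagonal.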
334 static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad)
335 {
336 // \note Top left fill rule.
337 if (x + y < 1.0f)
338 return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);
339 else
340 return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y);
341 }
342
343 static inline int getNumVertices (int gridSizeX, int gridSizeY)
344 {
345 return gridSizeX * gridSizeY * 2 * 3;
346 }
347
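// Generate a gridSizeX x gridSizeY grid of quads (two triangles each) covering the unit square, with the attribute's corner values interpolated to every vertex; output is packed as 4 floats per vertex.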
348 static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const OperatorPerformanceCase::AttribSpec& spec)
349 {
350 const int numComponents = 4;
351
352 DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
353 dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents);
354
355 {
356 int dstNdx = 0;
357
358 for (int baseY = 0; baseY < gridSizeY; baseY++)
359 for (int baseX = 0; baseX < gridSizeX; baseX++)
360 {
361 const float xf0 = (float)(baseX + 0) / (float)gridSizeX;
362 const float yf0 = (float)(baseY + 0) / (float)gridSizeY;
363 const float xf1 = (float)(baseX + 1) / (float)gridSizeX;
364 const float yf1 = (float)(baseY + 1) / (float)gridSizeY;
365
366 #define ADD_VERTEX(XF, YF) \
367 for (int compNdx = 0; compNdx < numComponents; compNdx++) \
368 dst[dstNdx++] = triQuadInterpolate((XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]))
369
370 ADD_VERTEX(xf0, yf0);
371 ADD_VERTEX(xf1, yf0);
372 ADD_VERTEX(xf0, yf1);
373
374 ADD_VERTEX(xf1, yf0);
375 ADD_VERTEX(xf1, yf1);
376 ADD_VERTEX(xf0, yf1);
377
378 #undef ADD_VERTEX
379 }
380 }
381 }
382
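// X coordinate at which the lines y = a.offset + a.coefficient*x and y = b.offset + b.coefficient*x intersect.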
383 static float intersectionX (const gls::LineParameters& a, const gls::LineParameters& b)
384 {
385 return (a.offset - b.offset) / (b.coefficient - a.coefficient);
386 }
387
388 static int numDistinctX (const vector<Vec2>& data)
389 {
390 std::set<float> xs;
391 for (int i = 0; i < (int)data.size(); i++)
392 xs.insert(data[i].x());
393 return (int)xs.size();
394 }
395
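// Ordinary least-squares line fit. Used only for pivot selection below; the final estimates use the more robust Theil-Sen regression.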
396 static gls::LineParameters simpleLinearRegression (const vector<Vec2>& data)
397 {
398 const Vec2 mid = mean(data);
399
400 float slopeNumerator = 0.0f;
401 float slopeDenominator = 0.0f;
402
403 for (int i = 0; i < (int)data.size(); i++)
404 {
405 const Vec2 diff = data[i] - mid;
406
407 slopeNumerator += diff.x()*diff.y();
408 slopeDenominator += diff.x()*diff.x();
409 }
410
411 const float slope = slopeNumerator / slopeDenominator;
412 const float offset = mid.y() - slope*mid.x();
413
414 return gls::LineParameters(offset, slope);
415 }
416
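// Mean squared vertical error of the least-squares fit, or 0 if there are too few distinct x coordinates for the fit to be meaningful.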
417 static float simpleLinearRegressionError (const vector<Vec2>& data)
418 {
419 if (numDistinctX(data) <= 2)
420 return 0.0f;
421 else
422 {
423 const gls::LineParameters estimator = simpleLinearRegression(data);
424 float error = 0.0f;
425
426 for (int i = 0; i < (int)data.size(); i++)
427 {
428 const float estY = estimator.offset + estimator.coefficient*data[i].x();
429 const float diff = estY - data[i].y();
430 error += diff*diff;
431 }
432
433 return error / (float)data.size();
434 }
435 }
436
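// Mean squared deviation from the mean y value, i.e. the fit error of the best horizontal line; 0 if there are too few distinct x coordinates.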
437 static float verticalVariance (const vector<Vec2>& data)
438 {
439 if (numDistinctX(data) <= 2)
440 return 0.0f;
441 else
442 {
443 const float meanY = mean(data).y();
444 float error = 0.0f;
445
446 for (int i = 0; i < (int)data.size(); i++)
447 {
448 const float diff = meanY - data[i].y();
449 error += diff*diff;
450 }
451
452 return error / (float)data.size();
453 }
454 }
455
456 /*--------------------------------------------------------------------*//*!
457 * \brief Find the x coord that divides the input data into two slopes.
458 *
459 * The operator performance measurements tend to produce results where
460 * we get small operation counts "for free" (e.g. because the operations
461 * are performed during some memory transfer overhead or something),
462 * resulting in a curve with two parts: an initial horizontal line segment,
463 * and a rising line.
464 *
465 * This function finds the x coordinate that divides the input data into
466 * two parts such that the sum of the mean square errors for the
467 * least-squares estimated lines for the two parts is minimized, under the
468 * additional condition that the left line is horizontal.
469 *
470 * This function returns a number X s.t. { pt | pt is in data, pt.x >= X }
471 * is the right line, and the rest of data is the left line.
472 *//*--------------------------------------------------------------------*/
473 static float findSlopePivotX (const vector<Vec2>& data)
474 {
475 std::set<float> xCoords;
476 for (int i = 0; i < (int)data.size(); i++)
477 xCoords.insert(data[i].x());
478
479 float lowestError = std::numeric_limits<float>::infinity();
480 float bestPivotX = -std::numeric_limits<float>::infinity();
481
482 for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX)
483 {
484 vector<Vec2> leftData;
485 vector<Vec2> rightData;
486 for (int i = 0; i < (int)data.size(); i++)
487 {
488 if (data[i].x() < *pivotX)
489 leftData.push_back(data[i]);
490 else
491 rightData.push_back(data[i]);
492 }
493
494 if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it.
495 break;
496
497 {
498 const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData);
499
500 if (totalError < lowestError)
501 {
502 lowestError = totalError;
503 bestPivotX = *pivotX;
504 }
505 }
506 }
507
508 DE_ASSERT(lowestError < std::numeric_limits<float>::infinity());
509
510 return bestPivotX;
511 }
512
513 struct SegmentedEstimator
514 {
515 float pivotX; //!< Value returned by findSlopePivotX, or -infinity if only single line.
516 gls::LineParameters left;
517 gls::LineParameters right;
518 SegmentedEstimator (const gls::LineParameters& l, const gls::LineParameters& r, float pivotX_) : pivotX(pivotX_), left(l), right(r) {}
519 };
520
521 /*--------------------------------------------------------------------*//*!
522 * \brief Compute line estimators for (potentially) two-segment data.
523 *
524 * Splits the given data into left and right parts (using findSlopePivotX)
525 * and returns the line estimates for them.
526 *
527 * Sometimes, however (especially in fragment shader cases) the data is
528 * in fact not segmented, but a straight line. This function attempts to
529 * detect if this is the case, and if so, sets left.offset = right.offset and
530 * left.slope = 0, meaning essentially that the initial "flat" part of the
531 * data has zero width.
532 *//*--------------------------------------------------------------------*/
533 static SegmentedEstimator computeSegmentedEstimator (const vector<Vec2>& data)
534 {
535 const float pivotX = findSlopePivotX(data);
536 vector<Vec2> leftData;
537 vector<Vec2> rightData;
538
539 for (int i = 0; i < (int)data.size(); i++)
540 {
541 if (data[i].x() < pivotX)
542 leftData.push_back(data[i]);
543 else
544 rightData.push_back(data[i]);
545 }
546
547 {
548 const gls::LineParameters leftLine = gls::theilSenLinearRegression(leftData);
549 const gls::LineParameters rightLine = gls::theilSenLinearRegression(rightData);
550
551 if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient*0.5f)
552 {
553 // Left data doesn't seem credible; assume the data is just a single line.
554 const gls::LineParameters entireLine = gls::theilSenLinearRegression(data);
555 return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine, -std::numeric_limits<float>::infinity());
556 }
557 else
558 return SegmentedEstimator(leftLine, rightLine, pivotX);
559 }
560 }
561
562 OperatorPerformanceCase::OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
563 CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage)
564 : tcu::TestCase (testCtx, tcu::NODETYPE_PERFORMANCE, name, description)
565 , m_renderCtx (renderCtx)
566 , m_caseType (caseType)
567 , m_numMeasurementsPerWorkload (getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD))
568 , m_numWorkloads (numWorkloads)
569 , m_workloadNdx (-1)
570 , m_workloadMeasurementNdx (-1)
571 , m_state (STATE_LAST)
572 , m_measureProgramNdx (-1)
573 , m_initialCalibrationStorage (initialCalibrationStorage)
574 , m_viewportWidth (caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth())
575 , m_viewportHeight (caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight())
576 , m_gridSizeX (caseType == CASETYPE_FRAGMENT ? 1 : 100)
577 , m_gridSizeY (caseType == CASETYPE_FRAGMENT ? 1 : 100)
578 {
579 DE_ASSERT(m_numWorkloads > 0);
580 }
581
582 OperatorPerformanceCase::~OperatorPerformanceCase (void)
583 {
584 if (!m_attribBuffers.empty())
585 {
586 m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
587 m_attribBuffers.clear();
588 }
589 }
590
591 static void logRenderTargetInfo (TestLog& log, const tcu::RenderTarget& renderTarget)
592 {
593 log << TestLog::Section("RenderTarget", "Render target")
594 << TestLog::Message << "size: " << renderTarget.getWidth() << "x" << renderTarget.getHeight() << TestLog::EndMessage
595 << TestLog::Message << "bits:"
596 << " R" << renderTarget.getPixelFormat().redBits
597 << " G" << renderTarget.getPixelFormat().greenBits
598 << " B" << renderTarget.getPixelFormat().blueBits
599 << " A" << renderTarget.getPixelFormat().alphaBits
600 << " D" << renderTarget.getDepthBits()
601 << " S" << renderTarget.getStencilBits()
602 << TestLog::EndMessage;
603
604 if (renderTarget.getNumSamples() != 0)
605 log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage;
606 else
607 log << TestLog::Message << "No MSAA" << TestLog::EndMessage;
608
609 log << TestLog::EndSection;
610 }
611
612 vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints (int progNdx) const
613 {
614 const vector<WorkloadRecord>& records = m_workloadRecords[progNdx];
615 vector<Vec2> result;
616
617 for (int i = 0; i < (int)records.size(); i++)
618 result.push_back(Vec2((float)records[i].workloadSize, records[i].getMedianTime()));
619
620 return result;
621 }
622
623 void OperatorPerformanceCase::prepareProgram (int progNdx)
624 {
625 DE_ASSERT(progNdx < (int)m_programs.size());
626 DE_ASSERT(m_programData.size() == m_programs.size());
627
628 const glw::Functions& gl = m_renderCtx.getFunctions();
629 const ShaderProgram& program = *m_programs[progNdx];
630
631 vector<AttribSpec> attributes = m_programData[progNdx].attributes;
632
633 attributes.push_back(AttribSpec("a_position",
634 Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
635 Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
636 Vec4(-1.0f, 1.0f, 0.0f, 1.0f),
637 Vec4( 1.0f, 1.0f, 0.0f, 1.0f)));
638
639 DE_ASSERT(program.isOk());
640
641 // Generate vertices.
642 if (!m_attribBuffers.empty())
643 gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
644 m_attribBuffers.resize(attributes.size(), 0);
645 gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
646 GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()");
647
648 for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
649 {
650 std::vector<float> vertices;
651 generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]);
652
653 gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
654 gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW);
655 GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data");
656 }
657
658 // Setup attribute bindings.
659 for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
660 {
661 int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str());
662
663 if (location >= 0)
664 {
665 gl.enableVertexAttribArray(location);
666 gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
667 gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
668 }
669 }
670 GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state");
671
672 gl.useProgram(program.getProgram());
673 setGeneralUniforms(program.getProgram());
674 gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
675 }
676
677 void OperatorPerformanceCase::prepareWorkload (int progNdx, int workload)
678 {
679 setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload);
680 render(m_calibrator.getCallCount());
681 }
682
683 void OperatorPerformanceCase::prepareNextRound (void)
684 {
685 DE_ASSERT(m_state == STATE_CALIBRATING ||
686 m_state == STATE_FIND_HIGH_WORKLOAD ||
687 m_state == STATE_MEASURING);
688
689 TestLog& log = m_testCtx.getLog();
690
691 if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
692 {
693 m_measureProgramNdx = 0;
694 m_state = STATE_FIND_HIGH_WORKLOAD;
695 }
696
697 if (m_state == STATE_CALIBRATING)
698 prepareWorkload(0, 1);
699 else if (m_state == STATE_FIND_HIGH_WORKLOAD)
700 {
701 vector<WorkloadRecord>& records = m_workloadRecordsFindHigh[m_measureProgramNdx];
702
703 if (records.empty() || records.back().getMedianTime() < 2.0f*records[0].getMedianTime())
704 {
705 int workloadSize;
706
707 if (records.empty())
708 workloadSize = 1;
709 else
710 {
711 workloadSize = records.back().workloadSize*2;
712
713 if (workloadSize > MAX_WORKLOAD_SIZE)
714 {
715 log << TestLog::Message << "Even workload size " << records.back().workloadSize
716 << " doesn't give high enough frame time for program " << m_measureProgramNdx
717 << ". Can't get sensible result." << TestLog::EndMessage;
718 MEASUREMENT_FAIL();
719 }
720 }
721
722 records.push_back(WorkloadRecord(workloadSize));
723 prepareWorkload(0, workloadSize);
724 m_workloadMeasurementNdx = 0;
725 }
726 else
727 {
728 m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize;
729 m_measureProgramNdx++;
730
731 if (m_measureProgramNdx >= (int)m_programs.size())
732 {
733 m_state = STATE_MEASURING;
734 m_workloadNdx = -1;
735 m_measureProgramNdx = 0;
736 }
737
738 prepareProgram(m_measureProgramNdx);
739 prepareNextRound();
740 }
741 }
742 else
743 {
744 m_workloadNdx++;
745
746 if (m_workloadNdx < m_numWorkloads)
747 {
748 DE_ASSERT(m_numWorkloads > 1);
749 const int highWorkload = m_highWorkloadSizes[m_measureProgramNdx];
750 const int workload = highWorkload > m_numWorkloads ?
751 1 + m_workloadNdx*(highWorkload-1)/(m_numWorkloads-1) :
752 1 + m_workloadNdx;
753
754 prepareWorkload(m_measureProgramNdx, workload);
755
756 m_workloadMeasurementNdx = 0;
757
758 m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload));
759 }
760 else
761 {
762 m_measureProgramNdx++;
763
764 if (m_measureProgramNdx < (int)m_programs.size())
765 {
766 m_workloadNdx = -1;
767 m_workloadMeasurementNdx = 0;
768 prepareProgram(m_measureProgramNdx);
769 prepareNextRound();
770 }
771 else
772 m_state = STATE_REPORTING;
773 }
774 }
775 }
776
777 void OperatorPerformanceCase::init (void)
778 {
779 TestLog& log = m_testCtx.getLog();
780 const glw::Functions& gl = m_renderCtx.getFunctions();
781
782 // Validate that we have sane grid and viewport setup.
783 DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
784 TCU_CHECK(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) &&
785 de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight()));
786
787 logRenderTargetInfo(log, m_renderCtx.getRenderTarget());
788
789 log << TestLog::Message << "Using additive blending." << TestLog::EndMessage;
790 gl.enable(GL_BLEND);
791 gl.blendEquation(GL_FUNC_ADD);
792 gl.blendFunc(GL_ONE, GL_ONE);
793
794 // Generate programs.
795 DE_ASSERT(m_programs.empty());
796 m_programData = generateProgramData();
797 DE_ASSERT(!m_programData.empty());
798
799 for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++)
800 {
801 const string& vert = m_programData[progNdx].vertShaderSource;
802 const string& frag = m_programData[progNdx].fragShaderSource;
803
804 m_programs.push_back(SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag))));
805
806 if (!m_programs.back()->isOk())
807 {
808 log << *m_programs.back();
809 TCU_FAIL("Compile failed");
810 }
811 }
812
813 // Log all programs.
814 for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
815 log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx))
816 << TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage
817 << *m_programs[progNdx]
818 << TestLog::EndSection;
819
820 m_highWorkloadSizes.resize(m_programData.size());
821 m_workloadRecordsFindHigh.resize(m_programData.size());
822 m_workloadRecords.resize(m_programData.size());
823
824 m_calibrator.clear(CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */,
825 1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
826 m_state = STATE_CALIBRATING;
827
828 prepareProgram(0);
829 prepareNextRound();
830 }
831
832 void OperatorPerformanceCase::deinit (void)
833 {
834 if (!m_attribBuffers.empty())
835 {
836 m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
837 m_attribBuffers.clear();
838 }
839
840 m_programs.clear();
841 }
842
843 void OperatorPerformanceCase::render (int numDrawCalls)
844 {
845 const glw::Functions& gl = m_renderCtx.getFunctions();
846 const int numVertices = getNumVertices(m_gridSizeX, m_gridSizeY);
847
848 for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
849 gl.drawArrays(GL_TRIANGLES, 0, numVertices);
850
851 glu::readPixels(m_renderCtx, 0, 0, tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish().
852 }
853
854 deUint64 OperatorPerformanceCase::renderAndMeasure (int numDrawCalls)
855 {
856 const deUint64 startTime = deGetMicroseconds();
857 render(numDrawCalls);
858 return deGetMicroseconds() - startTime;
859 }
860
861 void OperatorPerformanceCase::adjustAndLogGridAndViewport (void)
862 {
863 TestLog& log = m_testCtx.getLog();
864
865 // If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size.
866 if (m_calibrator.getCallCount() == 1)
867 {
868 const gls::MeasureState& calibratorMeasure = m_calibrator.getMeasureState();
869 const float drawCallTime = (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size();
870 const float targetDrawCallTime = m_calibrator.getParameters().targetFrameTimeUs;
871 const float targetRatio = targetDrawCallTime / drawCallTime;
872
873 if (targetRatio < 0.95f)
874 {
875 // Reduce grid or viewport size assuming draw call time scales proportionally.
876 if (m_caseType == CASETYPE_VERTEX)
877 {
878 const float targetRatioSqrt = deFloatSqrt(targetRatio);
879 m_gridSizeX = (int)(targetRatioSqrt * (float)m_gridSizeX);
880 m_gridSizeY = (int)(targetRatioSqrt * (float)m_gridSizeY);
881 TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1, "Can't decrease grid size enough to achieve low-enough draw times");
882 log << TestLog::Message << "Note: triangle grid size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
883 }
884 else
885 {
886 const float targetRatioSqrt = deFloatSqrt(targetRatio);
887 m_viewportWidth = (int)(targetRatioSqrt * (float)m_viewportWidth);
888 m_viewportHeight = (int)(targetRatioSqrt * (float)m_viewportHeight);
889 TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1, "Can't decrease viewport size enough to achieve low-enough draw times");
890 log << TestLog::Message << "Note: viewport size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
891 }
892 }
893 }
894
895 prepareProgram(0);
896
897 // Log grid and viewport sizes.
898 log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage;
899 log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
900 }
901
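// Top-level per-frame state machine: calibrate the draw call count, find a sufficiently heavy workload for each program, measure all workload sizes for each program, then compute and log the results.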
902 OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate (void)
903 {
904 const TheilSenCalibrator::State calibratorState = m_calibrator.getState();
905
906 if (calibratorState != TheilSenCalibrator::STATE_FINISHED)
907 {
908 if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
909 m_calibrator.recomputeParameters();
910 else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
911 m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount()));
912 else
913 DE_ASSERT(false);
914
915 if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
916 {
917 logCalibrationInfo(m_testCtx.getLog(), m_calibrator);
918 adjustAndLogGridAndViewport();
919 prepareNextRound();
920 m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount();
921 }
922 }
923 else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING)
924 {
925 if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload)
926 {
927 vector<WorkloadRecord>& records = m_state == STATE_FIND_HIGH_WORKLOAD ? m_workloadRecordsFindHigh[m_measureProgramNdx] : m_workloadRecords[m_measureProgramNdx];
928 records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount()));
929 m_workloadMeasurementNdx++;
930 }
931 else
932 prepareNextRound();
933 }
934 else
935 {
936 DE_ASSERT(m_state == STATE_REPORTING);
937
938 TestLog& log = m_testCtx.getLog();
939 const int drawCallCount = m_calibrator.getCallCount();
940
941 {
942 // Compute per-program estimators for measurements.
943 vector<SegmentedEstimator> estimators;
944 for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
945 estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx)));
946
947 // Log measurements and their estimators for all programs.
948 for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
949 {
950 const SegmentedEstimator& estimator = estimators[progNdx];
951 const string progNdxStr = de::toString(progNdx);
952 vector<WorkloadRecord> records = m_workloadRecords[progNdx];
953 std::sort(records.begin(), records.end());
954
955 {
956 const tcu::ScopedLogSection section(log,
957 "Program" + progNdxStr + "Measurements",
958 "Measurements for program " + progNdxStr);
959
960 // Sample list of individual frame times.
961
962 log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes", "Individual frame times")
963 << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
964 << TestLog::ValueInfo("FrameTime", "Frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
965 << TestLog::EndSampleInfo;
966
967 for (int i = 0; i < (int)records.size(); i++)
968 for (int j = 0; j < (int)records[i].frameTimes.size(); j++)
969 log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j] << TestLog::EndSample;
970
971 log << TestLog::EndSampleList;
972
973 // Sample list of median frame times.
974
975 log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times")
976 << TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
977 << TestLog::ValueInfo("MedianFrameTime", "Median frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
978 << TestLog::EndSampleInfo;
979
980 for (int i = 0; i < (int)records.size(); i++)
981 log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime() << TestLog::EndSample;
982
983 log << TestLog::EndSampleList;
984
985 log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate", "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient);
986
987 if (estimator.pivotX > -std::numeric_limits<float>::infinity())
988 log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to " << estimator.pivotX
989 << " seem to form a rising line, and the rest of data points seem to form a near-horizontal line" << TestLog::EndMessage
990 << TestLog::Message << "Note: the left line is estimated to be " << lineParamsString(estimator.left)
991 << " and the right line " << lineParamsString(estimator.right) << TestLog::EndMessage;
992 else
993 log << TestLog::Message << "Note: the data seem to form a single line: " << lineParamsString(estimator.right) << TestLog::EndMessage;
994 }
995 }
996
997 for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
998 {
999 if (estimators[progNdx].right.coefficient <= 0.0f)
1000 {
1001 log << TestLog::Message << "Slope of measurements for program " << progNdx << " isn't positive. Can't get sensible result." << TestLog::EndMessage;
1002 MEASUREMENT_FAIL();
1003 }
1004 }
1005
1006 // \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when
1007 // incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count
1008 // of R.
1009 //
1010 // The measurements of any single program can't tell us the final result (time of single operation),
1011 // so we use computeSingleOperationTime to compute it from multiple programs' measurements in a
1012 // subclass-defined manner.
1013 //
1014 // After that, microseconds per operation can be calculated as singleOperationTime / (D * R).
1015
1016 {
1017 vector<float> perProgramSlopes;
1018 for (int i = 0; i < (int)m_programs.size(); i++)
1019 perProgramSlopes.push_back(estimators[i].right.coefficient);
1020
1021 logSingleOperationCalculationInfo();
1022
1023 const float maxSlope = *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end());
1024 const float usecsPerFramePerOp = computeSingleOperationTime(perProgramSlopes);
1025 const int vertexOrFragmentCount = m_caseType == CASETYPE_VERTEX ?
1026 getNumVertices(m_gridSizeX, m_gridSizeY) :
1027 m_viewportWidth*m_viewportHeight;
1028 const double usecsPerDrawCallPerOp = usecsPerFramePerOp / (double)drawCallCount;
1029 const double usecsPerSingleOp = usecsPerDrawCallPerOp / (double)vertexOrFragmentCount;
1030 const double megaOpsPerSecond = (double)(drawCallCount*vertexOrFragmentCount) / usecsPerFramePerOp;
1031 const int numFreeOps = de::max(0, (int)deFloatFloor(intersectionX(estimators[0].left,
1032 LineParameters(estimators[0].right.offset,
1033 usecsPerFramePerOp))));
1034
1035 log << TestLog::Integer("VertexOrFragmentCount",
1036 "R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") + " count",
1037 "", QP_KEY_TAG_NONE, vertexOrFragmentCount)
1038
1039 << TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE, drawCallCount)
1040
1041 << TestLog::Integer("VerticesOrFragmentsPerFrame",
1042 "R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") + " per frame",
1043 "", QP_KEY_TAG_NONE, vertexOrFragmentCount*drawCallCount)
1044
1045 << TestLog::Float("TimePerFramePerOp",
1046 "Estimated cost of R*D " + string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments")
1047 + " (i.e. one frame) with one shader operation",
1048 "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp)
1049
1050 << TestLog::Float("TimePerDrawcallPerOp",
1051 "Estimated cost of one draw call with one shader operation",
1052 "us", QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp)
1053
1054 << TestLog::Float("TimePerSingleOp",
1055 "Estimated cost of a single shader operation",
1056 "us", QP_KEY_TAG_TIME, (float)usecsPerSingleOp);
1057
1058 // \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs,
1059 // for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The
1060 // following threshold values for accepting a negative or almost-zero result are rather quick and dirty.
1061 if (usecsPerFramePerOp <= -0.1f*maxSlope)
1062 {
1063 log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage;
1064 MEASUREMENT_FAIL();
1065 }
1066 else if (usecsPerFramePerOp <= 0.001*maxSlope)
1067 {
1068 log << TestLog::Message << "Cost of operation seems to be approximately zero." << TestLog::EndMessage;
1069 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
1070 }
1071 else
1072 {
1073 log << TestLog::Float("OpsPerSecond",
1074 "Operations per second",
1075 "Million/s", QP_KEY_TAG_PERFORMANCE, (float)megaOpsPerSecond)
1076
1077 << TestLog::Integer("NumFreeOps",
1078 "Estimated number of \"free\" operations",
1079 "", QP_KEY_TAG_PERFORMANCE, numFreeOps);
1080
1081 m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str());
1082 }
1083
1084 m_state = STATE_FINISHED;
1085 }
1086 }
1087
1088 return STOP;
1089 }
1090
1091 return CONTINUE;
1092 }
1093
1094 // Binary operator case.
1095 class BinaryOpCase : public OperatorPerformanceCase
1096 {
1097 public:
1098 BinaryOpCase (Context& context, const char* name, const char* description, const char* op,
1099 glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration);
1100
1101 protected:
1102 vector<ProgramContext> generateProgramData (void) const;
1103 void setGeneralUniforms (deUint32 program) const;
1104 void setWorkloadSizeUniform (deUint32 program, int numOperations) const;
1105 float computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const;
1106 void logSingleOperationCalculationInfo (void) const;
1107
1108 private:
1109 enum ProgramID
1110 {
1111 // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
1112 // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
1113 PROGRAM_WITH_BIGGER_LOOP = 0,
1114 PROGRAM_WITH_SMALLER_LOOP,
1115
1116 PROGRAM_LAST
1117 };
1118
1119 ProgramContext generateSingleProgramData (ProgramID) const;
1120
1121 const string m_op;
1122 const glu::DataType m_type;
1123 const glu::Precision m_precision;
1124 const bool m_useSwizzle;
1125 };
1126
1127 BinaryOpCase::BinaryOpCase (Context& context, const char* name, const char* description, const char* op,
1128 glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration)
1129 : OperatorPerformanceCase (context.getTestContext(), context.getRenderContext(), name, description,
1130 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
1131 , m_op (op)
1132 , m_type (type)
1133 , m_precision (precision)
1134 , m_useSwizzle (useSwizzle)
1135 {
1136 }
1137
1138 BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData (ProgramID programID) const
1139 {
1140 DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type));
1141
1142 const bool isVertexCase = m_caseType == CASETYPE_VERTEX;
1143 const char* const precision = glu::getPrecisionName(m_precision);
1144 const char* const inputPrecision = glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision;
1145 const char* const typeName = getDataTypeName(m_type);
1146
1147 std::ostringstream vtx;
1148 std::ostringstream frag;
1149 std::ostringstream& op = isVertexCase ? vtx : frag;
1150
1151 vtx << "#version 300 es\n";
1152 frag << "#version 300 es\n"
1153 << "layout (location = 0) out mediump vec4 o_color;\n";
1154
1155 // Attributes.
1156 vtx << "in highp vec4 a_position;\n";
1157 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1158 vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
1159
1160 if (isVertexCase)
1161 {
1162 vtx << "out mediump vec4 v_color;\n";
1163 frag << "in mediump vec4 v_color;\n";
1164 }
1165 else
1166 {
1167 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1168 {
1169 vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
1170 frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
1171 }
1172 }
1173
1174 op << "uniform mediump int u_numLoopIterations;\n";
1175 if (isVertexCase)
1176 op << "uniform mediump float u_zero;\n";
1177
1178 vtx << "\n";
1179 vtx << "void main()\n";
1180 vtx << "{\n";
1181
1182 if (!isVertexCase)
1183 vtx << "\tgl_Position = a_position;\n";
1184
1185 frag << "\n";
1186 frag << "void main()\n";
1187 frag << "{\n";
1188
1189 // Expression inputs.
1190 const char* const prefix = isVertexCase ? "a_" : "v_";
1191 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1192 {
1193 const int inSize = getDataTypeScalarSize(m_type);
1194 const bool isInt = de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4);
1195 const bool cast = isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4);
1196
1197 op << "\t" << precision << " " << typeName << " in" << i << " = ";
1198
1199 if (cast)
1200 op << typeName << "(";
1201
1202 op << prefix << "in" << i;
1203
1204 if (m_useSwizzle)
1205 op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize-1];
1206
1207 if (cast)
1208 op << ")";
1209
1210 op << ";\n";
1211 }
1212
1213 // Operation accumulation variables.
1214 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1215 {
1216 op << "\t" << precision << " " << typeName << " acc" << i << "a" << " = in" << i+0 << ";\n";
1217 op << "\t" << precision << " " << typeName << " acc" << i << "b" << " = in" << i+1 << ";\n";
1218 }
1219
1220 // Loop, with expressions in it.
1221 op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
1222 op << "\t{\n";
1223 {
1224 const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ? BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT : BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1225 for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++)
1226 {
1227 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1228 {
1229 if (i > 0 || unrollNdx > 0)
1230 op << "\n";
1231 op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a" << ";\n";
1232 op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b" << ";\n";
1233 }
1234 }
1235 }
1236 op << "\t}\n";
1237 op << "\n";
1238
1239 // Result variable (sum of accumulation variables).
1240 op << "\t" << precision << " " << typeName << " res =";
1241 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1242 op << (i > 0 ? " "+m_op : "") << " acc" << i << "b";
1243 op << ";\n";
1244
1245 // Convert to color.
1246 op << "\tmediump vec4 color = ";
1247 if (m_type == TYPE_FLOAT_VEC4)
1248 op << "res";
1249 else
1250 {
1251 int size = getDataTypeScalarSize(m_type);
1252 op << "vec4(res";
1253
1254 for (int i = size; i < 4; i++)
1255 op << ", " << (i == 3 ? "1.0" : "0.0");
1256
1257 op << ")";
1258 }
1259 op << ";\n";
1260 op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
1261
1262 if (isVertexCase)
1263 {
1264 vtx << " gl_Position = a_position + u_zero*color;\n";
1265 frag << " o_color = v_color;\n";
1266 }
1267 else
1268 {
1269 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1270 vtx << " v_in" << i << " = a_in" << i << ";\n";
1271 }
1272
1273 vtx << "}\n";
1274 frag << "}\n";
1275
1276 {
1277 vector<AttribSpec> attributes;
1278 for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
1279 attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
1280 Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1281 Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1282 Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1283 Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4)));
1284
1285 {
1286 string description = "This is the program with the ";
1287
1288 description += programID == PROGRAM_WITH_SMALLER_LOOP ? "smaller"
1289 : programID == PROGRAM_WITH_BIGGER_LOOP ? "bigger"
1290 : DE_NULL;
1291
1292 description += " loop.\n"
1293 "Note: workload size for this program means the number of loop iterations.";
1294
1295 return ProgramContext(vtx.str(), frag.str(), attributes, description);
1296 }
1297 }
1298 }
1299
1300 vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData (void) const
1301 {
1302 vector<ProgramContext> progData;
1303 for (int i = 0; i < PROGRAM_LAST; i++)
1304 progData.push_back(generateSingleProgramData((ProgramID)i));
1305 return progData;
1306 }
1307
1308 void BinaryOpCase::setGeneralUniforms (deUint32 program) const
1309 {
1310 const glw::Functions& gl = m_renderCtx.getFunctions();
1311 gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
1312 }
1313
1314 void BinaryOpCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
1315 {
1316 const glw::Functions& gl = m_renderCtx.getFunctions();
1317 gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations);
1318 }
1319
1320 float BinaryOpCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
1321 {
1322 DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
1323
1324 const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
1325 const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
1326 const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1327 DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram);
1328 const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
1329 const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP];
1330
1331 return programOperationCostDiff / (float)opDiff;
1332 }
1333
1334 void BinaryOpCase::logSingleOperationCalculationInfo (void) const
1335 {
1336 const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
1337 const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
1338 const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
1339 const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
1340 const char* const opName = m_op == "+" ? "addition"
1341 : m_op == "-" ? "subtraction"
1342 : m_op == "*" ? "multiplication"
1343 : m_op == "/" ? "division"
1344 : DE_NULL;
1345 DE_ASSERT(opName != DE_NULL);
1346
1347 m_testCtx.getLog() << TestLog::Message << "Note: the bigger program contains " << opDiff << " more "
1348 << opName << " operations in one loop iteration than the small program; "
1349 << "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff
1350 << TestLog::EndMessage;
1351 }
1352
1353 // Built-in function case.
1354 class FunctionCase : public OperatorPerformanceCase
1355 {
1356 public:
1357 enum
1358 {
1359 MAX_PARAMS = 3
1360 };
1361
1362 FunctionCase (Context& context,
1363 const char* name,
1364 const char* description,
1365 const char* func,
1366 glu::DataType returnType,
1367 const glu::DataType paramTypes[MAX_PARAMS],
1368 const Vec4& attribute,
1369 int modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative.
1370 bool useNearlyConstantInputs, //!< Function inputs shouldn't be much bigger than 'attribute'.
1371 glu::Precision precision,
1372 bool isVertex,
1373 const InitialCalibrationStorage& initialCalibration);
1374
1375 protected:
1376 vector<ProgramContext> generateProgramData (void) const;
1377 void setGeneralUniforms (deUint32 program) const;
1378 void setWorkloadSizeUniform (deUint32 program, int numOperations) const;
1379 float computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const;
1380 void logSingleOperationCalculationInfo (void) const;
1381
1382 private:
1383 enum ProgramID
1384 {
1385 // \note 0-based sequential numbering is relevant, because these are also used as vector indices.
1386 // \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
1387 PROGRAM_WITH_FUNCTION_CALLS = 0,
1388 PROGRAM_WITHOUT_FUNCTION_CALLS,
1389
1390 PROGRAM_LAST
1391 };
1392
1393 //! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum.
1394 static string sumExpr (const string& aExpr, const string& bExpr, glu::DataType type);
1395 //! Forms an expression used to increment an input value in the shader. If type is boolean, this is just
1396 //! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index,
1397 //! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation.
1398 static string incrementExpr (const string& baseExpr, glu::DataType type, bool divide);
1399
1400 ProgramContext generateSingleProgramData (ProgramID) const;
1401
1402 const string m_func;
1403 const glu::DataType m_returnType;
1404 glu::DataType m_paramTypes[MAX_PARAMS];
1405 // \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a
1406 // compile-time constant (2.0) is added. This is a quick and dirty way to deal with
1407 // functions like clamp or smoothstep that require that a certain parameter is
1408 // greater than a certain other parameter.
1409 const int m_modifyParamNdx;
1410 // \note m_useNearlyConstantInputs determines whether the inputs given to the function
1411 // should increase (w.r.t m_attribute) only by very small amounts. This is relevant
1412 // for functions like asin, which requires its inputs to be in a specific range.
1413 // In practice, this affects whether expressions used to increment the input
1414 // variables use division instead of multiplication; normally, multiplication is used,
1415 // but it's hard to keep the increments very small that way, and division shouldn't
1416 // be the default, since for many functions (probably not asin, luckily), division
1417 // is too heavy and dominates time-wise.
1418 const bool m_useNearlyConstantInputs;
1419 const Vec4 m_attribute;
1420 const glu::Precision m_precision;
1421 };
1422
1423 FunctionCase::FunctionCase (Context& context,
1424 const char* name,
1425 const char* description,
1426 const char* func,
1427 glu::DataType returnType,
1428 const glu::DataType paramTypes[MAX_PARAMS],
1429 const Vec4& attribute,
1430 int modifyParamNdx,
1431 bool useNearlyConstantInputs,
1432 glu::Precision precision,
1433 bool isVertex,
1434 const InitialCalibrationStorage& initialCalibration)
1435 : OperatorPerformanceCase (context.getTestContext(), context.getRenderContext(), name, description,
1436 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
1437 , m_func (func)
1438 , m_returnType (returnType)
1439 , m_modifyParamNdx (modifyParamNdx)
1440 , m_useNearlyConstantInputs (useNearlyConstantInputs)
1441 , m_attribute (attribute)
1442 , m_precision (precision)
1443 {
1444 for (int i = 0; i < MAX_PARAMS; i++)
1445 m_paramTypes[i] = paramTypes[i];
1446 }
1447
1448 string FunctionCase::sumExpr (const string& aExpr, const string& bExpr, glu::DataType type)
1449 {
1450 if (glu::isDataTypeBoolOrBVec(type))
1451 {
1452 if (type == glu::TYPE_BOOL)
1453 return "(" + aExpr + " == " + bExpr + ")";
1454 else
1455 return "equal(" + aExpr + ", " + bExpr + ")";
1456 }
1457 else
1458 return "(" + aExpr + " + " + bExpr + ")";
1459 }
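// For example (illustrative only):
//   sumExpr("res0", "res1", glu::TYPE_FLOAT_VEC4) yields "(res0 + res1)"
//   sumExpr("res0", "res1", glu::TYPE_BOOL_VEC4)  yields "equal(res0, res1)"
//   sumExpr("res0", "res1", glu::TYPE_BOOL)       yields "(res0 == res1)"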
1460
1461 string FunctionCase::incrementExpr (const string& baseExpr, glu::DataType type, bool divide)
1462 {
1463 const string mulOrDiv = divide ? "/" : "*";
1464
1465 return glu::isDataTypeBoolOrBVec(type) ? baseExpr
1466 : glu::isDataTypeIntOrIVec(type) ? "(" + baseExpr + mulOrDiv + "(i+1))"
1467 : "(" + baseExpr + mulOrDiv + "float(i+1))";
1468 }
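// For example (illustrative only):
//   incrementExpr("u_incA", glu::TYPE_FLOAT_VEC4, false) yields "(u_incA*float(i+1))"
//   incrementExpr("u_incA", glu::TYPE_FLOAT_VEC4, true)  yields "(u_incA/float(i+1))"
//   incrementExpr("u_incA", glu::TYPE_INT_VEC2,   false) yields "(u_incA*(i+1))"
//   incrementExpr("u_incA", glu::TYPE_BOOL,       false) yields "u_incA" (unmodified)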
1469
1470 FunctionCase::ProgramContext FunctionCase::generateSingleProgramData (ProgramID programID) const
1471 {
1472 const bool isVertexCase = m_caseType == CASETYPE_VERTEX;
1473 const char* const precision = glu::getPrecisionName(m_precision);
1474 const char* const returnTypeName = getDataTypeName(m_returnType);
1475 const string returnPrecisionMaybe = glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " ";
1476 const char* inputPrecision = DE_NULL;
1477 const bool isMatrixReturn = isDataTypeMatrix(m_returnType);
1478 int numParams = 0;
1479 const char* paramTypeNames[MAX_PARAMS];
1480 string paramPrecisionsMaybe[MAX_PARAMS];
1481
1482 for (int i = 0; i < MAX_PARAMS; i++)
1483 {
1484 paramTypeNames[i] = getDataTypeName(m_paramTypes[i]);
1485 paramPrecisionsMaybe[i] = glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? "" : string() + precision + " ";
1486
1487 if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP)
1488 inputPrecision = "mediump";
1489
1490 if (m_paramTypes[i] != TYPE_INVALID)
1491 numParams = i+1;
1492 }
1493
1494 DE_ASSERT(numParams > 0);
1495
1496 if (inputPrecision == DE_NULL)
1497 inputPrecision = precision;
1498
1499 int numAttributes = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1;
1500 std::ostringstream vtx;
1501 std::ostringstream frag;
1502 std::ostringstream& op = isVertexCase ? vtx : frag;
1503
1504 vtx << "#version 300 es\n";
1505 frag << "#version 300 es\n"
1506 << "layout (location = 0) out mediump vec4 o_color;\n";
1507
1508 // Attributes.
1509 vtx << "in highp vec4 a_position;\n";
1510 for (int i = 0; i < numAttributes; i++)
1511 vtx << "in " << inputPrecision << " vec4 a_in" << i << ";\n";
1512
1513 if (isVertexCase)
1514 {
1515 vtx << "out mediump vec4 v_color;\n";
1516 frag << "in mediump vec4 v_color;\n";
1517 }
1518 else
1519 {
1520 for (int i = 0; i < numAttributes; i++)
1521 {
1522 vtx << "out " << inputPrecision << " vec4 v_in" << i << ";\n";
1523 frag << "in " << inputPrecision << " vec4 v_in" << i << ";\n";
1524 }
1525 }
1526
1527 op << "uniform mediump int u_numLoopIterations;\n";
1528 if (isVertexCase)
1529 op << "uniform mediump float u_zero;\n";
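// \note u_zero is set to 0.0 at run time (see setGeneralUniforms()); the vertex case adds
//       u_zero*color to gl_Position near the end of main() so that the computed color
//       cannot be optimized away, while leaving the output position unchanged.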
1530
1531 for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1532 op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc" << (char)('A'+paramNdx) << ";\n";
1533
1534 vtx << "\n";
1535 vtx << "void main()\n";
1536 vtx << "{\n";
1537
1538 if (!isVertexCase)
1539 vtx << "\tgl_Position = a_position;\n";
1540
1541 frag << "\n";
1542 frag << "void main()\n";
1543 frag << "{\n";
1544
1545 // Function call input and return value accumulation variables.
1546 {
1547 const char* const inPrefix = isVertexCase ? "a_" : "v_";
1548
1549 for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1550 {
1551 for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1552 {
1553 const glu::DataType paramType = m_paramTypes[paramNdx];
1554 const bool mustCast = paramType != glu::TYPE_FLOAT_VEC4;
1555
1556 op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx << (char)('a'+paramNdx) << " = ";
1557
1558 if (mustCast)
1559 op << paramTypeNames[paramNdx] << "(";
1560
1561 if (glu::isDataTypeMatrix(paramType))
1562 {
1563 static const char* const swizzles[3] = { "x", "xy", "xyz" };
1564 const int numRows = glu::getDataTypeMatrixNumRows(paramType);
1565 const int numCols = glu::getDataTypeMatrixNumColumns(paramType);
1566 const string swizzle = numRows < 4 ? string() + "." + swizzles[numRows-1] : "";
1567
1568 for (int i = 0; i < numCols; i++)
1569 op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx+paramNdx << swizzle;
1570 }
1571 else
1572 {
1573 op << inPrefix << "in" << calcNdx+paramNdx;
1574
1575 if (paramNdx == m_modifyParamNdx)
1576 {
1577 DE_ASSERT(glu::isDataTypeFloatOrVec(paramType));
1578 op << " + 2.0";
1579 }
1580 }
1581
1582 if (mustCast)
1583 op << ")";
1584
1585 op << ";\n";
1586 }
1587
1588 op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName << "(0);\n";
1589 }
1590 }
1591
1592 // Loop with expressions in it.
1593 op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
1594 op << "\t{\n";
1595 for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
1596 {
1597 if (calcNdx > 0)
1598 op << "\n";
1599
1600 op << "\t\t{\n";
1601
1602 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1603 {
1604 const string inputName = "in" + de::toString(calcNdx) + (char)('a'+inputNdx);
1605 const string incName = string() + "u_inc" + (char)('A'+inputNdx);
1606 const string incExpr = incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs);
1607
1608 op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n";
1609 }
1610
1611 op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = ";
1612
1613 if (programID == PROGRAM_WITH_FUNCTION_CALLS)
1614 {
1615 op << m_func << "(";
1616
1617 for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
1618 {
1619 if (paramNdx > 0)
1620 op << ", ";
1621
1622 op << "in" << calcNdx << (char)('a'+paramNdx);
1623 }
1624
1625 op << ")";
1626 }
1627 else
1628 {
1629 DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS);
1630 op << returnTypeName << "(1)";
1631 }
1632
1633 op << ";\n";
1634
1635 {
1636 const string resName = "res" + de::toString(calcNdx);
1637 const string evalName = "eval" + de::toString(calcNdx);
1638 const string incExpr = incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs);
1639
1640 op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n";
1641 }
1642
1643 op << "\t\t}\n";
1644 }
1645 op << "\t}\n";
1646 op << "\n";
1647
1648 // Result variables.
1649 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1650 {
1651 op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A'+inputNdx) << " = ";
1652 {
1653 string expr = string() + "in0" + (char)('a'+inputNdx);
1654 for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1655 expr = sumExpr(expr, string() + "in" + de::toString(i) + (char)('a'+inputNdx), m_paramTypes[inputNdx]);
1656 op << expr;
1657 }
1658 op << ";\n";
1659 }
1660
1661 op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = ";
1662 {
1663 string expr = "res0";
1664 for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
1665 expr = sumExpr(expr, "res" + de::toString(i), m_returnType);
1666 op << expr;
1667 }
1668 op << ";\n";
1669
1670 {
1671 glu::DataType finalResultDataType = glu::TYPE_LAST;
1672
1673 if (glu::isDataTypeMatrix(m_returnType))
1674 {
1675 finalResultDataType = m_returnType;
1676
1677 op << "\t" << precision << " " << returnTypeName << " finalRes = ";
1678
1679 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1680 {
1681 DE_ASSERT(m_paramTypes[inputNdx] == m_returnType);
1682 op << "sumIn" << (char)('A'+inputNdx) << " + ";
1683 }
1684 op << "sumRes;\n";
1685 }
1686 else
1687 {
1688 int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType);
1689 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1690 numFinalResComponents = de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx]));
1691
1692 finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents);
1693
1694 {
1695 const string finalResType = glu::getDataTypeName(finalResultDataType);
1696 op << "\t" << precision << " " << finalResType << " finalRes = ";
1697 for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
1698 op << finalResType << "(sumIn" << (char)('A'+inputNdx) << ") + ";
1699 op << finalResType << "(sumRes);\n";
1700 }
1701 }
1702
1703 // Convert to color.
1704 op << "\tmediump vec4 color = ";
1705 if (finalResultDataType == TYPE_FLOAT_VEC4)
1706 op << "finalRes";
1707 else
1708 {
1709 int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) : getDataTypeScalarSize(finalResultDataType);
1710
1711 op << "vec4(";
1712
1713 if (isMatrixReturn)
1714 {
1715 for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++)
1716 {
1717 if (i > 0)
1718 op << " + ";
1719 op << "finalRes[" << i << "]";
1720 }
1721 }
1722 else
1723 op << "finalRes";
1724
1725 for (int i = size; i < 4; i++)
1726 op << ", " << (i == 3 ? "1.0" : "0.0");
1727
1728 op << ")";
1729 }
1730 op << ";\n";
1731 op << "\t" << (isVertexCase ? "v_color" : "o_color") << " = color;\n";
1732
1733 if (isVertexCase)
1734 {
1735 vtx << "\tgl_Position = a_position + u_zero*color;\n";
1736 frag << "\to_color = v_color;\n";
1737 }
1738 else
1739 {
1740 for (int i = 0; i < numAttributes; i++)
1741 vtx << "\tv_in" << i << " = a_in" << i << ";\n";
1742 }
1743
1744 vtx << "}\n";
1745 frag << "}\n";
1746 }
1747
1748 {
1749 vector<AttribSpec> attributes;
1750 for (int i = 0; i < numAttributes; i++)
1751 attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
1752 m_attribute.swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
1753 m_attribute.swizzle((i+1)%4, (i+2)%4, (i+3)%4, (i+0)%4),
1754 m_attribute.swizzle((i+2)%4, (i+3)%4, (i+0)%4, (i+1)%4),
1755 m_attribute.swizzle((i+3)%4, (i+0)%4, (i+1)%4, (i+2)%4)));
1756
1757 {
1758 string description = "This is the program ";
1759
1760 description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS ? "without"
1761 : programID == PROGRAM_WITH_FUNCTION_CALLS ? "with"
1762 : DE_NULL;
1763
1764 description += " '" + m_func + "' function calls.\n"
1765 "Note: workload size for this program means the number of loop iterations.";
1766
1767 return ProgramContext(vtx.str(), frag.str(), attributes, description);
1768 }
1769 }
1770 }
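// For reference, an abridged and illustrative (not verbatim) sketch of the fragment shader
// that generateSingleProgramData() produces for PROGRAM_WITH_FUNCTION_CALLS with a single
// mediump float parameter (e.g. func = "sin"); the in1a..in3a / res1..res3 repetitions and
// the trivial vertex shader are omitted:
//
//   #version 300 es
//   layout (location = 0) out mediump vec4 o_color;
//   in mediump vec4 v_in0; // ... v_in1, v_in2, v_in3
//   uniform mediump int u_numLoopIterations;
//   uniform mediump float u_incA;
//
//   void main()
//   {
//       mediump float in0a = float(v_in0);
//       mediump float res0 = float(0);
//       // ... same for calculations 1..3
//       for (int i = 0; i < u_numLoopIterations; i++)
//       {
//           {
//               in0a = (in0a + (u_incA*float(i+1)));
//               mediump float eval0 = sin(in0a);
//               res0 = (res0 + (eval0*float(i+1)));
//           }
//           // ... same for calculations 1..3
//       }
//       mediump float sumInA = (((in0a + in1a) + in2a) + in3a);
//       mediump float sumRes = (((res0 + res1) + res2) + res3);
//       mediump float finalRes = float(sumInA) + float(sumRes);
//       mediump vec4 color = vec4(finalRes, 0.0, 0.0, 1.0);
//       o_color = color;
//   }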
1771
1772 vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData (void) const
1773 {
1774 vector<ProgramContext> progData;
1775 for (int i = 0; i < PROGRAM_LAST; i++)
1776 progData.push_back(generateSingleProgramData((ProgramID)i));
1777 return progData;
1778 }
1779
1780 void FunctionCase::setGeneralUniforms (deUint32 program) const
1781 {
1782 const glw::Functions& gl = m_renderCtx.getFunctions();
1783
1784 gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);
1785
1786 for (int paramNdx = 0; paramNdx < MAX_PARAMS; paramNdx++)
1787 {
1788 if (m_paramTypes[paramNdx] != glu::TYPE_INVALID)
1789 {
1790 const glu::DataType paramType = m_paramTypes[paramNdx];
1791 const int scalarSize = glu::getDataTypeScalarSize(paramType);
1792 const int location = gl.getUniformLocation(program, (string() + "u_inc" + (char)('A'+paramNdx)).c_str());
1793
1794 if (glu::isDataTypeFloatOrVec(paramType))
1795 {
1796 float values[4];
1797 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1798 values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary small values.
1799 uniformNfv(gl, scalarSize, location, 1, &values[0]);
1800 }
1801 else if (glu::isDataTypeIntOrIVec(paramType))
1802 {
1803 int values[4];
1804 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1805 values[i] = paramNdx*100 + i; // Arbitrary values.
1806 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1807 }
1808 else if (glu::isDataTypeBoolOrBVec(paramType))
1809 {
1810 int values[4];
1811 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1812 values[i] = (paramNdx >> i) & 1; // Arbitrary values.
1813 uniformNiv(gl, scalarSize, location, 1, &values[0]);
1814 }
1815 else if (glu::isDataTypeMatrix(paramType))
1816 {
1817 const int size = glu::getDataTypeMatrixNumRows(paramType);
1818 DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(paramType));
1819 float values[4*4];
1820 for (int i = 0; i < DE_LENGTH_OF_ARRAY(values); i++)
1821 values[i] = (float)paramNdx*0.01f + (float)i*0.001f; // Arbitrary values.
1822 uniformMatrixNfv(gl, size, location, 1, &values[0]);
1823 }
1824 else
1825 DE_ASSERT(false);
1826 }
1827 }
1828 }
1829
1830 void FunctionCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
1831 {
1832 const glw::Functions& gl = m_renderCtx.getFunctions();
1833 const int loc = gl.getUniformLocation(program, "u_numLoopIterations");
1834
1835 gl.uniform1i(loc, numLoopIterations);
1836 }
1837
1838 float FunctionCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
1839 {
1840 DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
1841 const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1842 const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] - perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS];
1843
1844 return programOperationCostDiff / (float)numFunctionCalls;
1845 }
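// For illustration, with hypothetical per-iteration costs of 9.0 (program with calls) and
// 1.0 (program without calls), the estimated cost of one function call would be
// (9.0 - 1.0) / 4 = 2.0 time units, since each iteration contains 4 independent calls.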
1846
1847 void FunctionCase::logSingleOperationCalculationInfo (void) const
1848 {
1849 const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
1850
1851 m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains "
1852 << numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; "
1853 << "cost of one operation is calculated as "
1854 << "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls << TestLog::EndMessage;
1855 }
1856
1857 } // anonymous
1858
1859 ShaderOperatorTests::ShaderOperatorTests (Context& context)
1860 : TestCaseGroup(context, "operator", "Operator Performance Tests")
1861 {
1862 }
1863
1864 ShaderOperatorTests::~ShaderOperatorTests (void)
1865 {
1866 }
1867
1868 void ShaderOperatorTests::init (void)
1869 {
1870 // Binary operator cases
1871
1872 static const DataType binaryOpTypes[] =
1873 {
1874 TYPE_FLOAT,
1875 TYPE_FLOAT_VEC2,
1876 TYPE_FLOAT_VEC3,
1877 TYPE_FLOAT_VEC4,
1878 TYPE_INT,
1879 TYPE_INT_VEC2,
1880 TYPE_INT_VEC3,
1881 TYPE_INT_VEC4,
1882 };
1883 static const Precision precisions[] =
1884 {
1885 PRECISION_LOWP,
1886 PRECISION_MEDIUMP,
1887 PRECISION_HIGHP
1888 };
1889 static const struct
1890 {
1891 const char* name;
1892 const char* op;
1893 bool swizzle;
1894 } binaryOps[] =
1895 {
1896 { "add", "+", false },
1897 { "sub", "-", true },
1898 { "mul", "*", false },
1899 { "div", "/", true }
1900 };
1901
1902 tcu::TestCaseGroup* const binaryOpsGroup = new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests");
1903 addChild(binaryOpsGroup);
1904
1905 for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++)
1906 {
1907 tcu::TestCaseGroup* const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, "");
1908 binaryOpsGroup->addChild(opGroup);
1909
1910 for (int isFrag = 0; isFrag <= 1; isFrag++)
1911 {
1912 const BinaryOpCase::InitialCalibrationStorage shaderGroupCalibrationStorage (new BinaryOpCase::InitialCalibration);
1913 const bool isVertex = isFrag == 0;
1914 tcu::TestCaseGroup* const shaderGroup = new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", "");
1915 opGroup->addChild(shaderGroup);
1916
1917 for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++)
1918 {
1919 for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++)
1920 {
1921 const DataType type = binaryOpTypes[typeNdx];
1922 const Precision precision = precisions[precNdx];
1923 const char* const op = binaryOps[opNdx].op;
1924 const bool useSwizzle = binaryOps[opNdx].swizzle;
1925 std::ostringstream name;
1926
1927 name << getPrecisionName(precision) << "_" << getDataTypeName(type);
1928
1929 shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision, useSwizzle, isVertex, shaderGroupCalibrationStorage));
1930 }
1931 }
1932 }
1933 }
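// For illustration, the loops above produce a case hierarchy of the form
// binary_operator.<op>.<vertex|fragment>.<precision>_<type> under this "operator" group,
// e.g. binary_operator.mul.fragment.mediump_vec4 or binary_operator.div.vertex.highp_int.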
1934
1935 // Built-in function cases.
1936
1937 // Non-specific (i.e. includes gentypes) parameter types for the functions.
1938 enum ValueType
1939 {
1940 VALUE_NONE = 0,
1941 VALUE_FLOAT = (1<<0), // float scalar
1942 VALUE_FLOAT_VEC = (1<<1), // float vector
1943 VALUE_FLOAT_VEC34 = (1<<2), // float vector of size 3 or 4
1944 VALUE_FLOAT_GENTYPE = (1<<3), // float scalar/vector
1945 VALUE_VEC3 = (1<<4), // vec3 only
1946 VALUE_VEC4 = (1<<5), // vec4 only
1947 VALUE_MATRIX = (1<<6), // matrix
1948 VALUE_BOOL = (1<<7), // boolean scalar
1949 VALUE_BOOL_VEC = (1<<8), // boolean vector
1950 VALUE_BOOL_VEC4 = (1<<9), // bvec4 only
1951 VALUE_BOOL_GENTYPE = (1<<10), // boolean scalar/vector
1952 VALUE_INT = (1<<11), // int scalar
1953 VALUE_INT_VEC = (1<<12), // int vector
1954 VALUE_INT_VEC4 = (1<<13), // ivec4 only
1955 VALUE_INT_GENTYPE = (1<<14), // int scalar/vector
1956
1957 // Shorthands.
1958 N = VALUE_NONE,
1959 F = VALUE_FLOAT,
1960 FV = VALUE_FLOAT_VEC,
1961 VL = VALUE_FLOAT_VEC34, // L for "large"
1962 GT = VALUE_FLOAT_GENTYPE,
1963 V3 = VALUE_VEC3,
1964 V4 = VALUE_VEC4,
1965 M = VALUE_MATRIX,
1966 B = VALUE_BOOL,
1967 BV = VALUE_BOOL_VEC,
1968 B4 = VALUE_BOOL_VEC4,
1969 BGT = VALUE_BOOL_GENTYPE,
1970 I = VALUE_INT,
1971 IV = VALUE_INT_VEC,
1972 I4 = VALUE_INT_VEC4,
1973 IGT = VALUE_INT_GENTYPE,
1974
1975 VALUE_ANY_FLOAT = VALUE_FLOAT | VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34,
1976 VALUE_ANY_INT = VALUE_INT | VALUE_INT_VEC | VALUE_INT_GENTYPE | VALUE_INT_VEC4,
1977 VALUE_ANY_BOOL = VALUE_BOOL | VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE | VALUE_BOOL_VEC4,
1978
1979 VALUE_ANY_GENTYPE = VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_FLOAT_VEC34 |
1980 VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE |
1981 VALUE_INT_VEC | VALUE_INT_GENTYPE |
1982 VALUE_MATRIX
1983 };
1984 enum PrecisionMask
1985 {
1986 PRECMASK_NA = 0, //!< Precision not applicable (booleans)
1987 PRECMASK_LOWP = (1<<PRECISION_LOWP),
1988 PRECMASK_MEDIUMP = (1<<PRECISION_MEDIUMP),
1989 PRECMASK_HIGHP = (1<<PRECISION_HIGHP),
1990
1991 PRECMASK_MEDIUMP_HIGHP = (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP),
1992 PRECMASK_ALL = (1<<PRECISION_LOWP) | (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP)
1993 };
1994
1995 static const DataType floatTypes[] =
1996 {
1997 TYPE_FLOAT,
1998 TYPE_FLOAT_VEC2,
1999 TYPE_FLOAT_VEC3,
2000 TYPE_FLOAT_VEC4
2001 };
2002 static const DataType intTypes[] =
2003 {
2004 TYPE_INT,
2005 TYPE_INT_VEC2,
2006 TYPE_INT_VEC3,
2007 TYPE_INT_VEC4
2008 };
2009 static const DataType boolTypes[] =
2010 {
2011 TYPE_BOOL,
2012 TYPE_BOOL_VEC2,
2013 TYPE_BOOL_VEC3,
2014 TYPE_BOOL_VEC4
2015 };
2016 static const DataType matrixTypes[] =
2017 {
2018 TYPE_FLOAT_MAT2,
2019 TYPE_FLOAT_MAT3,
2020 TYPE_FLOAT_MAT4
2021 };
2022
2023 tcu::TestCaseGroup* const angleAndTrigonometryGroup = new tcu::TestCaseGroup(m_testCtx, "angle_and_trigonometry", "Built-In Angle and Trigonometry Function Performance Tests");
2024 tcu::TestCaseGroup* const exponentialGroup = new tcu::TestCaseGroup(m_testCtx, "exponential", "Built-In Exponential Function Performance Tests");
2025 tcu::TestCaseGroup* const commonFunctionsGroup = new tcu::TestCaseGroup(m_testCtx, "common_functions", "Built-In Common Function Performance Tests");
2026 tcu::TestCaseGroup* const geometricFunctionsGroup = new tcu::TestCaseGroup(m_testCtx, "geometric", "Built-In Geometric Function Performance Tests");
2027 tcu::TestCaseGroup* const matrixFunctionsGroup = new tcu::TestCaseGroup(m_testCtx, "matrix", "Built-In Matrix Function Performance Tests");
2028 tcu::TestCaseGroup* const floatCompareGroup = new tcu::TestCaseGroup(m_testCtx, "float_compare", "Built-In Floating Point Comparison Function Performance Tests");
2029 tcu::TestCaseGroup* const intCompareGroup = new tcu::TestCaseGroup(m_testCtx, "int_compare", "Built-In Integer Comparison Function Performance Tests");
2030 tcu::TestCaseGroup* const boolCompareGroup = new tcu::TestCaseGroup(m_testCtx, "bool_compare", "Built-In Boolean Comparison Function Performance Tests");
2031
2032 addChild(angleAndTrigonometryGroup);
2033 addChild(exponentialGroup);
2034 addChild(commonFunctionsGroup);
2035 addChild(geometricFunctionsGroup);
2036 addChild(matrixFunctionsGroup);
2037 addChild(floatCompareGroup);
2038 addChild(intCompareGroup);
2039 addChild(boolCompareGroup);
2040
2041 // Some attributes to be used as parameters for the functions.
2042 const Vec4 attrPos = Vec4( 2.3f, 1.9f, 0.8f, 0.7f);
2043 const Vec4 attrNegPos = Vec4(-1.3f, 2.5f, -3.5f, 4.3f);
2044 const Vec4 attrSmall = Vec4(-0.9f, 0.8f, -0.4f, 0.2f);
2045 const Vec4 attrBig = Vec4( 1.3f, 2.4f, 3.0f, 4.0f);
2046
2047 // \todo The following functions and variants are missing, and should be added in the future:
2048 // - modf (has an output parameter, not currently handled by test code)
2049 // - functions with uint/uvec* return or parameter types
2050 // - non-matrix <-> matrix functions (outerProduct etc.)
2051 // \note Remember to update test spec when these are added.
2052
2053 // Function name, return type and parameter type information; also, what attribute should be used in the test.
2054 // \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array.
2055 // \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function.
2056 static const struct
2057 {
2058 tcu::TestCaseGroup* parentGroup;
2059 const char* groupName;
2060 const char* func;
2061 const ValueType types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order.
2062 const Vec4& attribute;
2063 int modifyParamNdx;
2064 bool useNearlyConstantInputs;
2065 bool booleanCase;
2066 PrecisionMask precMask;
2067 } functionCaseGroups[] =
2068 {
2069 { angleAndTrigonometryGroup, "radians", "radians", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2070 { angleAndTrigonometryGroup, "degrees", "degrees", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2071 { angleAndTrigonometryGroup, "sin", "sin", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2072 { angleAndTrigonometryGroup, "cos", "cos", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2073 { angleAndTrigonometryGroup, "tan", "tan", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2074 { angleAndTrigonometryGroup, "asin", "asin", { F, F, N, N }, attrSmall, -1, true, false, PRECMASK_ALL },
2075 { angleAndTrigonometryGroup, "acos", "acos", { F, F, N, N }, attrSmall, -1, true, false, PRECMASK_ALL },
2076 { angleAndTrigonometryGroup, "atan2", "atan", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2077 { angleAndTrigonometryGroup, "atan", "atan", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2078 { angleAndTrigonometryGroup, "sinh", "sinh", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2079 { angleAndTrigonometryGroup, "cosh", "cosh", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2080 { angleAndTrigonometryGroup, "tanh", "tanh", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2081 { angleAndTrigonometryGroup, "asinh", "asinh", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2082 { angleAndTrigonometryGroup, "acosh", "acosh", { F, F, N, N }, attrBig, -1, false, false, PRECMASK_ALL },
2083 { angleAndTrigonometryGroup, "atanh", "atanh", { F, F, N, N }, attrSmall, -1, true, false, PRECMASK_ALL },
2084
2085 { exponentialGroup, "pow", "pow", { F, F, F, N }, attrPos, -1, false, false, PRECMASK_ALL },
2086 { exponentialGroup, "exp", "exp", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2087 { exponentialGroup, "log", "log", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL },
2088 { exponentialGroup, "exp2", "exp2", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2089 { exponentialGroup, "log2", "log2", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL },
2090 { exponentialGroup, "sqrt", "sqrt", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL },
2091 { exponentialGroup, "inversesqrt", "inversesqrt", { F, F, N, N }, attrPos, -1, false, false, PRECMASK_ALL },
2092
2093 { commonFunctionsGroup, "abs", "abs", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2094 { commonFunctionsGroup, "abs", "abs", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2095 { commonFunctionsGroup, "sign", "sign", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2096 { commonFunctionsGroup, "sign", "sign", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2097 { commonFunctionsGroup, "floor", "floor", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2098 { commonFunctionsGroup, "floor", "floor", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2099 { commonFunctionsGroup, "trunc", "trunc", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2100 { commonFunctionsGroup, "trunc", "trunc", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2101 { commonFunctionsGroup, "round", "round", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2102 { commonFunctionsGroup, "round", "round", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2103 { commonFunctionsGroup, "roundEven", "roundEven", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2104 { commonFunctionsGroup, "roundEven", "roundEven", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2105 { commonFunctionsGroup, "ceil", "ceil", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2106 { commonFunctionsGroup, "ceil", "ceil", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2107 { commonFunctionsGroup, "fract", "fract", { F, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2108 { commonFunctionsGroup, "fract", "fract", { V4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2109 { commonFunctionsGroup, "mod", "mod", { GT, GT, GT, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2110 { commonFunctionsGroup, "min", "min", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2111 { commonFunctionsGroup, "min", "min", { V4, V4, V4, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2112 { commonFunctionsGroup, "max", "max", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2113 { commonFunctionsGroup, "max", "max", { V4, V4, V4, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2114 { commonFunctionsGroup, "clamp", "clamp", { F, F, F, F }, attrSmall, 2, false, false, PRECMASK_MEDIUMP_HIGHP },
2115 { commonFunctionsGroup, "clamp", "clamp", { V4, V4, V4, V4 }, attrSmall, 2, false, false, PRECMASK_ALL },
2116 { commonFunctionsGroup, "mix", "mix", { F, F, F, F }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2117 { commonFunctionsGroup, "mix", "mix", { V4, V4, V4, V4 }, attrNegPos, -1, false, false, PRECMASK_ALL },
2118 { commonFunctionsGroup, "mix", "mix", { F, F, F, B }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2119 { commonFunctionsGroup, "mix", "mix", { V4, V4, V4, B4 }, attrNegPos, -1, false, false, PRECMASK_ALL },
2120 { commonFunctionsGroup, "step", "step", { F, F, F, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2121 { commonFunctionsGroup, "step", "step", { V4, V4, V4, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2122 { commonFunctionsGroup, "smoothstep", "smoothstep", { F, F, F, F }, attrSmall, 1, false, false, PRECMASK_MEDIUMP_HIGHP },
2123 { commonFunctionsGroup, "smoothstep", "smoothstep", { V4, V4, V4, V4 }, attrSmall, 1, false, false, PRECMASK_ALL },
2124 { commonFunctionsGroup, "isnan", "isnan", { B, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2125 { commonFunctionsGroup, "isnan", "isnan", { B4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2126 { commonFunctionsGroup, "isinf", "isinf", { B, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2127 { commonFunctionsGroup, "isinf", "isinf", { B4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2128 { commonFunctionsGroup, "floatBitsToInt", "floatBitsToInt", { I, F, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2129 { commonFunctionsGroup, "floatBitsToInt", "floatBitsToInt", { I4, V4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2130 { commonFunctionsGroup, "intBitsToFloat", "intBitsToFloat", { F, I, N, N }, attrNegPos, -1, false, false, PRECMASK_MEDIUMP_HIGHP },
2131 { commonFunctionsGroup, "intBitsToFloat", "intBitsToFloat", { V4, I4, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2132
2133 { geometricFunctionsGroup, "length", "length", { F, VL, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2134 { geometricFunctionsGroup, "distance", "distance", { F, VL, VL, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2135 { geometricFunctionsGroup, "dot", "dot", { F, VL, VL, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2136 { geometricFunctionsGroup, "cross", "cross", { V3, V3, V3, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2137 { geometricFunctionsGroup, "normalize", "normalize", { VL, VL, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2138 { geometricFunctionsGroup, "faceforward", "faceforward", { VL, VL, VL, VL }, attrNegPos, -1, false, false, PRECMASK_ALL },
2139 { geometricFunctionsGroup, "reflect", "reflect", { VL, VL, VL, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2140 { geometricFunctionsGroup, "refract", "refract", { VL, VL, VL, F }, attrNegPos, -1, false, false, PRECMASK_ALL },
2141
2142 { matrixFunctionsGroup, "matrixCompMult", "matrixCompMult", { M, M, M, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2143 { matrixFunctionsGroup, "transpose", "transpose", { M, M, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2144 { matrixFunctionsGroup, "inverse", "inverse", { M, M, N, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2145
2146 { floatCompareGroup, "lessThan", "lessThan", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2147 { floatCompareGroup, "lessThanEqual", "lessThanEqual", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2148 { floatCompareGroup, "greaterThan", "greaterThan", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2149 { floatCompareGroup, "greaterThanEqual", "greaterThanEqual", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2150 { floatCompareGroup, "equal", "equal", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2151 { floatCompareGroup, "notEqual", "notEqual", { BV, FV, FV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2152
2153 { intCompareGroup, "lessThan", "lessThan", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2154 { intCompareGroup, "lessThanEqual", "lessThanEqual", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2155 { intCompareGroup, "greaterThan", "greaterThan", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2156 { intCompareGroup, "greaterThanEqual", "greaterThanEqual", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2157 { intCompareGroup, "equal", "equal", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2158 { intCompareGroup, "notEqual", "notEqual", { BV, IV, IV, N }, attrNegPos, -1, false, false, PRECMASK_ALL },
2159
2160 { boolCompareGroup, "equal", "equal", { BV, BV, BV, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP },
2161 { boolCompareGroup, "notEqual", "notEqual", { BV, BV, BV, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP },
2162 { boolCompareGroup, "any", "any", { B, BV, N, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP },
2163 { boolCompareGroup, "all", "all", { B, BV, N, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP },
2164 { boolCompareGroup, "not", "not", { BV, BV, N, N }, attrNegPos, -1, false, true, PRECMASK_MEDIUMP }
2165 };
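// For illustration, each row above expands into several test cases in the loop below: the
// "length" row ({ F, VL, N, N }, PRECMASK_ALL) yields vec3 and vec4 variants at all three
// precisions, i.e. cases lowp_vec3 .. highp_vec4 under both geometric.length.vertex and
// geometric.length.fragment.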
2166
2167 // vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added.
2168 // \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group.
2169 tcu::TestCaseGroup* vertexSubGroup = DE_NULL;
2170 tcu::TestCaseGroup* fragmentSubGroup = DE_NULL;
2171 FunctionCase::InitialCalibrationStorage vertexSubGroupCalibrationStorage;
2172 FunctionCase::InitialCalibrationStorage fragmentSubGroupCalibrationStorage;
2173 for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++)
2174 {
2175 tcu::TestCaseGroup* const parentGroup = functionCaseGroups[funcNdx].parentGroup;
2176 const char* const groupName = functionCaseGroups[funcNdx].groupName;
2177 const char* const groupFunc = functionCaseGroups[funcNdx].func;
2178 const ValueType* const funcTypes = functionCaseGroups[funcNdx].types;
2179 const Vec4& groupAttribute = functionCaseGroups[funcNdx].attribute;
2180 const int modifyParamNdx = functionCaseGroups[funcNdx].modifyParamNdx;
2181 const bool useNearlyConstantInputs = functionCaseGroups[funcNdx].useNearlyConstantInputs;
2182 const bool booleanCase = functionCaseGroups[funcNdx].booleanCase;
2183 const PrecisionMask precMask = functionCaseGroups[funcNdx].precMask;
2184
2185 // If this is a new function and not just a different version of the previously defined function, create a new group.
2186 if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx-1].parentGroup || string(groupName) != functionCaseGroups[funcNdx-1].groupName)
2187 {
2188 tcu::TestCaseGroup* const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, "");
2189 functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup);
2190
2191 vertexSubGroup = new tcu::TestCaseGroup(m_testCtx, "vertex", "");
2192 fragmentSubGroup = new tcu::TestCaseGroup(m_testCtx, "fragment", "");
2193
2194 funcGroup->addChild(vertexSubGroup);
2195 funcGroup->addChild(fragmentSubGroup);
2196
2197 vertexSubGroupCalibrationStorage = FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2198 fragmentSubGroupCalibrationStorage = FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
2199 }
2200
2201 DE_ASSERT(vertexSubGroup != DE_NULL);
2202 DE_ASSERT(fragmentSubGroup != DE_NULL);
2203
2204 // Find the type size range of parameters (e.g. from 2 to 4 in case of vectors).
2205 int genTypeFirstSize = 1;
2206 int genTypeLastSize = 1;
2207
2208 // Find the first return value or parameter with a gentype (if any) and set sizes accordingly.
2209 // \note Assumes only matching-size gentypes are to be found, e.g. no "genType func (vec param)"
2210 for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++)
2211 {
2212 switch (funcTypes[i])
2213 {
2214 case VALUE_FLOAT_VEC:
2215 case VALUE_BOOL_VEC:
2216 case VALUE_INT_VEC: // \note Fall-through.
2217 genTypeFirstSize = 2;
2218 genTypeLastSize = 4;
2219 break;
2220 case VALUE_FLOAT_VEC34:
2221 genTypeFirstSize = 3;
2222 genTypeLastSize = 4;
2223 break;
2224 case VALUE_FLOAT_GENTYPE:
2225 case VALUE_BOOL_GENTYPE:
2226 case VALUE_INT_GENTYPE: // \note Fall-through.
2227 genTypeFirstSize = 1;
2228 genTypeLastSize = 4;
2229 break;
2230 case VALUE_MATRIX:
2231 genTypeFirstSize = 2;
2232 genTypeLastSize = 4;
2233 break;
2234 // If none of the above, keep looping.
2235 default:
2236 break;
2237 }
2238 }
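// For example, { GT, GT, GT, N } (e.g. "mod") yields sizes 1..4 (float, vec2, vec3, vec4),
// { F, VL, VL, N } (e.g. "distance") yields 3..4, and rows with scalar-only types keep the
// default 1..1 and thus produce a single size variant.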
2239
2240 // Create a case for each possible size of the gentype.
2241 for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++)
2242 {
2243 // Determine specific types for return value and the parameters, according to curSize. Non-gentypes not affected by curSize.
2244 DataType types[FunctionCase::MAX_PARAMS + 1];
2245 for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++)
2246 {
2247 if (funcTypes[i] == VALUE_NONE)
2248 types[i] = TYPE_INVALID;
2249 else
2250 {
2251 int isFloat = funcTypes[i] & VALUE_ANY_FLOAT;
2252 int isBool = funcTypes[i] & VALUE_ANY_BOOL;
2253 int isInt = funcTypes[i] & VALUE_ANY_INT;
2254 int isMat = funcTypes[i] == VALUE_MATRIX;
2255 int inSize = (funcTypes[i] & VALUE_ANY_GENTYPE) ? curSize
2256 : funcTypes[i] == VALUE_VEC3 ? 3
2257 : funcTypes[i] == VALUE_VEC4 ? 4
2258 : funcTypes[i] == VALUE_BOOL_VEC4 ? 4
2259 : funcTypes[i] == VALUE_INT_VEC4 ? 4
2260 : 1;
2261 int typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1.
2262
2263 types[i] = isFloat ? floatTypes[typeArrayNdx]
2264 : isBool ? boolTypes[typeArrayNdx]
2265 : isInt ? intTypes[typeArrayNdx]
2266 : isMat ? matrixTypes[typeArrayNdx]
2267 : TYPE_LAST;
2268 }
2269
2270 DE_ASSERT(types[i] != TYPE_LAST);
2271 }
2272
2273 // Array for just the parameter types.
2274 DataType paramTypes[FunctionCase::MAX_PARAMS];
2275 for (int i = 0; i < FunctionCase::MAX_PARAMS; i++)
2276 paramTypes[i] = types[i+1];
2277
2278 for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++)
2279 {
2280 if ((precMask & (1 << prec)) == 0)
2281 continue;
2282
2283 const string precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_");
2284 std::ostringstream caseName;
2285
2286 caseName << precisionPrefix;
2287
2288 // Write the name of each distinct parameter data type into the test case name.
2289 for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++)
2290 {
2291 if (i == 1 || types[i] != types[i-1])
2292 {
2293 if (i > 1)
2294 caseName << "_";
2295
2296 caseName << getDataTypeName(types[i]);
2297 }
2298 }
2299
2300 for (int fragI = 0; fragI <= 1; fragI++)
2301 {
2302 const bool vert = fragI == 0;
2303 tcu::TestCaseGroup* const group = vert ? vertexSubGroup : fragmentSubGroup;
2304 group->addChild (new FunctionCase(m_context,
2305 caseName.str().c_str(), "",
2306 groupFunc,
2307 types[0], paramTypes,
2308 groupAttribute, modifyParamNdx, useNearlyConstantInputs,
2309 (Precision)prec, vert,
2310 vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage));
2311 }
2312 }
2313 }
2314 }
2315 }
2316
2317 } // Performance
2318 } // gles3
2319 } // deqp
2320