1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 2.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Optimized vs unoptimized shader performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es2pShaderOptimizationTests.hpp"
25 #include "glsShaderPerformanceMeasurer.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuVector.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deStringUtil.hpp"
33 #include "deMath.h"
34 
35 #include "glwFunctions.hpp"
36 
37 #include <vector>
38 #include <string>
39 #include <map>
40 
41 using glu::ShaderProgram;
42 using tcu::TestLog;
43 using tcu::Vec4;
44 using de::SharedPtr;
45 using de::toString;
46 
47 using std::vector;
48 using std::string;
49 
50 namespace deqp
51 {
52 
53 using gls::ShaderPerformanceMeasurer;
54 
55 namespace gles2
56 {
57 namespace Performance
58 {
59 
//! Build a map holding exactly one entry, key -> value.
static inline std::map<string, string> singleMap (const string& key, const string& value)
{
	std::map<string, string> result;
	result.insert(std::make_pair(key, value));
	return result;
}
66 
//! Concatenate numRepeats copies of str, inserting delim between consecutive copies.
//! \note Returns an empty string when numRepeats <= 0.
static inline string repeat (const string& str, int numRepeats, const string& delim = "")
{
	string result;

	for (int i = 0; i < numRepeats; i++)
	{
		// Delimiter goes only between copies, never before the first one.
		if (i > 0)
			result += delim;
		result += str;
	}

	return result;
}
74 
repeatIndexedTemplate(const string & strTempl,int numRepeats,const string & delim="",int ndxStart=0)75 static inline string repeatIndexedTemplate (const string& strTempl, int numRepeats, const string& delim = "", int ndxStart = 0)
76 {
77 	const tcu::StringTemplate	templ(strTempl);
78 	string						result;
79 	std::map<string, string>	params;
80 
81 	for (int i = 0; i < numRepeats; i++)
82 	{
83 		params["PREV_NDX"]	= toString(i + ndxStart - 1);
84 		params["NDX"]		= toString(i + ndxStart);
85 
86 		result += (i > 0 ? delim : "") + templ.specialize(params);
87 	}
88 
89 	return result;
90 }
91 
92 namespace
93 {
94 
//! Shader stage that carries the measured workload of a case.
enum CaseShaderType
{
	CASESHADERTYPE_VERTEX	= 0,	//!< Workload is placed in the vertex shader.
	CASESHADERTYPE_FRAGMENT	= 1,	//!< Workload is placed in the fragment shader.

	CASESHADERTYPE_LAST		= 2		//!< Number of valid shader types; not a valid type itself.
};
102 
getShaderPrecision(CaseShaderType shaderType)103 static inline string getShaderPrecision (CaseShaderType shaderType)
104 {
105 	switch (shaderType)
106 	{
107 		case CASESHADERTYPE_VERTEX:		return "highp";
108 		case CASESHADERTYPE_FRAGMENT:	return "mediump";
109 		default:
110 			DE_ASSERT(false);
111 			return DE_NULL;
112 	}
113 }
114 
115 struct ProgramData
116 {
117 	glu::ProgramSources			sources;
118 	vector<gls::AttribSpec>		attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
119 
ProgramDatadeqp::gles2::Performance::__anonf46dc3bb0111::ProgramData120 	ProgramData (void) {}
ProgramDatadeqp::gles2::Performance::__anonf46dc3bb0111::ProgramData121 	ProgramData (const glu::ProgramSources& sources_, const vector<gls::AttribSpec>& attributes_ = vector<gls::AttribSpec>())	: sources(sources_), attributes(attributes_)	{}
ProgramDatadeqp::gles2::Performance::__anonf46dc3bb0111::ProgramData122 	ProgramData (const glu::ProgramSources& sources_, const gls::AttribSpec& attribute)											: sources(sources_), attributes(1, attribute)	{}
123 };
124 
125 //! Shader boilerplate helper; most cases have similar basic shader structure.
defaultProgramData(CaseShaderType shaderType,const string & funcDefs,const string & mainStatements)126 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& funcDefs, const string& mainStatements)
127 {
128 	const bool		isVertexCase	= shaderType == CASESHADERTYPE_VERTEX;
129 	const bool		isFragmentCase	= shaderType == CASESHADERTYPE_FRAGMENT;
130 	const string	vtxPrec			= getShaderPrecision(CASESHADERTYPE_VERTEX);
131 	const string	fragPrec		= getShaderPrecision(CASESHADERTYPE_FRAGMENT);
132 
133 	return ProgramData(glu::ProgramSources() << glu::VertexSource(		"attribute " + vtxPrec + " vec4 a_position;\n"
134 																		"attribute " + vtxPrec + " vec4 a_value;\n"
135 																		"varying " + fragPrec + " vec4 v_value;\n"
136 																		+ (isVertexCase ? funcDefs : "") +
137 																		"void main (void)\n"
138 																		"{\n"
139 																		"	gl_Position = a_position;\n"
140 																		"	" + vtxPrec + " vec4 value = a_value;\n"
141 																		+ (isVertexCase ? mainStatements : "") +
142 																		"	v_value = value;\n"
143 																		"}\n")
144 
145 											 << glu::FragmentSource(	"varying " + fragPrec + " vec4 v_value;\n"
146 																		+ (isFragmentCase ? funcDefs : "") +
147 																		"void main (void)\n"
148 																		"{\n"
149 																		"	" + fragPrec + " vec4 value = v_value;\n"
150 																		+ (isFragmentCase ? mainStatements : "") +
151 																		"	gl_FragColor = value;\n"
152 																		"}\n"),
153 					  gls::AttribSpec("a_value",
154 									  Vec4(1.0f, 0.0f, 0.0f, 0.0f),
155 									  Vec4(0.0f, 1.0f, 0.0f, 0.0f),
156 									  Vec4(0.0f, 0.0f, 1.0f, 0.0f),
157 									  Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
158 }
159 
defaultProgramData(CaseShaderType shaderType,const string & mainStatements)160 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& mainStatements)
161 {
162 	return defaultProgramData(shaderType, "", mainStatements);
163 }
164 
165 class ShaderOptimizationCase : public TestCase
166 {
167 public:
ShaderOptimizationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType)168 	ShaderOptimizationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType)
169 		: TestCase				(context, tcu::NODETYPE_PERFORMANCE, name, description)
170 		, m_caseShaderType		(caseShaderType)
171 		, m_state				(STATE_LAST)
172 		, m_measurer			(context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX	? gls::CASETYPE_VERTEX
173 														   : caseShaderType == CASESHADERTYPE_FRAGMENT	? gls::CASETYPE_FRAGMENT
174 														   : gls::CASETYPE_LAST)
175 		, m_unoptimizedResult	(-1.0f, -1.0f)
176 		, m_optimizedResult		(-1.0f, -1.0f)
177 	{
178 	}
179 
~ShaderOptimizationCase(void)180 	virtual ~ShaderOptimizationCase (void) {}
181 
182 	void			init		(void);
183 	IterateResult	iterate		(void);
184 
185 protected:
186 	virtual ProgramData		generateProgramData (bool optimized) const = 0;
187 
188 	const CaseShaderType	m_caseShaderType;
189 
190 private:
191 	enum State
192 	{
193 		STATE_INIT_UNOPTIMIZED = 0,
194 		STATE_MEASURE_UNOPTIMIZED,
195 		STATE_INIT_OPTIMIZED,
196 		STATE_MEASURE_OPTIMIZED,
197 		STATE_FINISHED,
198 
199 		STATE_LAST
200 	};
201 
programData(bool optimized)202 	ProgramData&						programData		(bool optimized) { return optimized ? m_optimizedData		: m_unoptimizedData;		}
program(bool optimized)203 	SharedPtr<const ShaderProgram>&		program			(bool optimized) { return optimized ? m_optimizedProgram	: m_unoptimizedProgram;		}
result(bool optimized)204 	ShaderPerformanceMeasurer::Result&	result			(bool optimized) { return optimized ? m_optimizedResult		: m_unoptimizedResult;		}
205 
206 	State								m_state;
207 	ShaderPerformanceMeasurer			m_measurer;
208 
209 	ProgramData							m_unoptimizedData;
210 	ProgramData							m_optimizedData;
211 	SharedPtr<const ShaderProgram>		m_unoptimizedProgram;
212 	SharedPtr<const ShaderProgram>		m_optimizedProgram;
213 	ShaderPerformanceMeasurer::Result	m_unoptimizedResult;
214 	ShaderPerformanceMeasurer::Result	m_optimizedResult;
215 };
216 
init(void)217 void ShaderOptimizationCase::init (void)
218 {
219 	const glu::RenderContext&	renderCtx	= m_context.getRenderContext();
220 	TestLog&					log			= m_testCtx.getLog();
221 
222 	m_measurer.logParameters(log);
223 
224 	for (int ndx = 0; ndx < 2; ndx++)
225 	{
226 		const bool optimized = ndx == 1;
227 
228 		programData(optimized) = generateProgramData(optimized);
229 
230 		for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
231 			DE_ASSERT(programData(optimized).attributes[i].name != "a_position"); // \note Position attribute is set by m_measurer.
232 
233 		program(optimized) = SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
234 
235 		{
236 			const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram"			: "UnoptimizedProgram",
237 													 optimized ? "Hand-optimized program"	: "Unoptimized program");
238 			log << *program(optimized);
239 		}
240 
241 		if (!program(optimized)->isOk())
242 			TCU_FAIL("Shader compilation failed");
243 	}
244 
245 	m_state = STATE_INIT_UNOPTIMIZED;
246 }
247 
iterate(void)248 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate (void)
249 {
250 	TestLog& log = m_testCtx.getLog();
251 
252 	if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
253 	{
254 		const bool optimized = m_state == STATE_INIT_OPTIMIZED;
255 		m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
256 		m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
257 
258 		return CONTINUE;
259 	}
260 	else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
261 	{
262 		m_measurer.iterate();
263 
264 		if (m_measurer.isFinished())
265 		{
266 			const bool						optimized	= m_state == STATE_MEASURE_OPTIMIZED;
267 			const tcu::ScopedLogSection		section		(log, optimized ? "OptimizedResult"									: "UnoptimizedResult",
268 															  optimized ? "Measurement results for hand-optimized program"	: "Measurement result for unoptimized program");
269 			m_measurer.logMeasurementInfo(log);
270 			result(optimized) = m_measurer.getResult();
271 			m_measurer.deinit();
272 			m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
273 		}
274 
275 		return CONTINUE;
276 	}
277 	else
278 	{
279 		DE_ASSERT(m_state == STATE_FINISHED);
280 
281 		const float			unoptimizedRelevantResult	= m_caseShaderType == CASESHADERTYPE_VERTEX ? m_unoptimizedResult.megaVertPerSec	: m_unoptimizedResult.megaFragPerSec;
282 		const float			optimizedRelevantResult		= m_caseShaderType == CASESHADERTYPE_VERTEX ? m_optimizedResult.megaVertPerSec		: m_optimizedResult.megaFragPerSec;
283 		const char* const	relevantResultName			= m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex"								: "fragment";
284 		const float			ratio						= unoptimizedRelevantResult / optimizedRelevantResult;
285 		const int			handOptimizationGain		= (int)deFloatRound(100.0f/ratio) - 100;
286 
287 		log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio << TestLog::EndMessage;
288 
289 		if (handOptimizationGain >= 0)
290 			log << TestLog::Message << "Note: " << handOptimizationGain << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
291 		else
292 			log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain << "%" << TestLog::EndMessage;
293 
294 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
295 
296 		return STOP;
297 	}
298 }
299 
300 class LoopUnrollCase : public ShaderOptimizationCase
301 {
302 public:
303 	enum CaseType
304 	{
305 		CASETYPE_INDEPENDENT = 0,
306 		CASETYPE_DEPENDENT,
307 
308 		CASETYPE_LAST
309 	};
310 
LoopUnrollCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,int numRepetitions)311 	LoopUnrollCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, int numRepetitions)
312 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
313 		, m_numRepetitions			(numRepetitions)
314 		, m_caseType				(caseType)
315 	{
316 	}
317 
318 protected:
generateProgramData(bool optimized) const319 	ProgramData generateProgramData (bool optimized) const
320 	{
321 		const string repetition = optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions)
322 											: loop(m_numRepetitions, expressionTemplate(m_caseType));
323 
324 		return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) + " vec4 valueOrig = value;\n" + repetition);
325 	}
326 
327 private:
328 	const int		m_numRepetitions;
329 	const CaseType	m_caseType;
330 
expressionTemplate(CaseType caseType)331 	static inline string expressionTemplate (CaseType caseType)
332 	{
333 		switch (caseType)
334 		{
335 			case CASETYPE_INDEPENDENT:	return "value += sin(float(${NDX}+1)*valueOrig)";
336 			case CASETYPE_DEPENDENT:	return "value = sin(value)";
337 			default:
338 				DE_ASSERT(false);
339 				return DE_NULL;
340 		}
341 	}
342 
loop(int iterations,const string & innerExpr)343 	static inline string loop (int iterations, const string& innerExpr)
344 	{
345 		return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" + tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
346 	}
347 };
348 
349 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
350 {
351 public:
LoopInvariantCodeMotionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int numLoopIterations)352 	LoopInvariantCodeMotionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int numLoopIterations)
353 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
354 		, m_numLoopIterations		(numLoopIterations)
355 	{
356 	}
357 
358 protected:
generateProgramData(bool optimized) const359 	ProgramData generateProgramData (bool optimized) const
360 	{
361 		float scale = 0.0f;
362 		for (int i = 0; i < m_numLoopIterations; i++)
363 			scale += 3.2f*(float)i + 4.6f;
364 		scale = 1.0f / scale;
365 
366 		const string precision		= getShaderPrecision(m_caseShaderType);
367 		const string statements		= optimized ?	"	" + precision + " vec4 valueOrig = value;\n"
368 													"	" + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
369 													"	for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
370 													"	{\n"
371 													"		" + precision + " float x = 3.2*float(i) + 4.6;\n"
372 													"		value += x*y;\n"
373 													"	}\n"
374 													"	value *= " + toString(scale) + ";\n"
375 
376 												:	"	" + precision + " vec4 valueOrig = value;\n"
377 													"	for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
378 													"	{\n"
379 													"		" + precision + " float x = 3.2*float(i) + 4.6;\n"
380 													"		" + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
381 													"		value += x*y;\n"
382 													"	}\n"
383 													"	value *= " + toString(scale) + ";\n";
384 
385 		return defaultProgramData(m_caseShaderType, statements);
386 	}
387 
388 private:
389 	const int m_numLoopIterations;
390 };
391 
392 class FunctionInliningCase : public ShaderOptimizationCase
393 {
394 public:
FunctionInliningCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int callNestingDepth)395 	FunctionInliningCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int callNestingDepth)
396 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
397 		, m_callNestingDepth		(callNestingDepth)
398 	{
399 	}
400 
401 protected:
generateProgramData(bool optimized) const402 	ProgramData generateProgramData (bool optimized) const
403 	{
404 		const string precision				= getShaderPrecision(m_caseShaderType);
405 		const string expression				= "value*vec4(0.8, 0.7, 0.6, 0.9)";
406 		const string maybeFuncDefs			= optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
407 		const string mainValueStatement		= (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth-1) + "(value)") + ";\n";
408 
409 		return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
410 	}
411 
412 private:
413 	const int m_callNestingDepth;
414 
funcDefinitions(int callNestingDepth,const string & precision,const string & expression)415 	static inline string funcDefinitions (int callNestingDepth, const string& precision, const string& expression)
416 	{
417 		string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
418 
419 		for (int i = 1; i < callNestingDepth; i++)
420 			result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" + toString(i-1) + "(v); }\n";
421 
422 		return result;
423 	}
424 };
425 
426 class ConstantPropagationCase : public ShaderOptimizationCase
427 {
428 public:
429 	enum CaseType
430 	{
431 		CASETYPE_BUILT_IN_FUNCTIONS = 0,
432 		CASETYPE_ARRAY,
433 		CASETYPE_STRUCT,
434 
435 		CASETYPE_LAST
436 	};
437 
ConstantPropagationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,bool useConstantExpressionsOnly)438 	ConstantPropagationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, bool useConstantExpressionsOnly)
439 		: ShaderOptimizationCase			(context, name, description, caseShaderType)
440 		, m_caseType						(caseType)
441 		, m_useConstantExpressionsOnly		(useConstantExpressionsOnly)
442 	{
443 		DE_ASSERT(!(m_caseType == CASETYPE_ARRAY && m_useConstantExpressionsOnly)); // \note Would need array constructors, which GLSL ES 1 doesn't have.
444 	}
445 
446 protected:
generateProgramData(bool optimized) const447 	ProgramData generateProgramData (bool optimized) const
448 	{
449 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
450 		const string	precision		= getShaderPrecision(m_caseShaderType);
451 		const string	statements		= m_caseType == CASETYPE_BUILT_IN_FUNCTIONS		? builtinFunctionsCaseStatements	(optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
452 										: m_caseType == CASETYPE_ARRAY					? arrayCaseStatements				(optimized, precision, isVertexCase)
453 										: m_caseType == CASETYPE_STRUCT					? structCaseStatements				(optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
454 										: DE_NULL;
455 
456 		return defaultProgramData(m_caseShaderType, statements);
457 	}
458 
459 private:
460 	const CaseType	m_caseType;
461 	const bool		m_useConstantExpressionsOnly;
462 
builtinFunctionsCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)463 	static inline string builtinFunctionsCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
464 	{
465 		const string	constMaybe = constantExpressionsOnly ? "const " : "";
466 		const int		numSinRows = useHeavierWorkload ? 12 : 1;
467 
468 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
469 
470 						 :	"	" + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
471 							"	" + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
472 							"	" + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
473 							"	" + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
474 							"	" + constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n"
475 							+ repeatIndexedTemplate("	" + constMaybe + precision + " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n", numSinRows, "", 1) +
476 							"	" + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
477 							"	value = f*value;\n";
478 	}
479 
arrayCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)480 	static inline string arrayCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
481 	{
482 		const int numSinRows = useHeavierWorkload ? 12 : 1;
483 
484 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
485 
486 						 :	"	const int arrLen = 4;\n"
487 							"	" + precision + " vec4 arr[arrLen];\n"
488 							"	arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
489 							"	arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
490 							"	arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
491 							"	arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n"
492 							"	" + precision + " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * 0.25;\n"
493 							"	" + precision + " vec4 b0 = cos(sin(a));\n"
494 							+ repeatIndexedTemplate("	" + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
495 							"	" + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
496 							"	value = c*value;\n";
497 	}
498 
structCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)499 	static inline string structCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
500 	{
501 		const string	constMaybe = constantExpressionsOnly ? "const " : "";
502 		const int		numSinRows = useHeavierWorkload ? 12 : 1;
503 
504 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
505 
506 						 :	"	struct S\n"
507 							"	{\n"
508 							"		" + precision + " vec4 a;\n"
509 							"		" + precision + " vec4 b;\n"
510 							"		" + precision + " vec4 c;\n"
511 							"		" + precision + " vec4 d;\n"
512 							"	};\n"
513 							"\n"
514 							"	" + constMaybe + "S s =\n"
515 							"		S(vec4(0.1, 0.5, 0.9, 1.3),\n"
516 							"		  vec4(0.2, 0.6, 1.0, 1.4),\n"
517 							"		  vec4(0.3, 0.7, 1.1, 1.5),\n"
518 							"		  vec4(0.4, 0.8, 1.2, 1.6));\n"
519 							"	" + constMaybe + precision + " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
520 							"	" + constMaybe + precision + " vec4 b0 = cos(sin(a));\n"
521 							+ repeatIndexedTemplate("	" + constMaybe + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
522 							"	" + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
523 							"	value = c*value;\n";
524 	}
525 };
526 
527 class CommonSubexpressionCase : public ShaderOptimizationCase
528 {
529 public:
530 	enum CaseType
531 	{
532 		CASETYPE_SINGLE_STATEMENT = 0,
533 		CASETYPE_MULTIPLE_STATEMENTS,
534 		CASETYPE_STATIC_BRANCH,
535 		CASETYPE_LOOP,
536 
537 		CASETYPE_LAST
538 	};
539 
CommonSubexpressionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)540 	CommonSubexpressionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
541 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
542 		, m_caseType				(caseType)
543 	{
544 	}
545 
546 protected:
generateProgramData(bool optimized) const547 	ProgramData generateProgramData (bool optimized) const
548 	{
549 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
550 		const string	precision		= getShaderPrecision(m_caseShaderType);
551 		const string	statements		= m_caseType == CASETYPE_SINGLE_STATEMENT		? singleStatementCaseStatements		(optimized, precision, isVertexCase)
552 										: m_caseType == CASETYPE_MULTIPLE_STATEMENTS	? multipleStatementsCaseStatements	(optimized, precision, isVertexCase)
553 										: m_caseType == CASETYPE_STATIC_BRANCH			? staticBranchCaseStatements		(optimized, precision, isVertexCase)
554 										: m_caseType == CASETYPE_LOOP					? loopCaseStatements				(optimized, precision, isVertexCase)
555 										: DE_NULL;
556 
557 		return defaultProgramData(m_caseShaderType, statements);
558 	}
559 
560 private:
561 	const CaseType m_caseType;
562 
singleStatementCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)563 	static inline string singleStatementCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
564 	{
565 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
566 
567 		return optimized ?	"	" + precision + " vec4 s = sin(value);\n"
568 							"	" + precision + " vec4 cs = cos(s);\n"
569 							"	" + precision + " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
570 							"	value = " + repeat("d", numTopLevelRepeats, "+") + ";\n"
571 
572 						 :	"	value = " + repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))", numTopLevelRepeats, "\n\t      + ") + ";\n";
573 	}
574 
multipleStatementsCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)575 	static inline string multipleStatementsCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
576 	{
577 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
578 		DE_ASSERT(numTopLevelRepeats >= 2);
579 
580 		return optimized ?	"	" + precision + " vec4 a = sin(value) + cos(exp(value));\n"
581 							"	" + precision + " vec4 b = cos(cos(a));\n"
582 							"	a = fract(exp(sqrt(b)));\n"
583 							"\n"
584 							+ repeat("\tvalue += a*b;\n", numTopLevelRepeats)
585 
586 						 :	repeatIndexedTemplate(	"	" + precision + " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
587 													"	" + precision + " vec4 b${NDX} = cos(cos(a${NDX}));\n"
588 													"	a${NDX} = fract(exp(sqrt(b${NDX})));\n"
589 													"\n",
590 													numTopLevelRepeats) +
591 
592 							repeatIndexedTemplate(	"	value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
593 	}
594 
staticBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)595 	static inline string staticBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
596 	{
597 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
598 		DE_ASSERT(numTopLevelRepeats >= 2);
599 
600 		if (optimized)
601 		{
602 			return "	" + precision + " vec4 a = sin(value) + cos(exp(value));\n"
603 				   "	" + precision + " vec4 b = cos(a);\n"
604 				   "	b = cos(b);\n"
605 				   "	a = fract(exp(sqrt(b)));\n"
606 				   "\n"
607 				   + repeat("	value += a*b;\n", numTopLevelRepeats);
608 		}
609 		else
610 		{
611 			string result;
612 
613 			for (int i = 0; i < numTopLevelRepeats; i++)
614 			{
615 				result +=	"	" + precision + " vec4 a" + toString(i) + " = sin(value) + cos(exp(value));\n"
616 							"	" + precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
617 
618 				if (i % 3 == 0)
619 					result +=	"	if (1 < 2)\n"
620 								"		b" + toString(i) + " = cos(b" + toString(i) + ");\n";
621 				else if (i % 3 == 1)
622 					result +=	"	b" + toString(i) + " = cos(b" + toString(i) + ");\n";
623 				else if (i % 3 == 2)
624 					result +=	"	if (2 < 1);\n"
625 								"	else\n"
626 								"		b" + toString(i) + " = cos(b" + toString(i) + ");\n";
627 				else
628 					DE_ASSERT(false);
629 
630 				result +=	"	a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
631 			}
632 
633 			result += repeatIndexedTemplate("	value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
634 
635 			return result;
636 		}
637 	}
638 
loopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)639 	static inline string loopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
640 	{
641 		const int numLoopIterations = useHeavierWorkload ? 32 : 4;
642 
643 		return optimized ?	"	" + precision + " vec4 acc = value;\n"
644 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
645 							"		acc = sin(acc);\n"
646 							"\n"
647 							"	value += acc;\n"
648 							"	value += acc;\n"
649 
650 						 :	"	" + precision + " vec4 acc0 = value;\n"
651 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
652 							"		acc0 = sin(acc0);\n"
653 							"\n"
654 							"	" + precision + " vec4 acc1 = value;\n"
655 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
656 							"		acc1 = sin(acc1);\n"
657 							"\n"
658 							"	value += acc0;\n"
659 							"	value += acc1;\n";
660 	}
661 };
662 
663 class DeadCodeEliminationCase : public ShaderOptimizationCase
664 {
665 public:
666 	enum CaseType
667 	{
668 		CASETYPE_DEAD_BRANCH_SIMPLE = 0,
669 		CASETYPE_DEAD_BRANCH_COMPLEX,
670 		CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST,
671 		CASETYPE_DEAD_BRANCH_FUNC_CALL,
672 		CASETYPE_UNUSED_VALUE_BASIC,
673 		CASETYPE_UNUSED_VALUE_LOOP,
674 		CASETYPE_UNUSED_VALUE_DEAD_BRANCH,
675 		CASETYPE_UNUSED_VALUE_AFTER_RETURN,
676 		CASETYPE_UNUSED_VALUE_MUL_ZERO,
677 
678 		CASETYPE_LAST
679 	};
680 
DeadCodeEliminationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)681 	DeadCodeEliminationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
682 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
683 		, m_caseType				(caseType)
684 	{
685 	}
686 
687 protected:
generateProgramData(bool optimized) const688 	ProgramData generateProgramData (bool optimized) const
689 	{
690 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
691 		const string	precision		= getShaderPrecision(m_caseShaderType);
692 		const string	funcDefs		= m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL		? deadBranchFuncCallCaseFuncDefs		(optimized, precision)
693 										: m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN	? unusedValueAfterReturnCaseFuncDefs	(optimized, precision, isVertexCase)
694 										: "";
695 
696 		const string	statements		= m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE				? deadBranchSimpleCaseStatements			(optimized, isVertexCase)
697 										: m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX			? deadBranchComplexCaseStatements			(optimized, precision, true,	isVertexCase)
698 										: m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? deadBranchComplexCaseStatements			(optimized, precision, false,	isVertexCase)
699 										: m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL			? deadBranchFuncCallCaseStatements			(optimized, isVertexCase)
700 										: m_caseType == CASETYPE_UNUSED_VALUE_BASIC				? unusedValueBasicCaseStatements			(optimized, precision, isVertexCase)
701 										: m_caseType == CASETYPE_UNUSED_VALUE_LOOP				? unusedValueLoopCaseStatements				(optimized, precision, isVertexCase)
702 										: m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? unusedValueDeadBranchCaseStatements		(optimized, precision, isVertexCase)
703 										: m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN		? unusedValueAfterReturnCaseStatements		()
704 										: m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO			? unusedValueMulZeroCaseStatements			(optimized, precision, isVertexCase)
705 										: DE_NULL;
706 
707 		return defaultProgramData(m_caseShaderType, funcDefs, statements);
708 	}
709 
710 private:
711 	const CaseType m_caseType;
712 
deadBranchSimpleCaseStatements(bool optimized,bool useHeavierWorkload)713 	static inline string deadBranchSimpleCaseStatements (bool optimized, bool useHeavierWorkload)
714 	{
715 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
716 
717 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
718 
719 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
720 							"	if (2 < 1)\n"
721 							"	{\n"
722 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
723 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
724 							"			value = sin(value);\n"
725 							"	}\n";
726 	}
727 
deadBranchComplexCaseStatements(bool optimized,const string & precision,bool useConst,bool useHeavierWorkload)728 	static inline string deadBranchComplexCaseStatements (bool optimized, const string& precision, bool useConst, bool useHeavierWorkload)
729 	{
730 		const string	constMaybe			= useConst ? "const " : "";
731 		const int		numLoopIterations	= useHeavierWorkload ? 16 : 4;
732 
733 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
734 
735 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
736 							"	" + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
737 							"	" + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
738 							"	" + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
739 							"	" + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
740 							"	" + constMaybe + precision + " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
741 							"	if (e.x > 1.0)\n"
742 							"	{\n"
743 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
744 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
745 							"			value = sin(value);\n"
746 							"	}\n";
747 	}
748 
// Function definitions for the "dead branch, function call condition" case.
//
// The optimized variant needs no helper and yields an empty string; the
// unoptimized variant defines func(x), which returns 2.0*x, using the given precision.
static inline string deadBranchFuncCallCaseFuncDefs (bool optimized, const string& precision)
{
	if (optimized)
		return string();

	return precision + " float func (" + precision + " float x) { return 2.0*x; }\n";
}
753 
deadBranchFuncCallCaseStatements(bool optimized,bool useHeavierWorkload)754 	static inline string deadBranchFuncCallCaseStatements (bool optimized, bool useHeavierWorkload)
755 	{
756 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
757 
758 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
759 
760 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
761 							"	if (func(0.3) > 1.0)\n"
762 							"	{\n"
763 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
764 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
765 							"			value = sin(value);\n"
766 							"	}\n";
767 	}
768 
unusedValueBasicCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)769 	static inline string unusedValueBasicCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
770 	{
771 		const int numSinRows = useHeavierWorkload ? 12 : 1;
772 
773 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
774 							"	value = used;\n"
775 
776 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
777 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n"
778 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
779 							"	value = used;\n";
780 	}
781 
unusedValueLoopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)782 	static inline string unusedValueLoopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
783 	{
784 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
785 
786 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
787 							"	value = used;\n"
788 
789 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
790 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
791 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
792 							"		unused = sin(unused + used);\n"
793 							"	value = used;\n";
794 	}
795 
unusedValueAfterReturnCaseFuncDefs(bool optimized,const string & precision,bool useHeavierWorkload)796 	static inline string unusedValueAfterReturnCaseFuncDefs (bool optimized, const string& precision, bool useHeavierWorkload)
797 	{
798 		const int numSinRows = useHeavierWorkload ? 12 : 1;
799 
800 		return optimized ?	precision + " vec4 func (" + precision + " vec4 v)\n"
801 							"{\n"
802 							"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
803 							"	return used;\n"
804 							"}\n"
805 
806 						 :	precision + " vec4 func (" + precision + " vec4 v)\n"
807 							"{\n"
808 							"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
809 							"	" + precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n"
810 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
811 							"	return used;\n"
812 							"	used = used*unused;"
813 							"	return used;\n"
814 							"}\n";
815 	}
816 
// Statements for the "unused value, used only after return" case: a single call
// to func(), identical for the optimized and unoptimized variants.
static inline string unusedValueAfterReturnCaseStatements (void)
{
	const string callStatement("	value = func(value);\n");
	return callStatement;
}
821 
unusedValueDeadBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)822 	static inline string unusedValueDeadBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
823 	{
824 		const int numSinRows = useHeavierWorkload ? 12 : 1;
825 
826 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
827 							"	value = used;\n"
828 
829 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
830 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
831 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
832 							"	if (2 < 1)\n"
833 							"		used = used*unused;\n"
834 							"	value = used;\n";
835 	}
836 
unusedValueMulZeroCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)837 	static inline string unusedValueMulZeroCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
838 	{
839 		const int numSinRows = useHeavierWorkload ? 12 : 1;
840 
841 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
842 							"	value = used;\n"
843 
844 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
845 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
846 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
847 							"	value = used + unused*float(1-1);\n";
848 	}
849 };
850 
851 } // anonymous
852 
ShaderOptimizationTests(Context & context)853 ShaderOptimizationTests::ShaderOptimizationTests (Context& context)
854 	: TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
855 {
856 }
857 
~ShaderOptimizationTests(void)858 ShaderOptimizationTests::~ShaderOptimizationTests (void)
859 {
860 }
861 
init(void)862 void ShaderOptimizationTests::init (void)
863 {
864 	TestCaseGroup* const unrollGroup					= new TestCaseGroup(m_context, "loop_unrolling",					"Loop Unrolling Cases");
865 	TestCaseGroup* const loopInvariantCodeMotionGroup	= new TestCaseGroup(m_context, "loop_invariant_code_motion",		"Loop-Invariant Code Motion Cases");
866 	TestCaseGroup* const inlineGroup					= new TestCaseGroup(m_context, "function_inlining",					"Function Inlining Cases");
867 	TestCaseGroup* const constantPropagationGroup		= new TestCaseGroup(m_context, "constant_propagation",				"Constant Propagation Cases");
868 	TestCaseGroup* const commonSubexpressionGroup		= new TestCaseGroup(m_context, "common_subexpression_elimination",	"Common Subexpression Elimination Cases");
869 	TestCaseGroup* const deadCodeEliminationGroup		= new TestCaseGroup(m_context, "dead_code_elimination",				"Dead Code Elimination Cases");
870 	addChild(unrollGroup);
871 	addChild(loopInvariantCodeMotionGroup);
872 	addChild(inlineGroup);
873 	addChild(constantPropagationGroup);
874 	addChild(commonSubexpressionGroup);
875 	addChild(deadCodeEliminationGroup);
876 
877 	for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
878 	{
879 		const CaseShaderType	caseShaderType			= (CaseShaderType)caseShaderTypeI;
880 		const char* const		caseShaderTypeSuffix	= caseShaderType == CASESHADERTYPE_VERTEX		? "_vertex"
881 														: caseShaderType == CASESHADERTYPE_FRAGMENT		? "_fragment"
882 														: DE_NULL;
883 
884 		// Loop unrolling cases.
885 
886 		{
887 			static const int loopIterationCounts[] = { 4, 8, 32 };
888 
889 			for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
890 			{
891 				const LoopUnrollCase::CaseType	caseType		= (LoopUnrollCase::CaseType)caseTypeI;
892 				const string					caseTypeName	= caseType == LoopUnrollCase::CASETYPE_INDEPENDENT	? "independent_iterations"
893 																: caseType == LoopUnrollCase::CASETYPE_DEPENDENT	? "dependent_iterations"
894 																: DE_NULL;
895 				const string					caseTypeDesc	= caseType == LoopUnrollCase::CASETYPE_INDEPENDENT	? "loop iterations don't depend on each other"
896 																: caseType == LoopUnrollCase::CASETYPE_DEPENDENT	? "loop iterations depend on each other"
897 																: DE_NULL;
898 
899 				for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
900 				{
901 					const int			loopIterations	= loopIterationCounts[loopIterNdx];
902 					const string		name			= caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
903 					const string		description		= toString(loopIterations) + " iterations; " + caseTypeDesc;
904 
905 					unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(), caseShaderType, caseType, loopIterations));
906 				}
907 			}
908 		}
909 
910 		// Loop-invariant code motion cases.
911 
912 		{
913 			static const int loopIterationCounts[] = { 4, 8, 32 };
914 
915 			for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
916 			{
917 				const int		loopIterations	= loopIterationCounts[loopIterNdx];
918 				const string	name			= toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
919 
920 				loopInvariantCodeMotionGroup->addChild(new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
921 			}
922 		}
923 
924 		// Function inlining cases.
925 
926 		{
927 			static const int callNestingDepths[] = { 4, 8, 32 };
928 
929 			for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
930 			{
931 				const int		nestingDepth	= callNestingDepths[nestDepthNdx];
932 				const string	name			= toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
933 
934 				inlineGroup->addChild(new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
935 			}
936 		}
937 
938 		// Constant propagation cases.
939 
940 		for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
941 		{
942 			const ConstantPropagationCase::CaseType		caseType		= (ConstantPropagationCase::CaseType)caseTypeI;
943 			const string								caseTypeName	= caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS		? "built_in_functions"
944 																		: caseType == ConstantPropagationCase::CASETYPE_ARRAY					? "array"
945 																		: caseType == ConstantPropagationCase::CASETYPE_STRUCT					? "struct"
946 																		: DE_NULL;
947 
948 			for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
949 			{
950 				const bool		constantExpressionsOnly		= constantExpressionsOnlyI != 0;
951 				const string	name						= caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
952 
953 				if (caseType == ConstantPropagationCase::CASETYPE_ARRAY && constantExpressionsOnly) // \note See ConstantPropagationCase's constructor for explanation.
954 					continue;
955 
956 				constantPropagationGroup->addChild(new ConstantPropagationCase(m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
957 			}
958 		}
959 
960 		// Common subexpression cases.
961 
962 		for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
963 		{
964 			const CommonSubexpressionCase::CaseType		caseType		= (CommonSubexpressionCase::CaseType)caseTypeI;
965 
966 			const string								caseTypeName	= caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT		? "single_statement"
967 																		: caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS		? "multiple_statements"
968 																		: caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH			? "static_branch"
969 																		: caseType == CommonSubexpressionCase::CASETYPE_LOOP					? "loop"
970 																		: DE_NULL;
971 
972 			const string								description		= caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT		? "A single statement containing multiple uses of same subexpression"
973 																		: caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS		? "Multiple statements performing same computations"
974 																		: caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH			? "Multiple statements including a static conditional"
975 																		: caseType == CommonSubexpressionCase::CASETYPE_LOOP					? "Multiple loops performing the same computations"
976 																		: DE_NULL;
977 
978 			commonSubexpressionGroup->addChild(new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(), description.c_str(), caseShaderType, caseType));
979 		}
980 
981 		// Dead code elimination cases.
982 
983 		for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
984 		{
985 			const DeadCodeEliminationCase::CaseType		caseType				= (DeadCodeEliminationCase::CaseType)caseTypeI;
986 			const char* const							caseTypeName			= caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE				? "dead_branch_simple"
987 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX				? "dead_branch_complex"
988 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? "dead_branch_complex_no_const"
989 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL			? "dead_branch_func_call"
990 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC				? "unused_value_basic"
991 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP				? "unused_value_loop"
992 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? "unused_value_dead_branch"
993 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN		? "unused_value_after_return"
994 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO			? "unused_value_mul_zero"
995 																				: DE_NULL;
996 
997 			const char* const							caseTypeDescription		= caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE				? "Do computation inside a branch that is never taken (condition is simple false constant expression)"
998 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX				? "Do computation inside a branch that is never taken (condition is complex false constant expression)"
999 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? "Do computation inside a branch that is never taken (condition is complex false expression, not constant expression but still compile-time computable)"
1000 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL			? "Do computation inside a branch that is never taken (condition is compile-time computable false expression containing function call to a simple inlineable function)"
1001 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC				? "Compute a value that is never used even statically"
1002 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP				? "Compute a value, using a loop, that is never used even statically"
1003 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? "Compute a value that is used only inside a statically dead branch"
1004 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN		? "Compute a value that is used only after a return statement"
1005 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO			? "Compute a value that is used but multiplied by a zero constant expression"
1006 																				: DE_NULL;
1007 
1008 			deadCodeEliminationGroup->addChild(new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(), caseTypeDescription, caseShaderType, caseType));
1009 		}
1010 	}
1011 }
1012 
1013 } // Performance
1014 } // gles2
1015 } // deqp
1016