1 /*-------------------------------------------------------------------------
2  * drawElements Quality Program OpenGL ES 3.0 Module
3  * -------------------------------------------------
4  *
5  * Copyright 2014 The Android Open Source Project
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief Optimized vs unoptimized shader performance tests.
22  *//*--------------------------------------------------------------------*/
23 
24 #include "es3pShaderOptimizationTests.hpp"
25 #include "glsShaderPerformanceMeasurer.hpp"
26 #include "gluRenderContext.hpp"
27 #include "gluShaderProgram.hpp"
28 #include "tcuTestLog.hpp"
29 #include "tcuVector.hpp"
30 #include "tcuStringTemplate.hpp"
31 #include "deSharedPtr.hpp"
32 #include "deStringUtil.hpp"
33 #include "deMath.h"
34 
35 #include "glwFunctions.hpp"
36 
37 #include <vector>
38 #include <string>
39 #include <map>
40 
41 using glu::ShaderProgram;
42 using tcu::TestLog;
43 using tcu::Vec4;
44 using de::SharedPtr;
45 using de::toString;
46 
47 using std::vector;
48 using std::string;
49 
50 namespace deqp
51 {
52 
53 using gls::ShaderPerformanceMeasurer;
54 
55 namespace gles3
56 {
57 namespace Performance
58 {
59 
//! \brief Build a map holding exactly one key/value pair.
//! \param key   Key of the single entry.
//! \param value Value stored under \p key.
//! \return Map containing only (key, value).
static inline std::map<string, string> singleMap (const string& key, const string& value)
{
	std::map<string, string> single;
	single.insert(std::make_pair(key, value));
	return single;
}
66 
//! \brief Concatenate \p numRepeats copies of \p str, separated by \p delim.
//! \param str        String to repeat.
//! \param numRepeats Number of copies; zero or negative yields an empty string.
//! \param delim      Separator inserted between consecutive copies (not before the first or after the last).
//! \return The joined string.
static inline string repeat (const string& str, int numRepeats, const string& delim = "")
{
	string result;

	// \note Starting from an empty string (rather than from one copy of str) makes numRepeats <= 0 produce "".
	for (int i = 0; i < numRepeats; i++)
	{
		if (i > 0)
			result += delim;
		result += str;
	}

	return result;
}
74 
repeatIndexedTemplate(const string & strTempl,int numRepeats,const string & delim="",int ndxStart=0)75 static inline string repeatIndexedTemplate (const string& strTempl, int numRepeats, const string& delim = "", int ndxStart = 0)
76 {
77 	const tcu::StringTemplate	templ(strTempl);
78 	string						result;
79 	std::map<string, string>	params;
80 
81 	for (int i = 0; i < numRepeats; i++)
82 	{
83 		params["PREV_NDX"]	= toString(i + ndxStart - 1);
84 		params["NDX"]		= toString(i + ndxStart);
85 
86 		result += (i > 0 ? delim : "") + templ.specialize(params);
87 	}
88 
89 	return result;
90 }
91 
92 namespace
93 {
94 
//! Shader stage that a case targets: defaultProgramData() places the
//! case-specific code into the shader of this type, and
//! ShaderOptimizationCase reports the throughput figure of the same stage.
enum CaseShaderType
{
	CASESHADERTYPE_VERTEX = 0,	//!< Case code goes into the vertex shader.
	CASESHADERTYPE_FRAGMENT,	//!< Case code goes into the fragment shader.

	CASESHADERTYPE_LAST			//!< Sentinel; not a valid shader type (getShaderPrecision() asserts on it).
};
102 
getShaderPrecision(CaseShaderType shaderType)103 static inline string getShaderPrecision (CaseShaderType shaderType)
104 {
105 	switch (shaderType)
106 	{
107 		case CASESHADERTYPE_VERTEX:		return "highp";
108 		case CASESHADERTYPE_FRAGMENT:	return "highp";
109 		default:
110 			DE_ASSERT(false);
111 			return DE_NULL;
112 	}
113 }
114 
115 struct ProgramData
116 {
117 	glu::ProgramSources			sources;
118 	vector<gls::AttribSpec>		attributes; //!< \note Shouldn't contain a_position; that one is set by gls::ShaderPerformanceMeasurer.
119 
ProgramDatadeqp::gles3::Performance::__anon78e67cbd0111::ProgramData120 	ProgramData (void) {}
ProgramDatadeqp::gles3::Performance::__anon78e67cbd0111::ProgramData121 	ProgramData (const glu::ProgramSources& sources_, const vector<gls::AttribSpec>& attributes_ = vector<gls::AttribSpec>())	: sources(sources_), attributes(attributes_)	{}
ProgramDatadeqp::gles3::Performance::__anon78e67cbd0111::ProgramData122 	ProgramData (const glu::ProgramSources& sources_, const gls::AttribSpec& attribute)											: sources(sources_), attributes(1, attribute)	{}
123 };
124 
125 //! Shader boilerplate helper; most cases have similar basic shader structure.
defaultProgramData(CaseShaderType shaderType,const string & funcDefs,const string & mainStatements)126 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& funcDefs, const string& mainStatements)
127 {
128 	const bool		isVertexCase	= shaderType == CASESHADERTYPE_VERTEX;
129 	const bool		isFragmentCase	= shaderType == CASESHADERTYPE_FRAGMENT;
130 	const string	vtxPrec			= getShaderPrecision(CASESHADERTYPE_VERTEX);
131 	const string	fragPrec		= getShaderPrecision(CASESHADERTYPE_FRAGMENT);
132 
133 	return ProgramData(glu::ProgramSources() << glu::VertexSource(		"#version 300 es\n"
134 																		"in " + vtxPrec + " vec4 a_position;\n"
135 																		"in " + vtxPrec + " vec4 a_value;\n"
136 																		"out " + fragPrec + " vec4 v_value;\n"
137 																		+ (isVertexCase ? funcDefs : "") +
138 																		"void main (void)\n"
139 																		"{\n"
140 																		"	gl_Position = a_position;\n"
141 																		"	" + vtxPrec + " vec4 value = a_value;\n"
142 																		+ (isVertexCase ? mainStatements : "") +
143 																		"	v_value = value;\n"
144 																		"}\n")
145 
146 											 << glu::FragmentSource(	"#version 300 es\n"
147 																		"layout (location = 0) out " + fragPrec + " vec4 o_color;\n"
148 																		"in " + fragPrec + " vec4 v_value;\n"
149 																		+ (isFragmentCase ? funcDefs : "") +
150 																		"void main (void)\n"
151 																		"{\n"
152 																		"	" + fragPrec + " vec4 value = v_value;\n"
153 																		+ (isFragmentCase ? mainStatements : "") +
154 																		"	o_color = value;\n"
155 																		"}\n"),
156 					  gls::AttribSpec("a_value",
157 									  Vec4(1.0f, 0.0f, 0.0f, 0.0f),
158 									  Vec4(0.0f, 1.0f, 0.0f, 0.0f),
159 									  Vec4(0.0f, 0.0f, 1.0f, 0.0f),
160 									  Vec4(0.0f, 0.0f, 0.0f, 1.0f)));
161 }
162 
defaultProgramData(CaseShaderType shaderType,const string & mainStatements)163 static inline ProgramData defaultProgramData (CaseShaderType shaderType, const string& mainStatements)
164 {
165 	return defaultProgramData(shaderType, "", mainStatements);
166 }
167 
168 class ShaderOptimizationCase : public TestCase
169 {
170 public:
ShaderOptimizationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType)171 	ShaderOptimizationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType)
172 		: TestCase				(context, tcu::NODETYPE_PERFORMANCE, name, description)
173 		, m_caseShaderType		(caseShaderType)
174 		, m_state				(STATE_LAST)
175 		, m_measurer			(context.getRenderContext(), caseShaderType == CASESHADERTYPE_VERTEX	? gls::CASETYPE_VERTEX
176 														   : caseShaderType == CASESHADERTYPE_FRAGMENT	? gls::CASETYPE_FRAGMENT
177 														   : gls::CASETYPE_LAST)
178 		, m_unoptimizedResult	(-1.0f, -1.0f)
179 		, m_optimizedResult		(-1.0f, -1.0f)
180 	{
181 	}
182 
~ShaderOptimizationCase(void)183 	virtual ~ShaderOptimizationCase (void) {}
184 
185 	void			init		(void);
186 	IterateResult	iterate		(void);
187 
188 protected:
189 	virtual ProgramData		generateProgramData (bool optimized) const = 0;
190 
191 	const CaseShaderType	m_caseShaderType;
192 
193 private:
194 	enum State
195 	{
196 		STATE_INIT_UNOPTIMIZED = 0,
197 		STATE_MEASURE_UNOPTIMIZED,
198 		STATE_INIT_OPTIMIZED,
199 		STATE_MEASURE_OPTIMIZED,
200 		STATE_FINISHED,
201 
202 		STATE_LAST
203 	};
204 
programData(bool optimized)205 	ProgramData&						programData		(bool optimized) { return optimized ? m_optimizedData		: m_unoptimizedData;		}
program(bool optimized)206 	SharedPtr<const ShaderProgram>&		program			(bool optimized) { return optimized ? m_optimizedProgram	: m_unoptimizedProgram;		}
result(bool optimized)207 	ShaderPerformanceMeasurer::Result&	result			(bool optimized) { return optimized ? m_optimizedResult		: m_unoptimizedResult;		}
208 
209 	State								m_state;
210 	ShaderPerformanceMeasurer			m_measurer;
211 
212 	ProgramData							m_unoptimizedData;
213 	ProgramData							m_optimizedData;
214 	SharedPtr<const ShaderProgram>		m_unoptimizedProgram;
215 	SharedPtr<const ShaderProgram>		m_optimizedProgram;
216 	ShaderPerformanceMeasurer::Result	m_unoptimizedResult;
217 	ShaderPerformanceMeasurer::Result	m_optimizedResult;
218 };
219 
init(void)220 void ShaderOptimizationCase::init (void)
221 {
222 	const glu::RenderContext&	renderCtx	= m_context.getRenderContext();
223 	TestLog&					log			= m_testCtx.getLog();
224 
225 	m_measurer.logParameters(log);
226 
227 	for (int ndx = 0; ndx < 2; ndx++)
228 	{
229 		const bool optimized = ndx == 1;
230 
231 		programData(optimized) = generateProgramData(optimized);
232 
233 		for (int i = 0; i < (int)programData(optimized).attributes.size(); i++)
234 			DE_ASSERT(programData(optimized).attributes[i].name != "a_position"); // \note Position attribute is set by m_measurer.
235 
236 		program(optimized) = SharedPtr<const ShaderProgram>(new ShaderProgram(renderCtx, programData(optimized).sources));
237 
238 		{
239 			const tcu::ScopedLogSection section(log, optimized ? "OptimizedProgram"			: "UnoptimizedProgram",
240 													 optimized ? "Hand-optimized program"	: "Unoptimized program");
241 			log << *program(optimized);
242 		}
243 
244 		if (!program(optimized)->isOk())
245 			TCU_FAIL("Shader compilation failed");
246 	}
247 
248 	m_state = STATE_INIT_UNOPTIMIZED;
249 }
250 
iterate(void)251 ShaderOptimizationCase::IterateResult ShaderOptimizationCase::iterate (void)
252 {
253 	TestLog& log = m_testCtx.getLog();
254 
255 	if (m_state == STATE_INIT_UNOPTIMIZED || m_state == STATE_INIT_OPTIMIZED)
256 	{
257 		const bool optimized = m_state == STATE_INIT_OPTIMIZED;
258 		m_measurer.init(program(optimized)->getProgram(), programData(optimized).attributes, 1);
259 		m_state = optimized ? STATE_MEASURE_OPTIMIZED : STATE_MEASURE_UNOPTIMIZED;
260 
261 		return CONTINUE;
262 	}
263 	else if (m_state == STATE_MEASURE_UNOPTIMIZED || m_state == STATE_MEASURE_OPTIMIZED)
264 	{
265 		m_measurer.iterate();
266 
267 		if (m_measurer.isFinished())
268 		{
269 			const bool						optimized	= m_state == STATE_MEASURE_OPTIMIZED;
270 			const tcu::ScopedLogSection		section		(log, optimized ? "OptimizedResult"									: "UnoptimizedResult",
271 															  optimized ? "Measurement results for hand-optimized program"	: "Measurement result for unoptimized program");
272 			m_measurer.logMeasurementInfo(log);
273 			result(optimized) = m_measurer.getResult();
274 			m_measurer.deinit();
275 			m_state = optimized ? STATE_FINISHED : STATE_INIT_OPTIMIZED;
276 		}
277 
278 		return CONTINUE;
279 	}
280 	else
281 	{
282 		DE_ASSERT(m_state == STATE_FINISHED);
283 
284 		const float			unoptimizedRelevantResult	= m_caseShaderType == CASESHADERTYPE_VERTEX ? m_unoptimizedResult.megaVertPerSec	: m_unoptimizedResult.megaFragPerSec;
285 		const float			optimizedRelevantResult		= m_caseShaderType == CASESHADERTYPE_VERTEX ? m_optimizedResult.megaVertPerSec		: m_optimizedResult.megaFragPerSec;
286 		const char* const	relevantResultName			= m_caseShaderType == CASESHADERTYPE_VERTEX ? "vertex"								: "fragment";
287 		const float			ratio						= unoptimizedRelevantResult / optimizedRelevantResult;
288 		const int			handOptimizationGain		= (int)deFloatRound(100.0f/ratio) - 100;
289 
290 		log << TestLog::Message << "Unoptimized / optimized " << relevantResultName << " performance ratio: " << ratio << TestLog::EndMessage;
291 
292 		if (handOptimizationGain >= 0)
293 			log << TestLog::Message << "Note: " << handOptimizationGain << "% performance gain was achieved with hand-optimized version" << TestLog::EndMessage;
294 		else
295 			log << TestLog::Message << "Note: hand-optimization degraded performance by " << -handOptimizationGain << "%" << TestLog::EndMessage;
296 
297 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString(ratio, 2).c_str());
298 
299 		return STOP;
300 	}
301 }
302 
303 class LoopUnrollCase : public ShaderOptimizationCase
304 {
305 public:
306 	enum CaseType
307 	{
308 		CASETYPE_INDEPENDENT = 0,
309 		CASETYPE_DEPENDENT,
310 
311 		CASETYPE_LAST
312 	};
313 
LoopUnrollCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,int numRepetitions)314 	LoopUnrollCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, int numRepetitions)
315 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
316 		, m_numRepetitions			(numRepetitions)
317 		, m_caseType				(caseType)
318 	{
319 	}
320 
321 protected:
generateProgramData(bool optimized) const322 	ProgramData generateProgramData (bool optimized) const
323 	{
324 		const string repetition = optimized ? repeatIndexedTemplate("\t" + expressionTemplate(m_caseType) + ";\n", m_numRepetitions)
325 											: loop(m_numRepetitions, expressionTemplate(m_caseType));
326 
327 		return defaultProgramData(m_caseShaderType, "\t" + getShaderPrecision(m_caseShaderType) + " vec4 valueOrig = value;\n" + repetition);
328 	}
329 
330 private:
331 	const int		m_numRepetitions;
332 	const CaseType	m_caseType;
333 
expressionTemplate(CaseType caseType)334 	static inline string expressionTemplate (CaseType caseType)
335 	{
336 		switch (caseType)
337 		{
338 			case CASETYPE_INDEPENDENT:	return "value += sin(float(${NDX}+1)*valueOrig)";
339 			case CASETYPE_DEPENDENT:	return "value = sin(value)";
340 			default:
341 				DE_ASSERT(false);
342 				return DE_NULL;
343 		}
344 	}
345 
loop(int iterations,const string & innerExpr)346 	static inline string loop (int iterations, const string& innerExpr)
347 	{
348 		return "\tfor (int i = 0; i < " + toString(iterations) + "; i++)\n\t\t" + tcu::StringTemplate(innerExpr).specialize(singleMap("NDX", "i")) + ";\n";
349 	}
350 };
351 
352 class LoopInvariantCodeMotionCase : public ShaderOptimizationCase
353 {
354 public:
LoopInvariantCodeMotionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int numLoopIterations)355 	LoopInvariantCodeMotionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int numLoopIterations)
356 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
357 		, m_numLoopIterations		(numLoopIterations)
358 	{
359 	}
360 
361 protected:
generateProgramData(bool optimized) const362 	ProgramData generateProgramData (bool optimized) const
363 	{
364 		float scale = 0.0f;
365 		for (int i = 0; i < m_numLoopIterations; i++)
366 			scale += 3.2f*(float)i + 4.6f;
367 		scale = 1.0f / scale;
368 
369 		const string precision		= getShaderPrecision(m_caseShaderType);
370 		const string statements		= optimized ?	"	" + precision + " vec4 valueOrig = value;\n"
371 													"	" + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
372 													"	for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
373 													"	{\n"
374 													"		" + precision + " float x = 3.2*float(i) + 4.6;\n"
375 													"		value += x*y;\n"
376 													"	}\n"
377 													"	value *= " + toString(scale) + ";\n"
378 
379 												:	"	" + precision + " vec4 valueOrig = value;\n"
380 													"	for (int i = 0; i < " + toString(m_numLoopIterations) + "; i++)\n"
381 													"	{\n"
382 													"		" + precision + " float x = 3.2*float(i) + 4.6;\n"
383 													"		" + precision + " vec4 y = sin(cos(sin(valueOrig)));\n"
384 													"		value += x*y;\n"
385 													"	}\n"
386 													"	value *= " + toString(scale) + ";\n";
387 
388 		return defaultProgramData(m_caseShaderType, statements);
389 	}
390 
391 private:
392 	const int m_numLoopIterations;
393 };
394 
395 class FunctionInliningCase : public ShaderOptimizationCase
396 {
397 public:
FunctionInliningCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,int callNestingDepth)398 	FunctionInliningCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, int callNestingDepth)
399 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
400 		, m_callNestingDepth		(callNestingDepth)
401 	{
402 	}
403 
404 protected:
generateProgramData(bool optimized) const405 	ProgramData generateProgramData (bool optimized) const
406 	{
407 		const string precision				= getShaderPrecision(m_caseShaderType);
408 		const string expression				= "value*vec4(0.8, 0.7, 0.6, 0.9)";
409 		const string maybeFuncDefs			= optimized ? "" : funcDefinitions(m_callNestingDepth, precision, expression);
410 		const string mainValueStatement		= (optimized ? "\tvalue = " + expression : "\tvalue = func" + toString(m_callNestingDepth-1) + "(value)") + ";\n";
411 
412 		return defaultProgramData(m_caseShaderType, maybeFuncDefs, mainValueStatement);
413 	}
414 
415 private:
416 	const int m_callNestingDepth;
417 
funcDefinitions(int callNestingDepth,const string & precision,const string & expression)418 	static inline string funcDefinitions (int callNestingDepth, const string& precision, const string& expression)
419 	{
420 		string result = precision + " vec4 func0 (" + precision + " vec4 value) { return " + expression + "; }\n";
421 
422 		for (int i = 1; i < callNestingDepth; i++)
423 			result += precision + " vec4 func" + toString(i) + " (" + precision + " vec4 v) { return func" + toString(i-1) + "(v); }\n";
424 
425 		return result;
426 	}
427 };
428 
429 class ConstantPropagationCase : public ShaderOptimizationCase
430 {
431 public:
432 	enum CaseType
433 	{
434 		CASETYPE_BUILT_IN_FUNCTIONS = 0,
435 		CASETYPE_ARRAY,
436 		CASETYPE_STRUCT,
437 
438 		CASETYPE_LAST
439 	};
440 
ConstantPropagationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType,bool useConstantExpressionsOnly)441 	ConstantPropagationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType, bool useConstantExpressionsOnly)
442 		: ShaderOptimizationCase			(context, name, description, caseShaderType)
443 		, m_caseType						(caseType)
444 		, m_useConstantExpressionsOnly		(useConstantExpressionsOnly)
445 	{
446 	}
447 
448 protected:
generateProgramData(bool optimized) const449 	ProgramData generateProgramData (bool optimized) const
450 	{
451 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
452 		const string	precision		= getShaderPrecision(m_caseShaderType);
453 		const string	statements		= m_caseType == CASETYPE_BUILT_IN_FUNCTIONS		? builtinFunctionsCaseStatements	(optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
454 										: m_caseType == CASETYPE_ARRAY					? arrayCaseStatements				(optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
455 										: m_caseType == CASETYPE_STRUCT					? structCaseStatements				(optimized, m_useConstantExpressionsOnly, precision, isVertexCase)
456 										: DE_NULL;
457 
458 		return defaultProgramData(m_caseShaderType, statements);
459 	}
460 
461 private:
462 	const CaseType	m_caseType;
463 	const bool		m_useConstantExpressionsOnly;
464 
builtinFunctionsCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)465 	static inline string builtinFunctionsCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
466 	{
467 		const string	constMaybe = constantExpressionsOnly ? "const " : "";
468 		const int		numSinRows = useHeavierWorkload ? 12 : 1;
469 
470 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
471 
472 						 :	"	" + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
473 							"	" + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
474 							"	" + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
475 							"	" + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
476 							"	" + constMaybe + precision + " vec4 e0 = inversesqrt(mix(d+a, d+b, a));\n"
477 							+ repeatIndexedTemplate("	" + constMaybe + precision + " vec4 e${NDX} = sin(sin(sin(sin(e${PREV_NDX}))));\n", numSinRows, "", 1) +
478 							"	" + constMaybe + precision + " vec4 f = abs(e" + toString(numSinRows) + ");\n" +
479 							"	value = f*value;\n";
480 	}
481 
arrayCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)482 	static inline string arrayCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
483 	{
484 		const string	constMaybe = constantExpressionsOnly ? "const " : "";
485 		const int		numSinRows = useHeavierWorkload ? 12 : 1;
486 
487 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
488 
489 						 :	"	const int arrLen = 4;\n"
490 							+ (constantExpressionsOnly ?
491 								"	const " + precision + " vec4 arr[arrLen] =\n"
492 								"		vec4[](vec4(0.1, 0.5, 0.9, 1.3),\n"
493 								"		       vec4(0.2, 0.6, 1.0, 1.4),\n"
494 								"		       vec4(0.3, 0.7, 1.1, 1.5),\n"
495 								"		       vec4(0.4, 0.8, 1.2, 1.6));\n"
496 
497 							 :	"	" + precision + " vec4 arr[arrLen];\n"
498 								"	arr[0] = vec4(0.1, 0.5, 0.9, 1.3);\n"
499 								"	arr[1] = vec4(0.2, 0.6, 1.0, 1.4);\n"
500 								"	arr[2] = vec4(0.3, 0.7, 1.1, 1.5);\n"
501 								"	arr[3] = vec4(0.4, 0.8, 1.2, 1.6);\n"
502 							) +
503 							"	" + constMaybe + precision + " vec4 a = (arr[0] + arr[1] + arr[2] + arr[3]) * (1.0 / float(arr.length()));\n"
504 							"	" + constMaybe + precision + " vec4 b0 = cos(sin(a));\n"
505 							+ repeatIndexedTemplate("	" + constMaybe + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
506 							"	" + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
507 							"	value = c*value;\n";
508 	}
509 
structCaseStatements(bool optimized,bool constantExpressionsOnly,const string & precision,bool useHeavierWorkload)510 	static inline string structCaseStatements (bool optimized, bool constantExpressionsOnly, const string& precision, bool useHeavierWorkload)
511 	{
512 		const string	constMaybe = constantExpressionsOnly ? "const " : "";
513 		const int		numSinRows = useHeavierWorkload ? 12 : 1;
514 
515 		return optimized ?	"	value = vec4(0.4, 0.5, 0.6, 0.7) * value; // NOTE: factor doesn't necessarily match the one in unoptimized shader, but shouldn't make a difference performance-wise\n"
516 
517 						 :	"	struct S\n"
518 							"	{\n"
519 							"		" + precision + " vec4 a;\n"
520 							"		" + precision + " vec4 b;\n"
521 							"		" + precision + " vec4 c;\n"
522 							"		" + precision + " vec4 d;\n"
523 							"	};\n"
524 							"\n"
525 							"	" + constMaybe + "S s =\n"
526 							"		S(vec4(0.1, 0.5, 0.9, 1.3),\n"
527 							"		  vec4(0.2, 0.6, 1.0, 1.4),\n"
528 							"		  vec4(0.3, 0.7, 1.1, 1.5),\n"
529 							"		  vec4(0.4, 0.8, 1.2, 1.6));\n"
530 							"	" + constMaybe + precision + " vec4 a = (s.a + s.b + s.c + s.d) * 0.25;\n"
531 							"	" + constMaybe + precision + " vec4 b0 = cos(sin(a));\n"
532 							+ repeatIndexedTemplate("	" + constMaybe + precision + " vec4 b${NDX} = sin(sin(sin(sin(b${PREV_NDX}))));\n", numSinRows, "", 1) +
533 							"	" + constMaybe + precision + " vec4 c = abs(b" + toString(numSinRows) + ");\n" +
534 							"	value = c*value;\n";
535 	}
536 };
537 
538 class CommonSubexpressionCase : public ShaderOptimizationCase
539 {
540 public:
541 	enum CaseType
542 	{
543 		CASETYPE_SINGLE_STATEMENT = 0,
544 		CASETYPE_MULTIPLE_STATEMENTS,
545 		CASETYPE_STATIC_BRANCH,
546 		CASETYPE_LOOP,
547 
548 		CASETYPE_LAST
549 	};
550 
CommonSubexpressionCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)551 	CommonSubexpressionCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
552 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
553 		, m_caseType				(caseType)
554 	{
555 	}
556 
557 protected:
generateProgramData(bool optimized) const558 	ProgramData generateProgramData (bool optimized) const
559 	{
560 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
561 		const string	precision		= getShaderPrecision(m_caseShaderType);
562 		const string	statements		= m_caseType == CASETYPE_SINGLE_STATEMENT		? singleStatementCaseStatements		(optimized, precision, isVertexCase)
563 										: m_caseType == CASETYPE_MULTIPLE_STATEMENTS	? multipleStatementsCaseStatements	(optimized, precision, isVertexCase)
564 										: m_caseType == CASETYPE_STATIC_BRANCH			? staticBranchCaseStatements		(optimized, precision, isVertexCase)
565 										: m_caseType == CASETYPE_LOOP					? loopCaseStatements				(optimized, precision, isVertexCase)
566 										: DE_NULL;
567 
568 		return defaultProgramData(m_caseShaderType, statements);
569 	}
570 
571 private:
572 	const CaseType m_caseType;
573 
singleStatementCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)574 	static inline string singleStatementCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
575 	{
576 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 1;
577 
578 		return optimized ?	"	" + precision + " vec4 s = sin(value);\n"
579 							"	" + precision + " vec4 cs = cos(s);\n"
580 							"	" + precision + " vec4 d = fract(s + cs) + sqrt(s + exp(cs));\n"
581 							"	value = " + repeat("d", numTopLevelRepeats, "+") + ";\n"
582 
583 						 :	"	value = " + repeat("fract(sin(value) + cos(sin(value))) + sqrt(sin(value) + exp(cos(sin(value))))", numTopLevelRepeats, "\n\t      + ") + ";\n";
584 	}
585 
multipleStatementsCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)586 	static inline string multipleStatementsCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
587 	{
588 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
589 		DE_ASSERT(numTopLevelRepeats >= 2);
590 
591 		return optimized ?	"	" + precision + " vec4 a = sin(value) + cos(exp(value));\n"
592 							"	" + precision + " vec4 b = cos(cos(a));\n"
593 							"	a = fract(exp(sqrt(b)));\n"
594 							"\n"
595 							+ repeat("\tvalue += a*b;\n", numTopLevelRepeats)
596 
597 						 :	repeatIndexedTemplate(	"	" + precision + " vec4 a${NDX} = sin(value) + cos(exp(value));\n"
598 													"	" + precision + " vec4 b${NDX} = cos(cos(a${NDX}));\n"
599 													"	a${NDX} = fract(exp(sqrt(b${NDX})));\n"
600 													"\n",
601 													numTopLevelRepeats) +
602 
603 							repeatIndexedTemplate(	"	value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
604 	}
605 
staticBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)606 	static inline string staticBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
607 	{
608 		const int numTopLevelRepeats = useHeavierWorkload ? 4 : 2;
609 		DE_ASSERT(numTopLevelRepeats >= 2);
610 
611 		if (optimized)
612 		{
613 			return "	" + precision + " vec4 a = sin(value) + cos(exp(value));\n"
614 				   "	" + precision + " vec4 b = cos(a);\n"
615 				   "	b = cos(b);\n"
616 				   "	a = fract(exp(sqrt(b)));\n"
617 				   "\n"
618 				   + repeat("	value += a*b;\n", numTopLevelRepeats);
619 		}
620 		else
621 		{
622 			string result;
623 
624 			for (int i = 0; i < numTopLevelRepeats; i++)
625 			{
626 				result +=	"	" + precision + " vec4 a" + toString(i) + " = sin(value) + cos(exp(value));\n"
627 							"	" + precision + " vec4 b" + toString(i) + " = cos(a" + toString(i) + ");\n";
628 
629 				if (i % 3 == 0)
630 					result +=	"	if (1 < 2)\n"
631 								"		b" + toString(i) + " = cos(b" + toString(i) + ");\n";
632 				else if (i % 3 == 1)
633 					result +=	"	b" + toString(i) + " = cos(b" + toString(i) + ");\n";
634 				else if (i % 3 == 2)
635 					result +=	"	if (2 < 1);\n"
636 								"	else\n"
637 								"		b" + toString(i) + " = cos(b" + toString(i) + ");\n";
638 				else
639 					DE_ASSERT(false);
640 
641 				result +=	"	a" + toString(i) + " = fract(exp(sqrt(b" + toString(i) + ")));\n\n";
642 			}
643 
644 			result += repeatIndexedTemplate("	value += a${NDX}*b${NDX};\n", numTopLevelRepeats);
645 
646 			return result;
647 		}
648 	}
649 
loopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)650 	static inline string loopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
651 	{
652 		const int numLoopIterations = useHeavierWorkload ? 32 : 4;
653 
654 		return optimized ?	"	" + precision + " vec4 acc = value;\n"
655 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
656 							"		acc = sin(acc);\n"
657 							"\n"
658 							"	value += acc;\n"
659 							"	value += acc;\n"
660 
661 						 :	"	" + precision + " vec4 acc0 = value;\n"
662 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
663 							"		acc0 = sin(acc0);\n"
664 							"\n"
665 							"	" + precision + " vec4 acc1 = value;\n"
666 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
667 							"		acc1 = sin(acc1);\n"
668 							"\n"
669 							"	value += acc0;\n"
670 							"	value += acc1;\n";
671 	}
672 };
673 
674 class DeadCodeEliminationCase : public ShaderOptimizationCase
675 {
676 public:
677 	enum CaseType
678 	{
679 		CASETYPE_DEAD_BRANCH_SIMPLE = 0,
680 		CASETYPE_DEAD_BRANCH_COMPLEX,
681 		CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST,
682 		CASETYPE_DEAD_BRANCH_FUNC_CALL,
683 		CASETYPE_UNUSED_VALUE_BASIC,
684 		CASETYPE_UNUSED_VALUE_LOOP,
685 		CASETYPE_UNUSED_VALUE_DEAD_BRANCH,
686 		CASETYPE_UNUSED_VALUE_AFTER_RETURN,
687 		CASETYPE_UNUSED_VALUE_MUL_ZERO,
688 
689 		CASETYPE_LAST
690 	};
691 
DeadCodeEliminationCase(Context & context,const char * name,const char * description,CaseShaderType caseShaderType,CaseType caseType)692 	DeadCodeEliminationCase (Context& context, const char* name, const char* description, CaseShaderType caseShaderType, CaseType caseType)
693 		: ShaderOptimizationCase	(context, name, description, caseShaderType)
694 		, m_caseType				(caseType)
695 	{
696 	}
697 
698 protected:
generateProgramData(bool optimized) const699 	ProgramData generateProgramData (bool optimized) const
700 	{
701 		const bool		isVertexCase	= m_caseShaderType == CASESHADERTYPE_VERTEX;
702 		const string	precision		= getShaderPrecision(m_caseShaderType);
703 		const string	funcDefs		= m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL		? deadBranchFuncCallCaseFuncDefs		(optimized, precision)
704 										: m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN	? unusedValueAfterReturnCaseFuncDefs	(optimized, precision, isVertexCase)
705 										: "";
706 
707 		const string	statements		= m_caseType == CASETYPE_DEAD_BRANCH_SIMPLE				? deadBranchSimpleCaseStatements			(optimized, isVertexCase)
708 										: m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX			? deadBranchComplexCaseStatements			(optimized, precision, true,	isVertexCase)
709 										: m_caseType == CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? deadBranchComplexCaseStatements			(optimized, precision, false,	isVertexCase)
710 										: m_caseType == CASETYPE_DEAD_BRANCH_FUNC_CALL			? deadBranchFuncCallCaseStatements			(optimized, isVertexCase)
711 										: m_caseType == CASETYPE_UNUSED_VALUE_BASIC				? unusedValueBasicCaseStatements			(optimized, precision, isVertexCase)
712 										: m_caseType == CASETYPE_UNUSED_VALUE_LOOP				? unusedValueLoopCaseStatements				(optimized, precision, isVertexCase)
713 										: m_caseType == CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? unusedValueDeadBranchCaseStatements		(optimized, precision, isVertexCase)
714 										: m_caseType == CASETYPE_UNUSED_VALUE_AFTER_RETURN		? unusedValueAfterReturnCaseStatements		()
715 										: m_caseType == CASETYPE_UNUSED_VALUE_MUL_ZERO			? unusedValueMulZeroCaseStatements			(optimized, precision, isVertexCase)
716 										: DE_NULL;
717 
718 		return defaultProgramData(m_caseShaderType, funcDefs, statements);
719 	}
720 
721 private:
722 	const CaseType m_caseType;
723 
deadBranchSimpleCaseStatements(bool optimized,bool useHeavierWorkload)724 	static inline string deadBranchSimpleCaseStatements (bool optimized, bool useHeavierWorkload)
725 	{
726 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
727 
728 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
729 
730 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
731 							"	if (2 < 1)\n"
732 							"	{\n"
733 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
734 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
735 							"			value = sin(value);\n"
736 							"	}\n";
737 	}
738 
deadBranchComplexCaseStatements(bool optimized,const string & precision,bool useConst,bool useHeavierWorkload)739 	static inline string deadBranchComplexCaseStatements (bool optimized, const string& precision, bool useConst, bool useHeavierWorkload)
740 	{
741 		const string	constMaybe			= useConst ? "const " : "";
742 		const int		numLoopIterations	= useHeavierWorkload ? 16 : 4;
743 
744 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
745 
746 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
747 							"	" + constMaybe + precision + " vec4 a = vec4(sin(0.7), cos(0.2), sin(0.9), abs(-0.5));\n"
748 							"	" + constMaybe + precision + " vec4 b = cos(a) + fract(3.0*a.xzzw);\n"
749 							"	" + constMaybe + "bvec4 c = bvec4(true, false, true, true);\n"
750 							"	" + constMaybe + precision + " vec4 d = exp(b + vec4(c));\n"
751 							"	" + constMaybe + precision + " vec4 e = 1.8*abs(sin(sin(inversesqrt(mix(d+a, d+b, a)))));\n"
752 							"	if (e.x > 1.0)\n"
753 							"	{\n"
754 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
755 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
756 							"			value = sin(value);\n"
757 							"	}\n";
758 	}
759 
deadBranchFuncCallCaseFuncDefs(bool optimized,const string & precision)760 	static inline string deadBranchFuncCallCaseFuncDefs (bool optimized, const string& precision)
761 	{
762 		return optimized ? "" : precision + " float func (" + precision + " float x) { return 2.0*x; }\n";
763 	}
764 
deadBranchFuncCallCaseStatements(bool optimized,bool useHeavierWorkload)765 	static inline string deadBranchFuncCallCaseStatements (bool optimized, bool useHeavierWorkload)
766 	{
767 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
768 
769 		return optimized ?	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
770 
771 						 :	"	value = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
772 							"	if (func(0.3) > 1.0)\n"
773 							"	{\n"
774 							"		value = cos(exp(sin(value))*log(sqrt(value)));\n"
775 							"		for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
776 							"			value = sin(value);\n"
777 							"	}\n";
778 	}
779 
unusedValueBasicCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)780 	static inline string unusedValueBasicCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
781 	{
782 		const int numSinRows = useHeavierWorkload ? 12 : 1;
783 
784 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
785 							"	value = used;\n"
786 
787 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
788 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value))) + used;\n"
789 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
790 							"	value = used;\n";
791 	}
792 
unusedValueLoopCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)793 	static inline string unusedValueLoopCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
794 	{
795 		const int numLoopIterations = useHeavierWorkload ? 16 : 4;
796 
797 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
798 							"	value = used;\n"
799 
800 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
801 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
802 							"	for (int i = 0; i < " + toString(numLoopIterations) + "; i++)\n"
803 							"		unused = sin(unused + used);\n"
804 							"	value = used;\n";
805 	}
806 
unusedValueAfterReturnCaseFuncDefs(bool optimized,const string & precision,bool useHeavierWorkload)807 	static inline string unusedValueAfterReturnCaseFuncDefs (bool optimized, const string& precision, bool useHeavierWorkload)
808 	{
809 		const int numSinRows = useHeavierWorkload ? 12 : 1;
810 
811 		return optimized ?	precision + " vec4 func (" + precision + " vec4 v)\n"
812 							"{\n"
813 							"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
814 							"	return used;\n"
815 							"}\n"
816 
817 						 :	precision + " vec4 func (" + precision + " vec4 v)\n"
818 							"{\n"
819 							"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * v;\n"
820 							"	" + precision + " vec4 unused = cos(exp(sin(v))*log(sqrt(v)));\n"
821 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
822 							"	return used;\n"
823 							"	used = used*unused;"
824 							"	return used;\n"
825 							"}\n";
826 	}
827 
unusedValueAfterReturnCaseStatements(void)828 	static inline string unusedValueAfterReturnCaseStatements (void)
829 	{
830 		return "	value = func(value);\n";
831 	}
832 
unusedValueDeadBranchCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)833 	static inline string unusedValueDeadBranchCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
834 	{
835 		const int numSinRows = useHeavierWorkload ? 12 : 1;
836 
837 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
838 							"	value = used;\n"
839 
840 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
841 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
842 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
843 							"	if (2 < 1)\n"
844 							"		used = used*unused;\n"
845 							"	value = used;\n";
846 	}
847 
unusedValueMulZeroCaseStatements(bool optimized,const string & precision,bool useHeavierWorkload)848 	static inline string unusedValueMulZeroCaseStatements (bool optimized, const string& precision, bool useHeavierWorkload)
849 	{
850 		const int numSinRows = useHeavierWorkload ? 12 : 1;
851 
852 		return optimized ?	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
853 							"	value = used;\n"
854 
855 						 :	"	" + precision + " vec4 used = vec4(0.6, 0.7, 0.8, 0.9) * value;\n"
856 							"	" + precision + " vec4 unused = cos(exp(sin(value))*log(sqrt(value)));\n"
857 							+ repeat("	unused = sin(sin(sin(sin(unused))));\n", numSinRows) +
858 							"	value = used + unused*float(1-1);\n";
859 	}
860 };
861 
862 } // anonymous
863 
ShaderOptimizationTests(Context & context)864 ShaderOptimizationTests::ShaderOptimizationTests (Context& context)
865 	: TestCaseGroup(context, "optimization", "Shader Optimization Performance Tests")
866 {
867 }
868 
~ShaderOptimizationTests(void)869 ShaderOptimizationTests::~ShaderOptimizationTests (void)
870 {
871 }
872 
init(void)873 void ShaderOptimizationTests::init (void)
874 {
875 	TestCaseGroup* const unrollGroup					= new TestCaseGroup(m_context, "loop_unrolling",					"Loop Unrolling Cases");
876 	TestCaseGroup* const loopInvariantCodeMotionGroup	= new TestCaseGroup(m_context, "loop_invariant_code_motion",		"Loop-Invariant Code Motion Cases");
877 	TestCaseGroup* const inlineGroup					= new TestCaseGroup(m_context, "function_inlining",					"Function Inlining Cases");
878 	TestCaseGroup* const constantPropagationGroup		= new TestCaseGroup(m_context, "constant_propagation",				"Constant Propagation Cases");
879 	TestCaseGroup* const commonSubexpressionGroup		= new TestCaseGroup(m_context, "common_subexpression_elimination",	"Common Subexpression Elimination Cases");
880 	TestCaseGroup* const deadCodeEliminationGroup		= new TestCaseGroup(m_context, "dead_code_elimination",				"Dead Code Elimination Cases");
881 	addChild(unrollGroup);
882 	addChild(loopInvariantCodeMotionGroup);
883 	addChild(inlineGroup);
884 	addChild(constantPropagationGroup);
885 	addChild(commonSubexpressionGroup);
886 	addChild(deadCodeEliminationGroup);
887 
888 	for (int caseShaderTypeI = 0; caseShaderTypeI < CASESHADERTYPE_LAST; caseShaderTypeI++)
889 	{
890 		const CaseShaderType	caseShaderType			= (CaseShaderType)caseShaderTypeI;
891 		const char* const		caseShaderTypeSuffix	= caseShaderType == CASESHADERTYPE_VERTEX		? "_vertex"
892 														: caseShaderType == CASESHADERTYPE_FRAGMENT		? "_fragment"
893 														: DE_NULL;
894 
895 		// Loop unrolling cases.
896 
897 		{
898 			static const int loopIterationCounts[] = { 4, 8, 32 };
899 
900 			for (int caseTypeI = 0; caseTypeI < LoopUnrollCase::CASETYPE_LAST; caseTypeI++)
901 			{
902 				const LoopUnrollCase::CaseType	caseType		= (LoopUnrollCase::CaseType)caseTypeI;
903 				const string					caseTypeName	= caseType == LoopUnrollCase::CASETYPE_INDEPENDENT	? "independent_iterations"
904 																: caseType == LoopUnrollCase::CASETYPE_DEPENDENT	? "dependent_iterations"
905 																: DE_NULL;
906 				const string					caseTypeDesc	= caseType == LoopUnrollCase::CASETYPE_INDEPENDENT	? "loop iterations don't depend on each other"
907 																: caseType == LoopUnrollCase::CASETYPE_DEPENDENT	? "loop iterations depend on each other"
908 																: DE_NULL;
909 
910 				for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
911 				{
912 					const int			loopIterations	= loopIterationCounts[loopIterNdx];
913 					const string		name			= caseTypeName + "_" + toString(loopIterations) + caseShaderTypeSuffix;
914 					const string		description		= toString(loopIterations) + " iterations; " + caseTypeDesc;
915 
916 					unrollGroup->addChild(new LoopUnrollCase(m_context, name.c_str(), description.c_str(), caseShaderType, caseType, loopIterations));
917 				}
918 			}
919 		}
920 
921 		// Loop-invariant code motion cases.
922 
923 		{
924 			static const int loopIterationCounts[] = { 4, 8, 32 };
925 
926 			for (int loopIterNdx = 0; loopIterNdx < DE_LENGTH_OF_ARRAY(loopIterationCounts); loopIterNdx++)
927 			{
928 				const int		loopIterations	= loopIterationCounts[loopIterNdx];
929 				const string	name			= toString(loopIterations) + "_iterations" + caseShaderTypeSuffix;
930 
931 				loopInvariantCodeMotionGroup->addChild(new LoopInvariantCodeMotionCase(m_context, name.c_str(), "", caseShaderType, loopIterations));
932 			}
933 		}
934 
935 		// Function inlining cases.
936 
937 		{
938 			static const int callNestingDepths[] = { 4, 8, 32 };
939 
940 			for (int nestDepthNdx = 0; nestDepthNdx < DE_LENGTH_OF_ARRAY(callNestingDepths); nestDepthNdx++)
941 			{
942 				const int		nestingDepth	= callNestingDepths[nestDepthNdx];
943 				const string	name			= toString(nestingDepth) + "_nested" + caseShaderTypeSuffix;
944 
945 				inlineGroup->addChild(new FunctionInliningCase(m_context, name.c_str(), "", caseShaderType, nestingDepth));
946 			}
947 		}
948 
949 		// Constant propagation cases.
950 
951 		for (int caseTypeI = 0; caseTypeI < ConstantPropagationCase::CASETYPE_LAST; caseTypeI++)
952 		{
953 			const ConstantPropagationCase::CaseType		caseType		= (ConstantPropagationCase::CaseType)caseTypeI;
954 			const string								caseTypeName	= caseType == ConstantPropagationCase::CASETYPE_BUILT_IN_FUNCTIONS		? "built_in_functions"
955 																		: caseType == ConstantPropagationCase::CASETYPE_ARRAY					? "array"
956 																		: caseType == ConstantPropagationCase::CASETYPE_STRUCT					? "struct"
957 																		: DE_NULL;
958 
959 			for (int constantExpressionsOnlyI = 0; constantExpressionsOnlyI <= 1; constantExpressionsOnlyI++)
960 			{
961 				const bool		constantExpressionsOnly		= constantExpressionsOnlyI != 0;
962 				const string	name						= caseTypeName + (constantExpressionsOnly ? "" : "_no_const") + caseShaderTypeSuffix;
963 
964 				constantPropagationGroup->addChild(new ConstantPropagationCase(m_context, name.c_str(), "", caseShaderType, caseType, constantExpressionsOnly));
965 			}
966 		}
967 
968 		// Common subexpression cases.
969 
970 		for (int caseTypeI = 0; caseTypeI < CommonSubexpressionCase::CASETYPE_LAST; caseTypeI++)
971 		{
972 			const CommonSubexpressionCase::CaseType		caseType		= (CommonSubexpressionCase::CaseType)caseTypeI;
973 
974 			const string								caseTypeName	= caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT		? "single_statement"
975 																		: caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS		? "multiple_statements"
976 																		: caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH			? "static_branch"
977 																		: caseType == CommonSubexpressionCase::CASETYPE_LOOP					? "loop"
978 																		: DE_NULL;
979 
980 			const string								description		= caseType == CommonSubexpressionCase::CASETYPE_SINGLE_STATEMENT		? "A single statement containing multiple uses of same subexpression"
981 																		: caseType == CommonSubexpressionCase::CASETYPE_MULTIPLE_STATEMENTS		? "Multiple statements performing same computations"
982 																		: caseType == CommonSubexpressionCase::CASETYPE_STATIC_BRANCH			? "Multiple statements including a static conditional"
983 																		: caseType == CommonSubexpressionCase::CASETYPE_LOOP					? "Multiple loops performing the same computations"
984 																		: DE_NULL;
985 
986 			commonSubexpressionGroup->addChild(new CommonSubexpressionCase(m_context, (caseTypeName + caseShaderTypeSuffix).c_str(), description.c_str(), caseShaderType, caseType));
987 		}
988 
989 		// Dead code elimination cases.
990 
991 		for (int caseTypeI = 0; caseTypeI < DeadCodeEliminationCase::CASETYPE_LAST; caseTypeI++)
992 		{
993 			const DeadCodeEliminationCase::CaseType		caseType				= (DeadCodeEliminationCase::CaseType)caseTypeI;
994 			const char* const							caseTypeName			= caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE				? "dead_branch_simple"
995 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX				? "dead_branch_complex"
996 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? "dead_branch_complex_no_const"
997 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL			? "dead_branch_func_call"
998 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC				? "unused_value_basic"
999 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP				? "unused_value_loop"
1000 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? "unused_value_dead_branch"
1001 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN		? "unused_value_after_return"
1002 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO			? "unused_value_mul_zero"
1003 																				: DE_NULL;
1004 
1005 			const char* const							caseTypeDescription		= caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_SIMPLE				? "Do computation inside a branch that is never taken (condition is simple false constant expression)"
1006 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX				? "Do computation inside a branch that is never taken (condition is complex false constant expression)"
1007 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_COMPLEX_NO_CONST	? "Do computation inside a branch that is never taken (condition is complex false expression, not constant expression but still compile-time computable)"
1008 																				: caseType == DeadCodeEliminationCase::CASETYPE_DEAD_BRANCH_FUNC_CALL			? "Do computation inside a branch that is never taken (condition is compile-time computable false expression containing function call to a simple inlineable function)"
1009 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_BASIC				? "Compute a value that is never used even statically"
1010 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_LOOP				? "Compute a value, using a loop, that is never used even statically"
1011 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_DEAD_BRANCH		? "Compute a value that is used only inside a statically dead branch"
1012 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_AFTER_RETURN		? "Compute a value that is used only after a return statement"
1013 																				: caseType == DeadCodeEliminationCase::CASETYPE_UNUSED_VALUE_MUL_ZERO			? "Compute a value that is used but multiplied by a zero constant expression"
1014 																				: DE_NULL;
1015 
1016 			deadCodeEliminationGroup->addChild(new DeadCodeEliminationCase(m_context, (string() + caseTypeName + caseShaderTypeSuffix).c_str(), caseTypeDescription, caseShaderType, caseType));
1017 		}
1018 	}
1019 }
1020 
1021 } // Performance
1022 } // gles3
1023 } // deqp
1024