1 /*-------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 Google Inc.
6  *
7  * Licensed under the Apache License, Version 2.0 (the "License");
8  * you may not use this file except in compliance with the License.
9  * You may obtain a copy of the License at
10  *
11  *      http://www.apache.org/licenses/LICENSE-2.0
12  *
13  * Unless required by applicable law or agreed to in writing, software
14  * distributed under the License is distributed on an "AS IS" BASIS,
15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  * See the License for the specific language governing permissions and
17  * limitations under the License.
18  *
19  *//*!
20  * \file
21  * \brief SPIR-V Assembly Tests for Instructions (special opcode/operand)
22  *//*--------------------------------------------------------------------*/
23 
24 #include "vktSpvAsmInstructionTests.hpp"
25 
26 #include "tcuCommandLine.hpp"
27 #include "tcuFormatUtil.hpp"
28 #include "tcuRGBA.hpp"
29 #include "tcuStringTemplate.hpp"
30 #include "tcuTestLog.hpp"
31 #include "tcuVectorUtil.hpp"
32 
33 #include "vkDefs.hpp"
34 #include "vkDeviceUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkPlatform.hpp"
37 #include "vkPrograms.hpp"
38 #include "vkQueryUtil.hpp"
39 #include "vkRef.hpp"
40 #include "vkRefUtil.hpp"
41 #include "vkStrUtil.hpp"
42 #include "vkTypeUtil.hpp"
43 
44 #include "deRandom.hpp"
45 #include "deStringUtil.hpp"
46 #include "deUniquePtr.hpp"
47 #include "tcuStringTemplate.hpp"
48 
49 #include <cmath>
50 #include "vktSpvAsmComputeShaderCase.hpp"
51 #include "vktSpvAsmComputeShaderTestUtil.hpp"
52 #include "vktTestCaseUtil.hpp"
53 
54 #include <cmath>
55 #include <limits>
56 #include <map>
57 #include <string>
58 #include <sstream>
59 
60 namespace vkt
61 {
62 namespace SpirVAssembly
63 {
64 
65 namespace
66 {
67 
68 using namespace vk;
69 using std::map;
70 using std::string;
71 using std::vector;
72 using tcu::IVec3;
73 using tcu::IVec4;
74 using tcu::RGBA;
75 using tcu::TestLog;
76 using tcu::TestStatus;
77 using tcu::Vec4;
78 using de::UniquePtr;
79 using tcu::StringTemplate;
80 using tcu::Vec4;
81 
82 typedef Unique<VkShaderModule>			ModuleHandleUp;
83 typedef de::SharedPtr<ModuleHandleUp>	ModuleHandleSp;
84 
85 template<typename T>	T			randomScalar	(de::Random& rnd, T minValue, T maxValue);
randomScalar(de::Random & rnd,float minValue,float maxValue)86 template<> inline		float		randomScalar	(de::Random& rnd, float minValue, float maxValue)		{ return rnd.getFloat(minValue, maxValue);	}
randomScalar(de::Random & rnd,deInt32 minValue,deInt32 maxValue)87 template<> inline		deInt32		randomScalar	(de::Random& rnd, deInt32 minValue, deInt32 maxValue)	{ return rnd.getInt(minValue, maxValue);	}
88 
89 template<typename T>
fillRandomScalars(de::Random & rnd,T minValue,T maxValue,void * dst,int numValues,int offset=0)90 static void fillRandomScalars (de::Random& rnd, T minValue, T maxValue, void* dst, int numValues, int offset = 0)
91 {
92 	T* const typedPtr = (T*)dst;
93 	for (int ndx = 0; ndx < numValues; ndx++)
94 		typedPtr[offset + ndx] = randomScalar<T>(rnd, minValue, maxValue);
95 }
96 
floorAll(vector<float> & values)97 static void floorAll (vector<float>& values)
98 {
99 	for (size_t i = 0; i < values.size(); i++)
100 		values[i] = deFloatFloor(values[i]);
101 }
102 
floorAll(vector<Vec4> & values)103 static void floorAll (vector<Vec4>& values)
104 {
105 	for (size_t i = 0; i < values.size(); i++)
106 		values[i] = floor(values[i]);
107 }
108 
//! Pairs a test case name with the string parameter used when building that case.
struct CaseParameter
{
	const char*		name;	//!< Case name; the pointer is stored as given, the text is not copied.
	string			param;	//!< Parameter text, copied from the constructor argument.

	CaseParameter (const char* case_, const string& param_)
		: name	(case_)
		, param	(param_)
	{
	}
};
116 
117 // Assembly code used for testing OpNop, OpConstant{Null|Composite}, Op[No]Line, OpSource[Continued], OpSourceExtension, OpUndef is based on GLSL source code:
118 //
119 // #version 430
120 //
121 // layout(std140, set = 0, binding = 0) readonly buffer Input {
122 //   float elements[];
123 // } input_data;
124 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
125 //   float elements[];
126 // } output_data;
127 //
128 // layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
129 //
130 // void main() {
131 //   uint x = gl_GlobalInvocationID.x;
132 //   output_data.elements[x] = -input_data.elements[x];
133 // }
134 
// Module header shared by the compute shaders below: declares the Shader capability, the
// Logical/GLSL450 memory model, a GLCompute entry point %main whose interface is %id, and a
// local workgroup size of 1x1x1. Each shader must define %main and %id itself.
static const char* const s_ShaderPreamble =
	"OpCapability Shader\n"
	"OpMemoryModel Logical GLSL450\n"
	"OpEntryPoint GLCompute %main \"main\" %id\n"
	"OpExecutionMode %main LocalSize 1 1 1\n";
140 
// Type declarations shared by the shaders below: bool, void, the void() function type,
// 32-bit unsigned/signed integers, 32-bit float, 3-component uint/float vectors, an Input
// pointer to uvec3 (%uvec3ptr), a Uniform pointer to float (%f32ptr) and a runtime array
// of float (%f32arr).
static const char* const s_CommonTypes =
	"%bool      = OpTypeBool\n"
	"%void      = OpTypeVoid\n"
	"%voidf     = OpTypeFunction %void\n"
	"%u32       = OpTypeInt 32 0\n"
	"%i32       = OpTypeInt 32 1\n"
	"%f32       = OpTypeFloat 32\n"
	"%uvec3     = OpTypeVector %u32 3\n"
	"%fvec3     = OpTypeVector %f32 3\n"
	"%uvec3ptr  = OpTypePointer Input %uvec3\n"
	"%f32ptr    = OpTypePointer Uniform %f32\n"
	"%f32arr    = OpTypeRuntimeArray %f32\n";
153 
// Declares two Uniform-storage variables (%indata, %outdata) of type "struct { float[] }",
// together with the struct type %buf and its pointer type %bufptr. Depends on the type
// %f32arr (for "float[]"), which s_CommonTypes declares, so it must be emitted after it.
static const char* const s_InputOutputBuffer =
	"%buf     = OpTypeStruct %f32arr\n"
	"%bufptr  = OpTypePointer Uniform %buf\n"
	"%indata    = OpVariable %bufptr Uniform\n"
	"%outdata   = OpVariable %bufptr Uniform\n";
160 
// Declares the buffer type and layout for the uniform variables indata and outdata. Both are
// SSBOs bound to descriptor set 0; indata is at binding point 0, while outdata is at 1.
// Also sets the array stride of %f32arr to 4 bytes and the offset of member 0 of %buf to 0,
// so shaders that use this string must not repeat those decorations.
static const char* const s_InputOutputBufferTraits =
	"OpDecorate %buf BufferBlock\n"
	"OpDecorate %indata DescriptorSet 0\n"
	"OpDecorate %indata Binding 0\n"
	"OpDecorate %outdata DescriptorSet 0\n"
	"OpDecorate %outdata Binding 1\n"
	"OpDecorate %f32arr ArrayStride 4\n"
	"OpMemberDecorate %buf 0 Offset 0\n";
171 
createOpNopGroup(tcu::TestContext & testCtx)172 tcu::TestCaseGroup* createOpNopGroup (tcu::TestContext& testCtx)
173 {
174 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnop", "Test the OpNop instruction"));
175 	ComputeShaderSpec				spec;
176 	de::Random						rnd				(deStringHash(group->getName()));
177 	const int						numElements		= 100;
178 	vector<float>					positiveFloats	(numElements, 0);
179 	vector<float>					negativeFloats	(numElements, 0);
180 
181 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
182 
183 	for (size_t ndx = 0; ndx < numElements; ++ndx)
184 		negativeFloats[ndx] = -positiveFloats[ndx];
185 
186 	spec.assembly =
187 		string(s_ShaderPreamble) +
188 
189 		"OpSource GLSL 430\n"
190 		"OpName %main           \"main\"\n"
191 		"OpName %id             \"gl_GlobalInvocationID\"\n"
192 
193 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
194 
195 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes)
196 
197 		+ string(s_InputOutputBuffer) +
198 
199 		"%id        = OpVariable %uvec3ptr Input\n"
200 		"%zero      = OpConstant %i32 0\n"
201 
202 		"%main      = OpFunction %void None %voidf\n"
203 		"%label     = OpLabel\n"
204 		"%idval     = OpLoad %uvec3 %id\n"
205 		"%x         = OpCompositeExtract %u32 %idval 0\n"
206 
207 		"             OpNop\n" // Inside a function body
208 
209 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
210 		"%inval     = OpLoad %f32 %inloc\n"
211 		"%neg       = OpFNegate %f32 %inval\n"
212 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
213 		"             OpStore %outloc %neg\n"
214 		"             OpReturn\n"
215 		"             OpFunctionEnd\n";
216 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
217 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
218 	spec.numWorkGroups = IVec3(numElements, 1, 1);
219 
220 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNop appearing at different places", spec));
221 
222 	return group.release();
223 }
224 
createOpLineGroup(tcu::TestContext & testCtx)225 tcu::TestCaseGroup* createOpLineGroup (tcu::TestContext& testCtx)
226 {
227 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opline", "Test the OpLine instruction"));
228 	ComputeShaderSpec				spec;
229 	de::Random						rnd				(deStringHash(group->getName()));
230 	const int						numElements		= 100;
231 	vector<float>					positiveFloats	(numElements, 0);
232 	vector<float>					negativeFloats	(numElements, 0);
233 
234 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
235 
236 	for (size_t ndx = 0; ndx < numElements; ++ndx)
237 		negativeFloats[ndx] = -positiveFloats[ndx];
238 
239 	spec.assembly =
240 		string(s_ShaderPreamble) +
241 
242 		"%fname1 = OpString \"negateInputs.comp\"\n"
243 		"%fname2 = OpString \"negateInputs\"\n"
244 
245 		"OpSource GLSL 430\n"
246 		"OpName %main           \"main\"\n"
247 		"OpName %id             \"gl_GlobalInvocationID\"\n"
248 
249 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
250 
251 		+ string(s_InputOutputBufferTraits) +
252 
253 		"OpLine %fname1 0 0\n" // At the earliest possible position
254 
255 		+ string(s_CommonTypes) + string(s_InputOutputBuffer) +
256 
257 		"OpLine %fname1 0 1\n" // Multiple OpLines in sequence
258 		"OpLine %fname2 1 0\n" // Different filenames
259 		"OpLine %fname1 1000 100000\n"
260 
261 		"%id        = OpVariable %uvec3ptr Input\n"
262 		"%zero      = OpConstant %i32 0\n"
263 
264 		"OpLine %fname1 1 1\n" // Before a function
265 
266 		"%main      = OpFunction %void None %voidf\n"
267 		"%label     = OpLabel\n"
268 
269 		"OpLine %fname1 1 1\n" // In a function
270 
271 		"%idval     = OpLoad %uvec3 %id\n"
272 		"%x         = OpCompositeExtract %u32 %idval 0\n"
273 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
274 		"%inval     = OpLoad %f32 %inloc\n"
275 		"%neg       = OpFNegate %f32 %inval\n"
276 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
277 		"             OpStore %outloc %neg\n"
278 		"             OpReturn\n"
279 		"             OpFunctionEnd\n";
280 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
281 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
282 	spec.numWorkGroups = IVec3(numElements, 1, 1);
283 
284 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpLine appearing at different places", spec));
285 
286 	return group.release();
287 }
288 
createOpNoLineGroup(tcu::TestContext & testCtx)289 tcu::TestCaseGroup* createOpNoLineGroup (tcu::TestContext& testCtx)
290 {
291 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opnoline", "Test the OpNoLine instruction"));
292 	ComputeShaderSpec				spec;
293 	de::Random						rnd				(deStringHash(group->getName()));
294 	const int						numElements		= 100;
295 	vector<float>					positiveFloats	(numElements, 0);
296 	vector<float>					negativeFloats	(numElements, 0);
297 
298 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
299 
300 	for (size_t ndx = 0; ndx < numElements; ++ndx)
301 		negativeFloats[ndx] = -positiveFloats[ndx];
302 
303 	spec.assembly =
304 		string(s_ShaderPreamble) +
305 
306 		"%fname = OpString \"negateInputs.comp\"\n"
307 
308 		"OpSource GLSL 430\n"
309 		"OpName %main           \"main\"\n"
310 		"OpName %id             \"gl_GlobalInvocationID\"\n"
311 
312 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
313 
314 		+ string(s_InputOutputBufferTraits) +
315 
316 		"OpNoLine\n" // At the earliest possible position, without preceding OpLine
317 
318 		+ string(s_CommonTypes) + string(s_InputOutputBuffer) +
319 
320 		"OpLine %fname 0 1\n"
321 		"OpNoLine\n" // Immediately following a preceding OpLine
322 
323 		"OpLine %fname 1000 1\n"
324 
325 		"%id        = OpVariable %uvec3ptr Input\n"
326 		"%zero      = OpConstant %i32 0\n"
327 
328 		"OpNoLine\n" // Contents after the previous OpLine
329 
330 		"%main      = OpFunction %void None %voidf\n"
331 		"%label     = OpLabel\n"
332 		"%idval     = OpLoad %uvec3 %id\n"
333 		"%x         = OpCompositeExtract %u32 %idval 0\n"
334 
335 		"OpNoLine\n" // Multiple OpNoLine
336 		"OpNoLine\n"
337 		"OpNoLine\n"
338 
339 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
340 		"%inval     = OpLoad %f32 %inloc\n"
341 		"%neg       = OpFNegate %f32 %inval\n"
342 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
343 		"             OpStore %outloc %neg\n"
344 		"             OpReturn\n"
345 		"             OpFunctionEnd\n";
346 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
347 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
348 	spec.numWorkGroups = IVec3(numElements, 1, 1);
349 
350 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpNoLine appearing at different places", spec));
351 
352 	return group.release();
353 }
354 
355 // Compare instruction for the contraction compute case.
356 // Returns true if the output is what is expected from the test case.
compareNoContractCase(const std::vector<BufferSp> &,const vector<AllocationSp> & outputAllocs,const std::vector<BufferSp> & expectedOutputs)357 bool compareNoContractCase(const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
358 {
359 	if (outputAllocs.size() != 1)
360 		return false;
361 
362 	// We really just need this for size because we are not comparing the exact values.
363 	const BufferSp&	expectedOutput	= expectedOutputs[0];
364 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
365 
366 	for(size_t i = 0; i < expectedOutput->getNumBytes() / sizeof(float); ++i) {
367 		if (outputAsFloat[i] != 0.f &&
368 			outputAsFloat[i] != -ldexp(1, -24)) {
369 			return false;
370 		}
371 	}
372 
373 	return true;
374 }
375 
createNoContractionGroup(tcu::TestContext & testCtx)376 tcu::TestCaseGroup* createNoContractionGroup (tcu::TestContext& testCtx)
377 {
378 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
379 	vector<CaseParameter>			cases;
380 	const int						numElements		= 100;
381 	vector<float>					inputFloats1	(numElements, 0);
382 	vector<float>					inputFloats2	(numElements, 0);
383 	vector<float>					outputFloats	(numElements, 0);
384 	const StringTemplate			shaderTemplate	(
385 		string(s_ShaderPreamble) +
386 
387 		"OpName %main           \"main\"\n"
388 		"OpName %id             \"gl_GlobalInvocationID\"\n"
389 
390 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
391 
392 		"${DECORATION}\n"
393 
394 		"OpDecorate %buf BufferBlock\n"
395 		"OpDecorate %indata1 DescriptorSet 0\n"
396 		"OpDecorate %indata1 Binding 0\n"
397 		"OpDecorate %indata2 DescriptorSet 0\n"
398 		"OpDecorate %indata2 Binding 1\n"
399 		"OpDecorate %outdata DescriptorSet 0\n"
400 		"OpDecorate %outdata Binding 2\n"
401 		"OpDecorate %f32arr ArrayStride 4\n"
402 		"OpMemberDecorate %buf 0 Offset 0\n"
403 
404 		+ string(s_CommonTypes) +
405 
406 		"%buf        = OpTypeStruct %f32arr\n"
407 		"%bufptr     = OpTypePointer Uniform %buf\n"
408 		"%indata1    = OpVariable %bufptr Uniform\n"
409 		"%indata2    = OpVariable %bufptr Uniform\n"
410 		"%outdata    = OpVariable %bufptr Uniform\n"
411 
412 		"%id         = OpVariable %uvec3ptr Input\n"
413 		"%zero       = OpConstant %i32 0\n"
414 		"%c_f_m1     = OpConstant %f32 -1.\n"
415 
416 		"%main       = OpFunction %void None %voidf\n"
417 		"%label      = OpLabel\n"
418 		"%idval      = OpLoad %uvec3 %id\n"
419 		"%x          = OpCompositeExtract %u32 %idval 0\n"
420 		"%inloc1     = OpAccessChain %f32ptr %indata1 %zero %x\n"
421 		"%inval1     = OpLoad %f32 %inloc1\n"
422 		"%inloc2     = OpAccessChain %f32ptr %indata2 %zero %x\n"
423 		"%inval2     = OpLoad %f32 %inloc2\n"
424 		"%mul        = OpFMul %f32 %inval1 %inval2\n"
425 		"%add        = OpFAdd %f32 %mul %c_f_m1\n"
426 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
427 		"              OpStore %outloc %add\n"
428 		"              OpReturn\n"
429 		"              OpFunctionEnd\n");
430 
431 	cases.push_back(CaseParameter("multiplication",	"OpDecorate %mul NoContraction"));
432 	cases.push_back(CaseParameter("addition",		"OpDecorate %add NoContraction"));
433 	cases.push_back(CaseParameter("both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"));
434 
435 	for (size_t ndx = 0; ndx < numElements; ++ndx)
436 	{
437 		inputFloats1[ndx]	= 1.f + std::ldexp(1.f, -23); // 1 + 2^-23.
438 		inputFloats2[ndx]	= 1.f - std::ldexp(1.f, -23); // 1 - 2^-23.
439 		// Result for (1 + 2^-23) * (1 - 2^-23) - 1. With NoContraction, the multiplication will be
440 		// conducted separately and the result is rounded to 1, or 0x1.fffffcp-1
441 		// So the final result will be 0.f or 0x1p-24.
442 		// If the operation is combined into a precise fused multiply-add, then the result would be
443 		// 2^-46 (0xa8800000).
444 		outputFloats[ndx]	= 0.f;
445 	}
446 
447 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
448 	{
449 		map<string, string>		specializations;
450 		ComputeShaderSpec		spec;
451 
452 		specializations["DECORATION"] = cases[caseNdx].param;
453 		spec.assembly = shaderTemplate.specialize(specializations);
454 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
455 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
456 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
457 		spec.numWorkGroups = IVec3(numElements, 1, 1);
458 		// Check against the two possible answers based on rounding mode.
459 		spec.verifyIO = &compareNoContractCase;
460 
461 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
462 	}
463 	return group.release();
464 }
465 
compareFRem(const std::vector<BufferSp> &,const vector<AllocationSp> & outputAllocs,const std::vector<BufferSp> & expectedOutputs)466 bool compareFRem(const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
467 {
468 	if (outputAllocs.size() != 1)
469 		return false;
470 
471 	const BufferSp& expectedOutput = expectedOutputs[0];
472 	const float *expectedOutputAsFloat = static_cast<const float*>(expectedOutput->data());
473 	const float* outputAsFloat = static_cast<const float*>(outputAllocs[0]->getHostPtr());;
474 
475 	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
476 	{
477 		const float f0 = expectedOutputAsFloat[idx];
478 		const float f1 = outputAsFloat[idx];
479 		// \todo relative error needs to be fairly high because FRem may be implemented as
480 		// (roughly) frac(a/b)*b, so LSB errors can be magnified. But this should be fine for now.
481 		if (deFloatAbs((f1 - f0) / f0) > 0.02)
482 			return false;
483 	}
484 
485 	return true;
486 }
487 
createOpFRemGroup(tcu::TestContext & testCtx)488 tcu::TestCaseGroup* createOpFRemGroup (tcu::TestContext& testCtx)
489 {
490 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opfrem", "Test the OpFRem instruction"));
491 	ComputeShaderSpec				spec;
492 	de::Random						rnd				(deStringHash(group->getName()));
493 	const int						numElements		= 200;
494 	vector<float>					inputFloats1	(numElements, 0);
495 	vector<float>					inputFloats2	(numElements, 0);
496 	vector<float>					outputFloats	(numElements, 0);
497 
498 	fillRandomScalars(rnd, -10000.f, 10000.f, &inputFloats1[0], numElements);
499 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats2[0], numElements);
500 
501 	for (size_t ndx = 0; ndx < numElements; ++ndx)
502 	{
503 		// Guard against divisors near zero.
504 		if (std::fabs(inputFloats2[ndx]) < 1e-3)
505 			inputFloats2[ndx] = 8.f;
506 
507 		// The return value of std::fmod() has the same sign as its first operand, which is how OpFRem spec'd.
508 		outputFloats[ndx] = std::fmod(inputFloats1[ndx], inputFloats2[ndx]);
509 	}
510 
511 	spec.assembly =
512 		string(s_ShaderPreamble) +
513 
514 		"OpName %main           \"main\"\n"
515 		"OpName %id             \"gl_GlobalInvocationID\"\n"
516 
517 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
518 
519 		"OpDecorate %buf BufferBlock\n"
520 		"OpDecorate %indata1 DescriptorSet 0\n"
521 		"OpDecorate %indata1 Binding 0\n"
522 		"OpDecorate %indata2 DescriptorSet 0\n"
523 		"OpDecorate %indata2 Binding 1\n"
524 		"OpDecorate %outdata DescriptorSet 0\n"
525 		"OpDecorate %outdata Binding 2\n"
526 		"OpDecorate %f32arr ArrayStride 4\n"
527 		"OpMemberDecorate %buf 0 Offset 0\n"
528 
529 		+ string(s_CommonTypes) +
530 
531 		"%buf        = OpTypeStruct %f32arr\n"
532 		"%bufptr     = OpTypePointer Uniform %buf\n"
533 		"%indata1    = OpVariable %bufptr Uniform\n"
534 		"%indata2    = OpVariable %bufptr Uniform\n"
535 		"%outdata    = OpVariable %bufptr Uniform\n"
536 
537 		"%id        = OpVariable %uvec3ptr Input\n"
538 		"%zero      = OpConstant %i32 0\n"
539 
540 		"%main      = OpFunction %void None %voidf\n"
541 		"%label     = OpLabel\n"
542 		"%idval     = OpLoad %uvec3 %id\n"
543 		"%x         = OpCompositeExtract %u32 %idval 0\n"
544 		"%inloc1    = OpAccessChain %f32ptr %indata1 %zero %x\n"
545 		"%inval1    = OpLoad %f32 %inloc1\n"
546 		"%inloc2    = OpAccessChain %f32ptr %indata2 %zero %x\n"
547 		"%inval2    = OpLoad %f32 %inloc2\n"
548 		"%rem       = OpFRem %f32 %inval1 %inval2\n"
549 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
550 		"             OpStore %outloc %rem\n"
551 		"             OpReturn\n"
552 		"             OpFunctionEnd\n";
553 
554 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
555 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
556 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
557 	spec.numWorkGroups = IVec3(numElements, 1, 1);
558 	spec.verifyIO = &compareFRem;
559 
560 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "", spec));
561 
562 	return group.release();
563 }
564 
565 // Copy contents in the input buffer to the output buffer.
createOpCopyMemoryGroup(tcu::TestContext & testCtx)566 tcu::TestCaseGroup* createOpCopyMemoryGroup (tcu::TestContext& testCtx)
567 {
568 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopymemory", "Test the OpCopyMemory instruction"));
569 	de::Random						rnd				(deStringHash(group->getName()));
570 	const int						numElements		= 100;
571 
572 	// The following case adds vec4(0., 0.5, 1.5, 2.5) to each of the elements in the input buffer and writes output to the output buffer.
573 	ComputeShaderSpec				spec1;
574 	vector<Vec4>					inputFloats1	(numElements);
575 	vector<Vec4>					outputFloats1	(numElements);
576 
577 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats1[0], numElements * 4);
578 
579 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
580 	floorAll(inputFloats1);
581 
582 	for (size_t ndx = 0; ndx < numElements; ++ndx)
583 		outputFloats1[ndx] = inputFloats1[ndx] + Vec4(0.f, 0.5f, 1.5f, 2.5f);
584 
585 	spec1.assembly =
586 		string(s_ShaderPreamble) +
587 
588 		"OpName %main           \"main\"\n"
589 		"OpName %id             \"gl_GlobalInvocationID\"\n"
590 
591 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
592 		"OpDecorate %vec4arr ArrayStride 16\n"
593 
594 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
595 
596 		"%vec4       = OpTypeVector %f32 4\n"
597 		"%vec4ptr_u  = OpTypePointer Uniform %vec4\n"
598 		"%vec4ptr_f  = OpTypePointer Function %vec4\n"
599 		"%vec4arr    = OpTypeRuntimeArray %vec4\n"
600 		"%buf        = OpTypeStruct %vec4arr\n"
601 		"%bufptr     = OpTypePointer Uniform %buf\n"
602 		"%indata     = OpVariable %bufptr Uniform\n"
603 		"%outdata    = OpVariable %bufptr Uniform\n"
604 
605 		"%id         = OpVariable %uvec3ptr Input\n"
606 		"%zero       = OpConstant %i32 0\n"
607 		"%c_f_0      = OpConstant %f32 0.\n"
608 		"%c_f_0_5    = OpConstant %f32 0.5\n"
609 		"%c_f_1_5    = OpConstant %f32 1.5\n"
610 		"%c_f_2_5    = OpConstant %f32 2.5\n"
611 		"%c_vec4     = OpConstantComposite %vec4 %c_f_0 %c_f_0_5 %c_f_1_5 %c_f_2_5\n"
612 
613 		"%main       = OpFunction %void None %voidf\n"
614 		"%label      = OpLabel\n"
615 		"%v_vec4     = OpVariable %vec4ptr_f Function\n"
616 		"%idval      = OpLoad %uvec3 %id\n"
617 		"%x          = OpCompositeExtract %u32 %idval 0\n"
618 		"%inloc      = OpAccessChain %vec4ptr_u %indata %zero %x\n"
619 		"%outloc     = OpAccessChain %vec4ptr_u %outdata %zero %x\n"
620 		"              OpCopyMemory %v_vec4 %inloc\n"
621 		"%v_vec4_val = OpLoad %vec4 %v_vec4\n"
622 		"%add        = OpFAdd %vec4 %v_vec4_val %c_vec4\n"
623 		"              OpStore %outloc %add\n"
624 		"              OpReturn\n"
625 		"              OpFunctionEnd\n";
626 
627 	spec1.inputs.push_back(BufferSp(new Vec4Buffer(inputFloats1)));
628 	spec1.outputs.push_back(BufferSp(new Vec4Buffer(outputFloats1)));
629 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
630 
631 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector", "OpCopyMemory elements of vector type", spec1));
632 
633 	// The following case copies a float[100] variable from the input buffer to the output buffer.
634 	ComputeShaderSpec				spec2;
635 	vector<float>					inputFloats2	(numElements);
636 	vector<float>					outputFloats2	(numElements);
637 
638 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats2[0], numElements);
639 
640 	for (size_t ndx = 0; ndx < numElements; ++ndx)
641 		outputFloats2[ndx] = inputFloats2[ndx];
642 
643 	spec2.assembly =
644 		string(s_ShaderPreamble) +
645 
646 		"OpName %main           \"main\"\n"
647 		"OpName %id             \"gl_GlobalInvocationID\"\n"
648 
649 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
650 		"OpDecorate %f32arr100 ArrayStride 4\n"
651 
652 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
653 
654 		"%hundred        = OpConstant %u32 100\n"
655 		"%f32arr100      = OpTypeArray %f32 %hundred\n"
656 		"%f32arr100ptr_f = OpTypePointer Function %f32arr100\n"
657 		"%f32arr100ptr_u = OpTypePointer Uniform %f32arr100\n"
658 		"%buf            = OpTypeStruct %f32arr100\n"
659 		"%bufptr         = OpTypePointer Uniform %buf\n"
660 		"%indata         = OpVariable %bufptr Uniform\n"
661 		"%outdata        = OpVariable %bufptr Uniform\n"
662 
663 		"%id             = OpVariable %uvec3ptr Input\n"
664 		"%zero           = OpConstant %i32 0\n"
665 
666 		"%main           = OpFunction %void None %voidf\n"
667 		"%label          = OpLabel\n"
668 		"%var            = OpVariable %f32arr100ptr_f Function\n"
669 		"%inarr          = OpAccessChain %f32arr100ptr_u %indata %zero\n"
670 		"%outarr         = OpAccessChain %f32arr100ptr_u %outdata %zero\n"
671 		"                  OpCopyMemory %var %inarr\n"
672 		"                  OpCopyMemory %outarr %var\n"
673 		"                  OpReturn\n"
674 		"                  OpFunctionEnd\n";
675 
676 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
677 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
678 	spec2.numWorkGroups = IVec3(1, 1, 1);
679 
680 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "array", "OpCopyMemory elements of array type", spec2));
681 
682 	// The following case copies a struct{vec4, vec4, vec4, vec4} variable from the input buffer to the output buffer.
683 	ComputeShaderSpec				spec3;
684 	vector<float>					inputFloats3	(16);
685 	vector<float>					outputFloats3	(16);
686 
687 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats3[0], 16);
688 
689 	for (size_t ndx = 0; ndx < 16; ++ndx)
690 		outputFloats3[ndx] = inputFloats3[ndx];
691 
692 	spec3.assembly =
693 		string(s_ShaderPreamble) +
694 
695 		"OpName %main           \"main\"\n"
696 		"OpName %id             \"gl_GlobalInvocationID\"\n"
697 
698 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
699 		"OpMemberDecorate %buf 0 Offset 0\n"
700 		"OpMemberDecorate %buf 1 Offset 16\n"
701 		"OpMemberDecorate %buf 2 Offset 32\n"
702 		"OpMemberDecorate %buf 3 Offset 48\n"
703 
704 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
705 
706 		"%vec4      = OpTypeVector %f32 4\n"
707 		"%buf       = OpTypeStruct %vec4 %vec4 %vec4 %vec4\n"
708 		"%bufptr    = OpTypePointer Uniform %buf\n"
709 		"%indata    = OpVariable %bufptr Uniform\n"
710 		"%outdata   = OpVariable %bufptr Uniform\n"
711 		"%vec4stptr = OpTypePointer Function %buf\n"
712 
713 		"%id        = OpVariable %uvec3ptr Input\n"
714 		"%zero      = OpConstant %i32 0\n"
715 
716 		"%main      = OpFunction %void None %voidf\n"
717 		"%label     = OpLabel\n"
718 		"%var       = OpVariable %vec4stptr Function\n"
719 		"             OpCopyMemory %var %indata\n"
720 		"             OpCopyMemory %outdata %var\n"
721 		"             OpReturn\n"
722 		"             OpFunctionEnd\n";
723 
724 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
725 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
726 	spec3.numWorkGroups = IVec3(1, 1, 1);
727 
728 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "struct", "OpCopyMemory elements of struct type", spec3));
729 
730 	// The following case negates multiple float variables from the input buffer and stores the results to the output buffer.
731 	ComputeShaderSpec				spec4;
732 	vector<float>					inputFloats4	(numElements);
733 	vector<float>					outputFloats4	(numElements);
734 
735 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats4[0], numElements);
736 
737 	for (size_t ndx = 0; ndx < numElements; ++ndx)
738 		outputFloats4[ndx] = -inputFloats4[ndx];
739 
740 	spec4.assembly =
741 		string(s_ShaderPreamble) +
742 
743 		"OpName %main           \"main\"\n"
744 		"OpName %id             \"gl_GlobalInvocationID\"\n"
745 
746 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
747 
748 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
749 
750 		"%f32ptr_f  = OpTypePointer Function %f32\n"
751 		"%id        = OpVariable %uvec3ptr Input\n"
752 		"%zero      = OpConstant %i32 0\n"
753 
754 		"%main      = OpFunction %void None %voidf\n"
755 		"%label     = OpLabel\n"
756 		"%var       = OpVariable %f32ptr_f Function\n"
757 		"%idval     = OpLoad %uvec3 %id\n"
758 		"%x         = OpCompositeExtract %u32 %idval 0\n"
759 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
760 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
761 		"             OpCopyMemory %var %inloc\n"
762 		"%val       = OpLoad %f32 %var\n"
763 		"%neg       = OpFNegate %f32 %val\n"
764 		"             OpStore %outloc %neg\n"
765 		"             OpReturn\n"
766 		"             OpFunctionEnd\n";
767 
768 	spec4.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
769 	spec4.outputs.push_back(BufferSp(new Float32Buffer(outputFloats4)));
770 	spec4.numWorkGroups = IVec3(numElements, 1, 1);
771 
772 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "float", "OpCopyMemory elements of float type", spec4));
773 
774 	return group.release();
775 }
776 
createOpCopyObjectGroup(tcu::TestContext & testCtx)777 tcu::TestCaseGroup* createOpCopyObjectGroup (tcu::TestContext& testCtx)
778 {
779 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opcopyobject", "Test the OpCopyObject instruction"));
780 	ComputeShaderSpec				spec;
781 	de::Random						rnd				(deStringHash(group->getName()));
782 	const int						numElements		= 100;
783 	vector<float>					inputFloats		(numElements, 0);
784 	vector<float>					outputFloats	(numElements, 0);
785 
786 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
787 
788 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
789 	floorAll(inputFloats);
790 
791 	for (size_t ndx = 0; ndx < numElements; ++ndx)
792 		outputFloats[ndx] = inputFloats[ndx] + 7.5f;
793 
794 	spec.assembly =
795 		string(s_ShaderPreamble) +
796 
797 		"OpName %main           \"main\"\n"
798 		"OpName %id             \"gl_GlobalInvocationID\"\n"
799 
800 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
801 
802 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
803 
804 		"%fmat     = OpTypeMatrix %fvec3 3\n"
805 		"%three    = OpConstant %u32 3\n"
806 		"%farr     = OpTypeArray %f32 %three\n"
807 		"%fst      = OpTypeStruct %f32 %f32\n"
808 
809 		+ string(s_InputOutputBuffer) +
810 
811 		"%id            = OpVariable %uvec3ptr Input\n"
812 		"%zero          = OpConstant %i32 0\n"
813 		"%c_f           = OpConstant %f32 1.5\n"
814 		"%c_fvec3       = OpConstantComposite %fvec3 %c_f %c_f %c_f\n"
815 		"%c_fmat        = OpConstantComposite %fmat %c_fvec3 %c_fvec3 %c_fvec3\n"
816 		"%c_farr        = OpConstantComposite %farr %c_f %c_f %c_f\n"
817 		"%c_fst         = OpConstantComposite %fst %c_f %c_f\n"
818 
819 		"%main          = OpFunction %void None %voidf\n"
820 		"%label         = OpLabel\n"
821 		"%c_f_copy      = OpCopyObject %f32   %c_f\n"
822 		"%c_fvec3_copy  = OpCopyObject %fvec3 %c_fvec3\n"
823 		"%c_fmat_copy   = OpCopyObject %fmat  %c_fmat\n"
824 		"%c_farr_copy   = OpCopyObject %farr  %c_farr\n"
825 		"%c_fst_copy    = OpCopyObject %fst   %c_fst\n"
826 		"%fvec3_elem    = OpCompositeExtract %f32 %c_fvec3_copy 0\n"
827 		"%fmat_elem     = OpCompositeExtract %f32 %c_fmat_copy 1 2\n"
828 		"%farr_elem     = OpCompositeExtract %f32 %c_farr_copy 2\n"
829 		"%fst_elem      = OpCompositeExtract %f32 %c_fst_copy 1\n"
830 		// Add up. 1.5 * 5 = 7.5.
831 		"%add1          = OpFAdd %f32 %c_f_copy %fvec3_elem\n"
832 		"%add2          = OpFAdd %f32 %add1     %fmat_elem\n"
833 		"%add3          = OpFAdd %f32 %add2     %farr_elem\n"
834 		"%add4          = OpFAdd %f32 %add3     %fst_elem\n"
835 
836 		"%idval         = OpLoad %uvec3 %id\n"
837 		"%x             = OpCompositeExtract %u32 %idval 0\n"
838 		"%inloc         = OpAccessChain %f32ptr %indata %zero %x\n"
839 		"%outloc        = OpAccessChain %f32ptr %outdata %zero %x\n"
840 		"%inval         = OpLoad %f32 %inloc\n"
841 		"%add           = OpFAdd %f32 %add4 %inval\n"
842 		"                 OpStore %outloc %add\n"
843 		"                 OpReturn\n"
844 		"                 OpFunctionEnd\n";
845 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
846 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
847 	spec.numWorkGroups = IVec3(numElements, 1, 1);
848 
849 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "OpCopyObject on different types", spec));
850 
851 	return group.release();
852 }
853 // Assembly code used for testing OpUnreachable is based on GLSL source code:
854 //
855 // #version 430
856 //
857 // layout(std140, set = 0, binding = 0) readonly buffer Input {
858 //   float elements[];
859 // } input_data;
860 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
861 //   float elements[];
862 // } output_data;
863 //
864 // void not_called_func() {
865 //   // place OpUnreachable here
866 // }
867 //
868 // uint modulo4(uint val) {
869 //   switch (val % uint(4)) {
870 //     case 0:  return 3;
871 //     case 1:  return 2;
872 //     case 2:  return 1;
873 //     case 3:  return 0;
874 //     default: return 100; // place OpUnreachable here
875 //   }
876 // }
877 //
878 // uint const5() {
879 //   return 5;
880 //   // place OpUnreachable here
881 // }
882 //
883 // void main() {
884 //   uint x = gl_GlobalInvocationID.x;
885 //   if (const5() > modulo4(1000)) {
886 //     output_data.elements[x] = -input_data.elements[x];
887 //   } else {
888 //     // place OpUnreachable here
889 //     output_data.elements[x] = input_data.elements[x];
890 //   }
891 // }
892 
createOpUnreachableGroup(tcu::TestContext & testCtx)893 tcu::TestCaseGroup* createOpUnreachableGroup (tcu::TestContext& testCtx)
894 {
895 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opunreachable", "Test the OpUnreachable instruction"));
896 	ComputeShaderSpec				spec;
897 	de::Random						rnd				(deStringHash(group->getName()));
898 	const int						numElements		= 100;
899 	vector<float>					positiveFloats	(numElements, 0);
900 	vector<float>					negativeFloats	(numElements, 0);
901 
902 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
903 
904 	for (size_t ndx = 0; ndx < numElements; ++ndx)
905 		negativeFloats[ndx] = -positiveFloats[ndx];
906 
907 	spec.assembly =
908 		string(s_ShaderPreamble) +
909 
910 		"OpSource GLSL 430\n"
911 		"OpName %main            \"main\"\n"
912 		"OpName %func_not_called_func \"not_called_func(\"\n"
913 		"OpName %func_modulo4         \"modulo4(u1;\"\n"
914 		"OpName %func_const5          \"const5(\"\n"
915 		"OpName %id                   \"gl_GlobalInvocationID\"\n"
916 
917 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
918 
919 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
920 
921 		"%u32ptr    = OpTypePointer Function %u32\n"
922 		"%uintfuint = OpTypeFunction %u32 %u32ptr\n"
923 		"%unitf     = OpTypeFunction %u32\n"
924 
925 		"%id        = OpVariable %uvec3ptr Input\n"
926 		"%zero      = OpConstant %u32 0\n"
927 		"%one       = OpConstant %u32 1\n"
928 		"%two       = OpConstant %u32 2\n"
929 		"%three     = OpConstant %u32 3\n"
930 		"%four      = OpConstant %u32 4\n"
931 		"%five      = OpConstant %u32 5\n"
932 		"%hundred   = OpConstant %u32 100\n"
933 		"%thousand  = OpConstant %u32 1000\n"
934 
935 		+ string(s_InputOutputBuffer) +
936 
937 		// Main()
938 		"%main   = OpFunction %void None %voidf\n"
939 		"%main_entry  = OpLabel\n"
940 		"%v_thousand  = OpVariable %u32ptr Function %thousand\n"
941 		"%idval       = OpLoad %uvec3 %id\n"
942 		"%x           = OpCompositeExtract %u32 %idval 0\n"
943 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
944 		"%inval       = OpLoad %f32 %inloc\n"
945 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
946 		"%ret_const5  = OpFunctionCall %u32 %func_const5\n"
947 		"%ret_modulo4 = OpFunctionCall %u32 %func_modulo4 %v_thousand\n"
948 		"%cmp_gt      = OpUGreaterThan %bool %ret_const5 %ret_modulo4\n"
949 		"               OpSelectionMerge %if_end None\n"
950 		"               OpBranchConditional %cmp_gt %if_true %if_false\n"
951 		"%if_true     = OpLabel\n"
952 		"%negate      = OpFNegate %f32 %inval\n"
953 		"               OpStore %outloc %negate\n"
954 		"               OpBranch %if_end\n"
955 		"%if_false    = OpLabel\n"
956 		"               OpUnreachable\n" // Unreachable else branch for if statement
957 		"%if_end      = OpLabel\n"
958 		"               OpReturn\n"
959 		"               OpFunctionEnd\n"
960 
961 		// not_called_function()
962 		"%func_not_called_func  = OpFunction %void None %voidf\n"
963 		"%not_called_func_entry = OpLabel\n"
964 		"                         OpUnreachable\n" // Unreachable entry block in not called static function
965 		"                         OpFunctionEnd\n"
966 
967 		// modulo4()
968 		"%func_modulo4  = OpFunction %u32 None %uintfuint\n"
969 		"%valptr        = OpFunctionParameter %u32ptr\n"
970 		"%modulo4_entry = OpLabel\n"
971 		"%val           = OpLoad %u32 %valptr\n"
972 		"%modulo        = OpUMod %u32 %val %four\n"
973 		"                 OpSelectionMerge %switch_merge None\n"
974 		"                 OpSwitch %modulo %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
975 		"%case0         = OpLabel\n"
976 		"                 OpReturnValue %three\n"
977 		"%case1         = OpLabel\n"
978 		"                 OpReturnValue %two\n"
979 		"%case2         = OpLabel\n"
980 		"                 OpReturnValue %one\n"
981 		"%case3         = OpLabel\n"
982 		"                 OpReturnValue %zero\n"
983 		"%default       = OpLabel\n"
984 		"                 OpUnreachable\n" // Unreachable default case for switch statement
985 		"%switch_merge  = OpLabel\n"
986 		"                 OpUnreachable\n" // Unreachable merge block for switch statement
987 		"                 OpFunctionEnd\n"
988 
989 		// const5()
990 		"%func_const5  = OpFunction %u32 None %unitf\n"
991 		"%const5_entry = OpLabel\n"
992 		"                OpReturnValue %five\n"
993 		"%unreachable  = OpLabel\n"
994 		"                OpUnreachable\n" // Unreachable block in function
995 		"                OpFunctionEnd\n";
996 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
997 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
998 	spec.numWorkGroups = IVec3(numElements, 1, 1);
999 
1000 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "OpUnreachable appearing at different places", spec));
1001 
1002 	return group.release();
1003 }
1004 
1005 // Assembly code used for testing decoration group is based on GLSL source code:
1006 //
1007 // #version 430
1008 //
// layout(std430, set = 0, binding = 0) readonly buffer Input0 {
//   float elements[];
// } input_data0;
// layout(std430, set = 0, binding = 1) readonly buffer Input1 {
//   float elements[];
// } input_data1;
// layout(std430, set = 0, binding = 2) readonly buffer Input2 {
//   float elements[];
// } input_data2;
// layout(std430, set = 0, binding = 3) readonly buffer Input3 {
//   float elements[];
// } input_data3;
// layout(std430, set = 0, binding = 4) readonly buffer Input4 {
//   float elements[];
// } input_data4;
// layout(std430, set = 0, binding = 5) writeonly buffer Output {
//   float elements[];
// } output_data;
1027 //
1028 // void main() {
1029 //   uint x = gl_GlobalInvocationID.x;
1030 //   output_data.elements[x] = input_data0.elements[x] + input_data1.elements[x] + input_data2.elements[x] + input_data3.elements[x] + input_data4.elements[x];
1031 // }
createDecorationGroupGroup(tcu::TestContext & testCtx)1032 tcu::TestCaseGroup* createDecorationGroupGroup (tcu::TestContext& testCtx)
1033 {
1034 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "decoration_group", "Test the OpDecorationGroup & OpGroupDecorate instruction"));
1035 	ComputeShaderSpec				spec;
1036 	de::Random						rnd				(deStringHash(group->getName()));
1037 	const int						numElements		= 100;
1038 	vector<float>					inputFloats0	(numElements, 0);
1039 	vector<float>					inputFloats1	(numElements, 0);
1040 	vector<float>					inputFloats2	(numElements, 0);
1041 	vector<float>					inputFloats3	(numElements, 0);
1042 	vector<float>					inputFloats4	(numElements, 0);
1043 	vector<float>					outputFloats	(numElements, 0);
1044 
1045 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats0[0], numElements);
1046 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats1[0], numElements);
1047 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats2[0], numElements);
1048 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats3[0], numElements);
1049 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats4[0], numElements);
1050 
1051 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
1052 	floorAll(inputFloats0);
1053 	floorAll(inputFloats1);
1054 	floorAll(inputFloats2);
1055 	floorAll(inputFloats3);
1056 	floorAll(inputFloats4);
1057 
1058 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1059 		outputFloats[ndx] = inputFloats0[ndx] + inputFloats1[ndx] + inputFloats2[ndx] + inputFloats3[ndx] + inputFloats4[ndx];
1060 
1061 	spec.assembly =
1062 		string(s_ShaderPreamble) +
1063 
1064 		"OpSource GLSL 430\n"
1065 		"OpName %main \"main\"\n"
1066 		"OpName %id \"gl_GlobalInvocationID\"\n"
1067 
1068 		// Not using group decoration on variable.
1069 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1070 		// Not using group decoration on type.
1071 		"OpDecorate %f32arr ArrayStride 4\n"
1072 
1073 		"OpDecorate %groups BufferBlock\n"
1074 		"OpDecorate %groupm Offset 0\n"
1075 		"%groups = OpDecorationGroup\n"
1076 		"%groupm = OpDecorationGroup\n"
1077 
1078 		// Group decoration on multiple structs.
1079 		"OpGroupDecorate %groups %outbuf %inbuf0 %inbuf1 %inbuf2 %inbuf3 %inbuf4\n"
1080 		// Group decoration on multiple struct members.
1081 		"OpGroupMemberDecorate %groupm %outbuf 0 %inbuf0 0 %inbuf1 0 %inbuf2 0 %inbuf3 0 %inbuf4 0\n"
1082 
1083 		"OpDecorate %group1 DescriptorSet 0\n"
1084 		"OpDecorate %group3 DescriptorSet 0\n"
1085 		"OpDecorate %group3 NonWritable\n"
1086 		"OpDecorate %group3 Restrict\n"
1087 		"%group0 = OpDecorationGroup\n"
1088 		"%group1 = OpDecorationGroup\n"
1089 		"%group3 = OpDecorationGroup\n"
1090 
1091 		// Applying the same decoration group multiple times.
1092 		"OpGroupDecorate %group1 %outdata\n"
1093 		"OpGroupDecorate %group1 %outdata\n"
1094 		"OpGroupDecorate %group1 %outdata\n"
1095 		"OpDecorate %outdata DescriptorSet 0\n"
1096 		"OpDecorate %outdata Binding 5\n"
1097 		// Applying decoration group containing nothing.
1098 		"OpGroupDecorate %group0 %indata0\n"
1099 		"OpDecorate %indata0 DescriptorSet 0\n"
1100 		"OpDecorate %indata0 Binding 0\n"
1101 		// Applying decoration group containing one decoration.
1102 		"OpGroupDecorate %group1 %indata1\n"
1103 		"OpDecorate %indata1 Binding 1\n"
1104 		// Applying decoration group containing multiple decorations.
1105 		"OpGroupDecorate %group3 %indata2 %indata3\n"
1106 		"OpDecorate %indata2 Binding 2\n"
1107 		"OpDecorate %indata3 Binding 3\n"
1108 		// Applying multiple decoration groups (with overlapping).
1109 		"OpGroupDecorate %group0 %indata4\n"
1110 		"OpGroupDecorate %group1 %indata4\n"
1111 		"OpGroupDecorate %group3 %indata4\n"
1112 		"OpDecorate %indata4 Binding 4\n"
1113 
1114 		+ string(s_CommonTypes) +
1115 
1116 		"%id   = OpVariable %uvec3ptr Input\n"
1117 		"%zero = OpConstant %i32 0\n"
1118 
1119 		"%outbuf    = OpTypeStruct %f32arr\n"
1120 		"%outbufptr = OpTypePointer Uniform %outbuf\n"
1121 		"%outdata   = OpVariable %outbufptr Uniform\n"
1122 		"%inbuf0    = OpTypeStruct %f32arr\n"
1123 		"%inbuf0ptr = OpTypePointer Uniform %inbuf0\n"
1124 		"%indata0   = OpVariable %inbuf0ptr Uniform\n"
1125 		"%inbuf1    = OpTypeStruct %f32arr\n"
1126 		"%inbuf1ptr = OpTypePointer Uniform %inbuf1\n"
1127 		"%indata1   = OpVariable %inbuf1ptr Uniform\n"
1128 		"%inbuf2    = OpTypeStruct %f32arr\n"
1129 		"%inbuf2ptr = OpTypePointer Uniform %inbuf2\n"
1130 		"%indata2   = OpVariable %inbuf2ptr Uniform\n"
1131 		"%inbuf3    = OpTypeStruct %f32arr\n"
1132 		"%inbuf3ptr = OpTypePointer Uniform %inbuf3\n"
1133 		"%indata3   = OpVariable %inbuf3ptr Uniform\n"
1134 		"%inbuf4    = OpTypeStruct %f32arr\n"
1135 		"%inbufptr  = OpTypePointer Uniform %inbuf4\n"
1136 		"%indata4   = OpVariable %inbufptr Uniform\n"
1137 
1138 		"%main   = OpFunction %void None %voidf\n"
1139 		"%label  = OpLabel\n"
1140 		"%idval  = OpLoad %uvec3 %id\n"
1141 		"%x      = OpCompositeExtract %u32 %idval 0\n"
1142 		"%inloc0 = OpAccessChain %f32ptr %indata0 %zero %x\n"
1143 		"%inloc1 = OpAccessChain %f32ptr %indata1 %zero %x\n"
1144 		"%inloc2 = OpAccessChain %f32ptr %indata2 %zero %x\n"
1145 		"%inloc3 = OpAccessChain %f32ptr %indata3 %zero %x\n"
1146 		"%inloc4 = OpAccessChain %f32ptr %indata4 %zero %x\n"
1147 		"%outloc = OpAccessChain %f32ptr %outdata %zero %x\n"
1148 		"%inval0 = OpLoad %f32 %inloc0\n"
1149 		"%inval1 = OpLoad %f32 %inloc1\n"
1150 		"%inval2 = OpLoad %f32 %inloc2\n"
1151 		"%inval3 = OpLoad %f32 %inloc3\n"
1152 		"%inval4 = OpLoad %f32 %inloc4\n"
1153 		"%add0   = OpFAdd %f32 %inval0 %inval1\n"
1154 		"%add1   = OpFAdd %f32 %add0 %inval2\n"
1155 		"%add2   = OpFAdd %f32 %add1 %inval3\n"
1156 		"%add    = OpFAdd %f32 %add2 %inval4\n"
1157 		"          OpStore %outloc %add\n"
1158 		"          OpReturn\n"
1159 		"          OpFunctionEnd\n";
1160 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats0)));
1161 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats1)));
1162 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats2)));
1163 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats3)));
1164 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats4)));
1165 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1166 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1167 
1168 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "decoration group cases", spec));
1169 
1170 	return group.release();
1171 }
1172 
1173 struct SpecConstantTwoIntCase
1174 {
1175 	const char*		caseName;
1176 	const char*		scDefinition0;
1177 	const char*		scDefinition1;
1178 	const char*		scResultType;
1179 	const char*		scOperation;
1180 	deInt32			scActualValue0;
1181 	deInt32			scActualValue1;
1182 	const char*		resultOperation;
1183 	vector<deInt32>	expectedOutput;
1184 
SpecConstantTwoIntCasevkt::SpirVAssembly::__anon889ef7250111::SpecConstantTwoIntCase1185 					SpecConstantTwoIntCase (const char* name,
1186 											const char* definition0,
1187 											const char* definition1,
1188 											const char* resultType,
1189 											const char* operation,
1190 											deInt32 value0,
1191 											deInt32 value1,
1192 											const char* resultOp,
1193 											const vector<deInt32>& output)
1194 						: caseName			(name)
1195 						, scDefinition0		(definition0)
1196 						, scDefinition1		(definition1)
1197 						, scResultType		(resultType)
1198 						, scOperation		(operation)
1199 						, scActualValue0	(value0)
1200 						, scActualValue1	(value1)
1201 						, resultOperation	(resultOp)
1202 						, expectedOutput	(output) {}
1203 };
1204 
createSpecConstantGroup(tcu::TestContext & testCtx)1205 tcu::TestCaseGroup* createSpecConstantGroup (tcu::TestContext& testCtx)
1206 {
1207 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
1208 	vector<SpecConstantTwoIntCase>	cases;
1209 	de::Random						rnd				(deStringHash(group->getName()));
1210 	const int						numElements		= 100;
1211 	vector<deInt32>					inputInts		(numElements, 0);
1212 	vector<deInt32>					outputInts1		(numElements, 0);
1213 	vector<deInt32>					outputInts2		(numElements, 0);
1214 	vector<deInt32>					outputInts3		(numElements, 0);
1215 	vector<deInt32>					outputInts4		(numElements, 0);
1216 	const StringTemplate			shaderTemplate	(
1217 		string(s_ShaderPreamble) +
1218 
1219 		"OpName %main           \"main\"\n"
1220 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1221 
1222 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1223 		"OpDecorate %sc_0  SpecId 0\n"
1224 		"OpDecorate %sc_1  SpecId 1\n"
1225 		"OpDecorate %i32arr ArrayStride 4\n"
1226 
1227 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
1228 
1229 		"%i32ptr    = OpTypePointer Uniform %i32\n"
1230 		"%i32arr    = OpTypeRuntimeArray %i32\n"
1231 		"%boolptr   = OpTypePointer Uniform %bool\n"
1232 		"%boolarr   = OpTypeRuntimeArray %bool\n"
1233 		"%buf     = OpTypeStruct %i32arr\n"
1234 		"%bufptr  = OpTypePointer Uniform %buf\n"
1235 		"%indata    = OpVariable %bufptr Uniform\n"
1236 		"%outdata   = OpVariable %bufptr Uniform\n"
1237 
1238 		"%id        = OpVariable %uvec3ptr Input\n"
1239 		"%zero      = OpConstant %i32 0\n"
1240 
1241 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
1242 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
1243 		"%sc_final  = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n"
1244 
1245 		"%main      = OpFunction %void None %voidf\n"
1246 		"%label     = OpLabel\n"
1247 		"%idval     = OpLoad %uvec3 %id\n"
1248 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1249 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1250 		"%inval     = OpLoad %i32 %inloc\n"
1251 		"%final     = ${GEN_RESULT}\n"
1252 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1253 		"             OpStore %outloc %final\n"
1254 		"             OpReturn\n"
1255 		"             OpFunctionEnd\n");
1256 
1257 	fillRandomScalars(rnd, -65536, 65536, &inputInts[0], numElements);
1258 
1259 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1260 	{
1261 		outputInts1[ndx] = inputInts[ndx] + 42;
1262 		outputInts2[ndx] = inputInts[ndx];
1263 		outputInts3[ndx] = inputInts[ndx] - 11200;
1264 		outputInts4[ndx] = inputInts[ndx] + 1;
1265 	}
1266 
1267 	const char addScToInput[]		= "OpIAdd %i32 %inval %sc_final";
1268 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_final %inval %zero";
1269 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_final %zero %inval";
1270 
1271 	cases.push_back(SpecConstantTwoIntCase("iadd",					" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",			62,		-20,	addScToInput,		outputInts1));
1272 	cases.push_back(SpecConstantTwoIntCase("isub",					" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",			100,	58,		addScToInput,		outputInts1));
1273 	cases.push_back(SpecConstantTwoIntCase("imul",					" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",			-2,		-21,	addScToInput,		outputInts1));
1274 	cases.push_back(SpecConstantTwoIntCase("sdiv",					" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",			-126,	-3,		addScToInput,		outputInts1));
1275 	cases.push_back(SpecConstantTwoIntCase("udiv",					" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",			126,	3,		addScToInput,		outputInts1));
1276 	cases.push_back(SpecConstantTwoIntCase("srem",					" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",			7,		3,		addScToInput,		outputInts4));
1277 	cases.push_back(SpecConstantTwoIntCase("smod",					" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",			7,		3,		addScToInput,		outputInts4));
1278 	cases.push_back(SpecConstantTwoIntCase("umod",					" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",			342,	50,		addScToInput,		outputInts1));
1279 	cases.push_back(SpecConstantTwoIntCase("bitwiseand",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",			42,		63,		addScToInput,		outputInts1));
1280 	cases.push_back(SpecConstantTwoIntCase("bitwiseor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",			34,		8,		addScToInput,		outputInts1));
1281 	cases.push_back(SpecConstantTwoIntCase("bitwisexor",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",			18,		56,		addScToInput,		outputInts1));
1282 	cases.push_back(SpecConstantTwoIntCase("shiftrightlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",			168,	2,		addScToInput,		outputInts1));
1283 	cases.push_back(SpecConstantTwoIntCase("shiftrightarithmetic",	" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",			168,	2,		addScToInput,		outputInts1));
1284 	cases.push_back(SpecConstantTwoIntCase("shiftleftlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",			21,		1,		addScToInput,		outputInts1));
1285 	cases.push_back(SpecConstantTwoIntCase("slessthan",				" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",			-20,	-10,	selectTrueUsingSc,	outputInts2));
1286 	cases.push_back(SpecConstantTwoIntCase("ulessthan",				" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",			10,		20,		selectTrueUsingSc,	outputInts2));
1287 	cases.push_back(SpecConstantTwoIntCase("sgreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",			-1000,	50,		selectFalseUsingSc,	outputInts2));
1288 	cases.push_back(SpecConstantTwoIntCase("ugreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",			10,		5,		selectTrueUsingSc,	outputInts2));
1289 	cases.push_back(SpecConstantTwoIntCase("slessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",			-10,	-10,	selectTrueUsingSc,	outputInts2));
1290 	cases.push_back(SpecConstantTwoIntCase("ulessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",			50,		100,	selectTrueUsingSc,	outputInts2));
1291 	cases.push_back(SpecConstantTwoIntCase("sgreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",			-1000,	50,		selectFalseUsingSc,	outputInts2));
1292 	cases.push_back(SpecConstantTwoIntCase("ugreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",			10,		10,		selectTrueUsingSc,	outputInts2));
1293 	cases.push_back(SpecConstantTwoIntCase("iequal",				" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",			42,		24,		selectFalseUsingSc,	outputInts2));
1294 	cases.push_back(SpecConstantTwoIntCase("logicaland",			"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",			0,		1,		selectFalseUsingSc,	outputInts2));
1295 	cases.push_back(SpecConstantTwoIntCase("logicalor",				"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",			1,		0,		selectTrueUsingSc,	outputInts2));
1296 	cases.push_back(SpecConstantTwoIntCase("logicalequal",			"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",			0,		1,		selectFalseUsingSc,	outputInts2));
1297 	cases.push_back(SpecConstantTwoIntCase("logicalnotequal",		"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",			1,		0,		selectTrueUsingSc,	outputInts2));
1298 	cases.push_back(SpecConstantTwoIntCase("snegate",				" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",				-42,	0,		addScToInput,		outputInts1));
1299 	cases.push_back(SpecConstantTwoIntCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",				-43,	0,		addScToInput,		outputInts1));
1300 	cases.push_back(SpecConstantTwoIntCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",				1,		0,		selectFalseUsingSc,	outputInts2));
1301 	cases.push_back(SpecConstantTwoIntCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %zero",	1,		42,		addScToInput,		outputInts1));
1302 	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
1303 
1304 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1305 	{
1306 		map<string, string>		specializations;
1307 		ComputeShaderSpec		spec;
1308 
1309 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
1310 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
1311 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
1312 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
1313 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
1314 
1315 		spec.assembly = shaderTemplate.specialize(specializations);
1316 		spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
1317 		spec.outputs.push_back(BufferSp(new Int32Buffer(cases[caseNdx].expectedOutput)));
1318 		spec.numWorkGroups = IVec3(numElements, 1, 1);
1319 		spec.specConstants.push_back(cases[caseNdx].scActualValue0);
1320 		spec.specConstants.push_back(cases[caseNdx].scActualValue1);
1321 
1322 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].caseName, cases[caseNdx].caseName, spec));
1323 	}
1324 
1325 	ComputeShaderSpec				spec;
1326 
1327 	spec.assembly =
1328 		string(s_ShaderPreamble) +
1329 
1330 		"OpName %main           \"main\"\n"
1331 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1332 
1333 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1334 		"OpDecorate %sc_0  SpecId 0\n"
1335 		"OpDecorate %sc_1  SpecId 1\n"
1336 		"OpDecorate %sc_2  SpecId 2\n"
1337 		"OpDecorate %i32arr ArrayStride 4\n"
1338 
1339 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
1340 
1341 		"%ivec3     = OpTypeVector %i32 3\n"
1342 		"%i32ptr    = OpTypePointer Uniform %i32\n"
1343 		"%i32arr    = OpTypeRuntimeArray %i32\n"
1344 		"%boolptr   = OpTypePointer Uniform %bool\n"
1345 		"%boolarr   = OpTypeRuntimeArray %bool\n"
1346 		"%buf     = OpTypeStruct %i32arr\n"
1347 		"%bufptr  = OpTypePointer Uniform %buf\n"
1348 		"%indata    = OpVariable %bufptr Uniform\n"
1349 		"%outdata   = OpVariable %bufptr Uniform\n"
1350 
1351 		"%id        = OpVariable %uvec3ptr Input\n"
1352 		"%zero      = OpConstant %i32 0\n"
1353 		"%ivec3_0   = OpConstantComposite %ivec3 %zero %zero %zero\n"
1354 
1355 		"%sc_0        = OpSpecConstant %i32 0\n"
1356 		"%sc_1        = OpSpecConstant %i32 0\n"
1357 		"%sc_2        = OpSpecConstant %i32 0\n"
1358 		"%sc_vec3_0   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_0        %ivec3_0   0\n"     // (sc_0, 0, 0)
1359 		"%sc_vec3_1   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_1        %ivec3_0   1\n"     // (0, sc_1, 0)
1360 		"%sc_vec3_2   = OpSpecConstantOp %ivec3 CompositeInsert  %sc_2        %ivec3_0   2\n"     // (0, 0, sc_2)
1361 		"%sc_vec3_01  = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_0   %sc_vec3_1 1 0 4\n" // (0,    sc_0, sc_1)
1362 		"%sc_vec3_012 = OpSpecConstantOp %ivec3 VectorShuffle    %sc_vec3_01  %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
1363 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            0\n"     // sc_2
1364 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            1\n"     // sc_0
1365 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            2\n"     // sc_1
1366 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"        // (sc_2 - sc_0)
1367 		"%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n"        // (sc_2 - sc_0) * sc_1
1368 
1369 		"%main      = OpFunction %void None %voidf\n"
1370 		"%label     = OpLabel\n"
1371 		"%idval     = OpLoad %uvec3 %id\n"
1372 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1373 		"%inloc     = OpAccessChain %i32ptr %indata %zero %x\n"
1374 		"%inval     = OpLoad %i32 %inloc\n"
1375 		"%final     = OpIAdd %i32 %inval %sc_final\n"
1376 		"%outloc    = OpAccessChain %i32ptr %outdata %zero %x\n"
1377 		"             OpStore %outloc %final\n"
1378 		"             OpReturn\n"
1379 		"             OpFunctionEnd\n";
1380 	spec.inputs.push_back(BufferSp(new Int32Buffer(inputInts)));
1381 	spec.outputs.push_back(BufferSp(new Int32Buffer(outputInts3)));
1382 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1383 	spec.specConstants.push_back(123);
1384 	spec.specConstants.push_back(56);
1385 	spec.specConstants.push_back(-77);
1386 
1387 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "vector_related", "VectorShuffle, CompositeExtract, & CompositeInsert", spec));
1388 
1389 	return group.release();
1390 }
1391 
createOpPhiGroup(tcu::TestContext & testCtx)1392 tcu::TestCaseGroup* createOpPhiGroup (tcu::TestContext& testCtx)
1393 {
1394 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
1395 	ComputeShaderSpec				spec1;
1396 	ComputeShaderSpec				spec2;
1397 	ComputeShaderSpec				spec3;
1398 	de::Random						rnd				(deStringHash(group->getName()));
1399 	const int						numElements		= 100;
1400 	vector<float>					inputFloats		(numElements, 0);
1401 	vector<float>					outputFloats1	(numElements, 0);
1402 	vector<float>					outputFloats2	(numElements, 0);
1403 	vector<float>					outputFloats3	(numElements, 0);
1404 
1405 	fillRandomScalars(rnd, -300.f, 300.f, &inputFloats[0], numElements);
1406 
1407 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
1408 	floorAll(inputFloats);
1409 
1410 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1411 	{
1412 		switch (ndx % 3)
1413 		{
1414 			case 0:		outputFloats1[ndx] = inputFloats[ndx] + 5.5f;	break;
1415 			case 1:		outputFloats1[ndx] = inputFloats[ndx] + 20.5f;	break;
1416 			case 2:		outputFloats1[ndx] = inputFloats[ndx] + 1.75f;	break;
1417 			default:	break;
1418 		}
1419 		outputFloats2[ndx] = inputFloats[ndx] + 6.5f * 3;
1420 		outputFloats3[ndx] = 8.5f - inputFloats[ndx];
1421 	}
1422 
1423 	spec1.assembly =
1424 		string(s_ShaderPreamble) +
1425 
1426 		"OpSource GLSL 430\n"
1427 		"OpName %main \"main\"\n"
1428 		"OpName %id \"gl_GlobalInvocationID\"\n"
1429 
1430 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1431 
1432 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1433 
1434 		"%id = OpVariable %uvec3ptr Input\n"
1435 		"%zero       = OpConstant %i32 0\n"
1436 		"%three      = OpConstant %u32 3\n"
1437 		"%constf5p5  = OpConstant %f32 5.5\n"
1438 		"%constf20p5 = OpConstant %f32 20.5\n"
1439 		"%constf1p75 = OpConstant %f32 1.75\n"
1440 		"%constf8p5  = OpConstant %f32 8.5\n"
1441 		"%constf6p5  = OpConstant %f32 6.5\n"
1442 
1443 		"%main     = OpFunction %void None %voidf\n"
1444 		"%entry    = OpLabel\n"
1445 		"%idval    = OpLoad %uvec3 %id\n"
1446 		"%x        = OpCompositeExtract %u32 %idval 0\n"
1447 		"%selector = OpUMod %u32 %x %three\n"
1448 		"            OpSelectionMerge %phi None\n"
1449 		"            OpSwitch %selector %default 0 %case0 1 %case1 2 %case2\n"
1450 
1451 		// Case 1 before OpPhi.
1452 		"%case1    = OpLabel\n"
1453 		"            OpBranch %phi\n"
1454 
1455 		"%default  = OpLabel\n"
1456 		"            OpUnreachable\n"
1457 
1458 		"%phi      = OpLabel\n"
1459 		"%operand  = OpPhi %f32   %constf1p75 %case2   %constf20p5 %case1   %constf5p5 %case0\n" // not in the order of blocks
1460 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
1461 		"%inval    = OpLoad %f32 %inloc\n"
1462 		"%add      = OpFAdd %f32 %inval %operand\n"
1463 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
1464 		"            OpStore %outloc %add\n"
1465 		"            OpReturn\n"
1466 
1467 		// Case 0 after OpPhi.
1468 		"%case0    = OpLabel\n"
1469 		"            OpBranch %phi\n"
1470 
1471 
1472 		// Case 2 after OpPhi.
1473 		"%case2    = OpLabel\n"
1474 		"            OpBranch %phi\n"
1475 
1476 		"            OpFunctionEnd\n";
1477 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1478 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
1479 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
1480 
1481 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "block", "out-of-order and unreachable blocks for OpPhi", spec1));
1482 
1483 	spec2.assembly =
1484 		string(s_ShaderPreamble) +
1485 
1486 		"OpName %main \"main\"\n"
1487 		"OpName %id \"gl_GlobalInvocationID\"\n"
1488 
1489 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1490 
1491 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1492 
1493 		"%id         = OpVariable %uvec3ptr Input\n"
1494 		"%zero       = OpConstant %i32 0\n"
1495 		"%one        = OpConstant %i32 1\n"
1496 		"%three      = OpConstant %i32 3\n"
1497 		"%constf6p5  = OpConstant %f32 6.5\n"
1498 
1499 		"%main       = OpFunction %void None %voidf\n"
1500 		"%entry      = OpLabel\n"
1501 		"%idval      = OpLoad %uvec3 %id\n"
1502 		"%x          = OpCompositeExtract %u32 %idval 0\n"
1503 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
1504 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1505 		"%inval      = OpLoad %f32 %inloc\n"
1506 		"              OpBranch %phi\n"
1507 
1508 		"%phi        = OpLabel\n"
1509 		"%step       = OpPhi %i32 %zero  %entry %step_next  %phi\n"
1510 		"%accum      = OpPhi %f32 %inval %entry %accum_next %phi\n"
1511 		"%step_next  = OpIAdd %i32 %step %one\n"
1512 		"%accum_next = OpFAdd %f32 %accum %constf6p5\n"
1513 		"%still_loop = OpSLessThan %bool %step %three\n"
1514 		"              OpLoopMerge %exit %phi None\n"
1515 		"              OpBranchConditional %still_loop %phi %exit\n"
1516 
1517 		"%exit       = OpLabel\n"
1518 		"              OpStore %outloc %accum\n"
1519 		"              OpReturn\n"
1520 		"              OpFunctionEnd\n";
1521 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1522 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
1523 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
1524 
1525 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "induction", "The usual way induction variables are handled in LLVM IR", spec2));
1526 
1527 	spec3.assembly =
1528 		string(s_ShaderPreamble) +
1529 
1530 		"OpName %main \"main\"\n"
1531 		"OpName %id \"gl_GlobalInvocationID\"\n"
1532 
1533 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1534 
1535 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1536 
1537 		"%f32ptr_f   = OpTypePointer Function %f32\n"
1538 		"%id         = OpVariable %uvec3ptr Input\n"
1539 		"%true       = OpConstantTrue %bool\n"
1540 		"%false      = OpConstantFalse %bool\n"
1541 		"%zero       = OpConstant %i32 0\n"
1542 		"%constf8p5  = OpConstant %f32 8.5\n"
1543 
1544 		"%main       = OpFunction %void None %voidf\n"
1545 		"%entry      = OpLabel\n"
1546 		"%b          = OpVariable %f32ptr_f Function %constf8p5\n"
1547 		"%idval      = OpLoad %uvec3 %id\n"
1548 		"%x          = OpCompositeExtract %u32 %idval 0\n"
1549 		"%inloc      = OpAccessChain %f32ptr %indata %zero %x\n"
1550 		"%outloc     = OpAccessChain %f32ptr %outdata %zero %x\n"
1551 		"%a_init     = OpLoad %f32 %inloc\n"
1552 		"%b_init     = OpLoad %f32 %b\n"
1553 		"              OpBranch %phi\n"
1554 
1555 		"%phi        = OpLabel\n"
1556 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
1557 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
1558 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
1559 		"              OpLoopMerge %exit %phi None\n"
1560 		"              OpBranchConditional %still_loop %phi %exit\n"
1561 
1562 		"%exit       = OpLabel\n"
1563 		"%sub        = OpFSub %f32 %a_next %b_next\n"
1564 		"              OpStore %outloc %sub\n"
1565 		"              OpReturn\n"
1566 		"              OpFunctionEnd\n";
1567 	spec3.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1568 	spec3.outputs.push_back(BufferSp(new Float32Buffer(outputFloats3)));
1569 	spec3.numWorkGroups = IVec3(numElements, 1, 1);
1570 
1571 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "swap", "Swap the values of two variables using OpPhi", spec3));
1572 
1573 	return group.release();
1574 }
1575 
1576 // Assembly code used for testing block order is based on GLSL source code:
1577 //
1578 // #version 430
1579 //
1580 // layout(std140, set = 0, binding = 0) readonly buffer Input {
1581 //   float elements[];
1582 // } input_data;
1583 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
1584 //   float elements[];
1585 // } output_data;
1586 //
1587 // void main() {
1588 //   uint x = gl_GlobalInvocationID.x;
1589 //   output_data.elements[x] = input_data.elements[x];
1590 //   if (x > uint(50)) {
1591 //     switch (x % uint(3)) {
1592 //       case 0: output_data.elements[x] += 1.5f; break;
1593 //       case 1: output_data.elements[x] += 42.f; break;
1594 //       case 2: output_data.elements[x] -= 27.f; break;
1595 //       default: break;
1596 //     }
1597 //   } else {
1598 //     output_data.elements[x] = -input_data.elements[x];
1599 //   }
1600 // }
createBlockOrderGroup(tcu::TestContext & testCtx)1601 tcu::TestCaseGroup* createBlockOrderGroup (tcu::TestContext& testCtx)
1602 {
1603 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "block_order", "Test block orders"));
1604 	ComputeShaderSpec				spec;
1605 	de::Random						rnd				(deStringHash(group->getName()));
1606 	const int						numElements		= 100;
1607 	vector<float>					inputFloats		(numElements, 0);
1608 	vector<float>					outputFloats	(numElements, 0);
1609 
1610 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
1611 
1612 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
1613 	floorAll(inputFloats);
1614 
1615 	for (size_t ndx = 0; ndx <= 50; ++ndx)
1616 		outputFloats[ndx] = -inputFloats[ndx];
1617 
1618 	for (size_t ndx = 51; ndx < numElements; ++ndx)
1619 	{
1620 		switch (ndx % 3)
1621 		{
1622 			case 0:		outputFloats[ndx] = inputFloats[ndx] + 1.5f; break;
1623 			case 1:		outputFloats[ndx] = inputFloats[ndx] + 42.f; break;
1624 			case 2:		outputFloats[ndx] = inputFloats[ndx] - 27.f; break;
1625 			default:	break;
1626 		}
1627 	}
1628 
1629 	spec.assembly =
1630 		string(s_ShaderPreamble) +
1631 
1632 		"OpSource GLSL 430\n"
1633 		"OpName %main \"main\"\n"
1634 		"OpName %id \"gl_GlobalInvocationID\"\n"
1635 
1636 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1637 
1638 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
1639 
1640 		"%u32ptr       = OpTypePointer Function %u32\n"
1641 		"%u32ptr_input = OpTypePointer Input %u32\n"
1642 
1643 		+ string(s_InputOutputBuffer) +
1644 
1645 		"%id        = OpVariable %uvec3ptr Input\n"
1646 		"%zero      = OpConstant %i32 0\n"
1647 		"%const3    = OpConstant %u32 3\n"
1648 		"%const50   = OpConstant %u32 50\n"
1649 		"%constf1p5 = OpConstant %f32 1.5\n"
1650 		"%constf27  = OpConstant %f32 27.0\n"
1651 		"%constf42  = OpConstant %f32 42.0\n"
1652 
1653 		"%main = OpFunction %void None %voidf\n"
1654 
1655 		// entry block.
1656 		"%entry    = OpLabel\n"
1657 
1658 		// Create a temporary variable to hold the value of gl_GlobalInvocationID.x.
1659 		"%xvar     = OpVariable %u32ptr Function\n"
1660 		"%xptr     = OpAccessChain %u32ptr_input %id %zero\n"
1661 		"%x        = OpLoad %u32 %xptr\n"
1662 		"            OpStore %xvar %x\n"
1663 
1664 		"%cmp      = OpUGreaterThan %bool %x %const50\n"
1665 		"            OpSelectionMerge %if_merge None\n"
1666 		"            OpBranchConditional %cmp %if_true %if_false\n"
1667 
1668 		// Merge block for switch-statement: placed at the beginning.
1669 		"%switch_merge = OpLabel\n"
1670 		"                OpBranch %if_merge\n"
1671 
1672 		// Case 1 for switch-statement.
1673 		"%case1    = OpLabel\n"
1674 		"%x_1      = OpLoad %u32 %xvar\n"
1675 		"%inloc_1  = OpAccessChain %f32ptr %indata %zero %x_1\n"
1676 		"%inval_1  = OpLoad %f32 %inloc_1\n"
1677 		"%addf42   = OpFAdd %f32 %inval_1 %constf42\n"
1678 		"%outloc_1 = OpAccessChain %f32ptr %outdata %zero %x_1\n"
1679 		"            OpStore %outloc_1 %addf42\n"
1680 		"            OpBranch %switch_merge\n"
1681 
1682 		// False branch for if-statement: placed in the middle of switch cases and before true branch.
1683 		"%if_false = OpLabel\n"
1684 		"%x_f      = OpLoad %u32 %xvar\n"
1685 		"%inloc_f  = OpAccessChain %f32ptr %indata %zero %x_f\n"
1686 		"%inval_f  = OpLoad %f32 %inloc_f\n"
1687 		"%negate   = OpFNegate %f32 %inval_f\n"
1688 		"%outloc_f = OpAccessChain %f32ptr %outdata %zero %x_f\n"
1689 		"            OpStore %outloc_f %negate\n"
1690 		"            OpBranch %if_merge\n"
1691 
1692 		// Merge block for if-statement: placed in the middle of true and false branch.
1693 		"%if_merge = OpLabel\n"
1694 		"            OpReturn\n"
1695 
1696 		// True branch for if-statement: placed in the middle of swtich cases and after the false branch.
1697 		"%if_true  = OpLabel\n"
1698 		"%xval_t   = OpLoad %u32 %xvar\n"
1699 		"%mod      = OpUMod %u32 %xval_t %const3\n"
1700 		"            OpSelectionMerge %switch_merge None\n"
1701 		"            OpSwitch %mod %default 0 %case0 1 %case1 2 %case2\n"
1702 
1703 		// Case 2 for switch-statement.
1704 		"%case2    = OpLabel\n"
1705 		"%x_2      = OpLoad %u32 %xvar\n"
1706 		"%inloc_2  = OpAccessChain %f32ptr %indata %zero %x_2\n"
1707 		"%inval_2  = OpLoad %f32 %inloc_2\n"
1708 		"%subf27   = OpFSub %f32 %inval_2 %constf27\n"
1709 		"%outloc_2 = OpAccessChain %f32ptr %outdata %zero %x_2\n"
1710 		"            OpStore %outloc_2 %subf27\n"
1711 		"            OpBranch %switch_merge\n"
1712 
1713 		// Default case for switch-statement: placed in the middle of normal cases.
1714 		"%default = OpLabel\n"
1715 		"           OpBranch %switch_merge\n"
1716 
1717 		// Case 0 for switch-statement: out of order.
1718 		"%case0    = OpLabel\n"
1719 		"%x_0      = OpLoad %u32 %xvar\n"
1720 		"%inloc_0  = OpAccessChain %f32ptr %indata %zero %x_0\n"
1721 		"%inval_0  = OpLoad %f32 %inloc_0\n"
1722 		"%addf1p5  = OpFAdd %f32 %inval_0 %constf1p5\n"
1723 		"%outloc_0 = OpAccessChain %f32ptr %outdata %zero %x_0\n"
1724 		"            OpStore %outloc_0 %addf1p5\n"
1725 		"            OpBranch %switch_merge\n"
1726 
1727 		"            OpFunctionEnd\n";
1728 	spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1729 	spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
1730 	spec.numWorkGroups = IVec3(numElements, 1, 1);
1731 
1732 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "all", "various out-of-order blocks", spec));
1733 
1734 	return group.release();
1735 }
1736 
createMultipleShaderGroup(tcu::TestContext & testCtx)1737 tcu::TestCaseGroup* createMultipleShaderGroup (tcu::TestContext& testCtx)
1738 {
1739 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "multiple_shaders", "Test multiple shaders in the same module"));
1740 	ComputeShaderSpec				spec1;
1741 	ComputeShaderSpec				spec2;
1742 	de::Random						rnd				(deStringHash(group->getName()));
1743 	const int						numElements		= 100;
1744 	vector<float>					inputFloats		(numElements, 0);
1745 	vector<float>					outputFloats1	(numElements, 0);
1746 	vector<float>					outputFloats2	(numElements, 0);
1747 	fillRandomScalars(rnd, -500.f, 500.f, &inputFloats[0], numElements);
1748 
1749 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1750 	{
1751 		outputFloats1[ndx] = inputFloats[ndx] + inputFloats[ndx];
1752 		outputFloats2[ndx] = -inputFloats[ndx];
1753 	}
1754 
1755 	const string assembly(
1756 		"OpCapability Shader\n"
1757 		"OpCapability ClipDistance\n"
1758 		"OpMemoryModel Logical GLSL450\n"
1759 		"OpEntryPoint GLCompute %comp_main1 \"entrypoint1\" %id\n"
1760 		"OpEntryPoint GLCompute %comp_main2 \"entrypoint2\" %id\n"
1761 		// A module cannot have two OpEntryPoint instructions with the same Execution Model and the same Name string.
1762 		"OpEntryPoint Vertex    %vert_main  \"entrypoint2\" %vert_builtins %vertexIndex %instanceIndex\n"
1763 		"OpExecutionMode %comp_main1 LocalSize 1 1 1\n"
1764 		"OpExecutionMode %comp_main2 LocalSize 1 1 1\n"
1765 
1766 		"OpName %comp_main1              \"entrypoint1\"\n"
1767 		"OpName %comp_main2              \"entrypoint2\"\n"
1768 		"OpName %vert_main               \"entrypoint2\"\n"
1769 		"OpName %id                      \"gl_GlobalInvocationID\"\n"
1770 		"OpName %vert_builtin_st         \"gl_PerVertex\"\n"
1771 		"OpName %vertexIndex             \"gl_VertexIndex\"\n"
1772 		"OpName %instanceIndex           \"gl_InstanceIndex\"\n"
1773 		"OpMemberName %vert_builtin_st 0 \"gl_Position\"\n"
1774 		"OpMemberName %vert_builtin_st 1 \"gl_PointSize\"\n"
1775 		"OpMemberName %vert_builtin_st 2 \"gl_ClipDistance\"\n"
1776 
1777 		"OpDecorate %id                      BuiltIn GlobalInvocationId\n"
1778 		"OpDecorate %vertexIndex             BuiltIn VertexIndex\n"
1779 		"OpDecorate %instanceIndex           BuiltIn InstanceIndex\n"
1780 		"OpDecorate %vert_builtin_st         Block\n"
1781 		"OpMemberDecorate %vert_builtin_st 0 BuiltIn Position\n"
1782 		"OpMemberDecorate %vert_builtin_st 1 BuiltIn PointSize\n"
1783 		"OpMemberDecorate %vert_builtin_st 2 BuiltIn ClipDistance\n"
1784 
1785 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1786 
1787 		"%zero       = OpConstant %i32 0\n"
1788 		"%one        = OpConstant %u32 1\n"
1789 		"%c_f32_1    = OpConstant %f32 1\n"
1790 
1791 		"%i32ptr              = OpTypePointer Input %i32\n"
1792 		"%vec4                = OpTypeVector %f32 4\n"
1793 		"%vec4ptr             = OpTypePointer Output %vec4\n"
1794 		"%f32arr1             = OpTypeArray %f32 %one\n"
1795 		"%vert_builtin_st     = OpTypeStruct %vec4 %f32 %f32arr1\n"
1796 		"%vert_builtin_st_ptr = OpTypePointer Output %vert_builtin_st\n"
1797 		"%vert_builtins       = OpVariable %vert_builtin_st_ptr Output\n"
1798 
1799 		"%id         = OpVariable %uvec3ptr Input\n"
1800 		"%vertexIndex = OpVariable %i32ptr Input\n"
1801 		"%instanceIndex = OpVariable %i32ptr Input\n"
1802 		"%c_vec4_1   = OpConstantComposite %vec4 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
1803 
1804 		// gl_Position = vec4(1.);
1805 		"%vert_main  = OpFunction %void None %voidf\n"
1806 		"%vert_entry = OpLabel\n"
1807 		"%position   = OpAccessChain %vec4ptr %vert_builtins %zero\n"
1808 		"              OpStore %position %c_vec4_1\n"
1809 		"              OpReturn\n"
1810 		"              OpFunctionEnd\n"
1811 
1812 		// Double inputs.
1813 		"%comp_main1  = OpFunction %void None %voidf\n"
1814 		"%comp1_entry = OpLabel\n"
1815 		"%idval1      = OpLoad %uvec3 %id\n"
1816 		"%x1          = OpCompositeExtract %u32 %idval1 0\n"
1817 		"%inloc1      = OpAccessChain %f32ptr %indata %zero %x1\n"
1818 		"%inval1      = OpLoad %f32 %inloc1\n"
1819 		"%add         = OpFAdd %f32 %inval1 %inval1\n"
1820 		"%outloc1     = OpAccessChain %f32ptr %outdata %zero %x1\n"
1821 		"               OpStore %outloc1 %add\n"
1822 		"               OpReturn\n"
1823 		"               OpFunctionEnd\n"
1824 
1825 		// Negate inputs.
1826 		"%comp_main2  = OpFunction %void None %voidf\n"
1827 		"%comp2_entry = OpLabel\n"
1828 		"%idval2      = OpLoad %uvec3 %id\n"
1829 		"%x2          = OpCompositeExtract %u32 %idval2 0\n"
1830 		"%inloc2      = OpAccessChain %f32ptr %indata %zero %x2\n"
1831 		"%inval2      = OpLoad %f32 %inloc2\n"
1832 		"%neg         = OpFNegate %f32 %inval2\n"
1833 		"%outloc2     = OpAccessChain %f32ptr %outdata %zero %x2\n"
1834 		"               OpStore %outloc2 %neg\n"
1835 		"               OpReturn\n"
1836 		"               OpFunctionEnd\n");
1837 
1838 	spec1.assembly = assembly;
1839 	spec1.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1840 	spec1.outputs.push_back(BufferSp(new Float32Buffer(outputFloats1)));
1841 	spec1.numWorkGroups = IVec3(numElements, 1, 1);
1842 	spec1.entryPoint = "entrypoint1";
1843 
1844 	spec2.assembly = assembly;
1845 	spec2.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
1846 	spec2.outputs.push_back(BufferSp(new Float32Buffer(outputFloats2)));
1847 	spec2.numWorkGroups = IVec3(numElements, 1, 1);
1848 	spec2.entryPoint = "entrypoint2";
1849 
1850 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader1", "multiple shaders in the same module", spec1));
1851 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "shader2", "multiple shaders in the same module", spec2));
1852 
1853 	return group.release();
1854 }
1855 
// Returns a string consisting of num4ByteChars repetitions of one 4-byte UTF-8 character
// (bytes F0 9F 8C 8D), i.e. a string of exactly num4ByteChars * 4 bytes.
inline std::string makeLongUTF8String (size_t num4ByteChars)
{
	// A UTF-8 character of the maximum encoded length. It is spelled as a plain char array on
	// purpose: Microsoft compilers may otherwise treat the literal as wide (16-bit) characters,
	// and a C++11 UTF-8 literal is not an option while older Microsoft compilers are supported.
	static const char	earthAfrica[]	= "\xF0\x9F\x8C\x8D";
	const size_t		bytesPerChar	= sizeof(earthAfrica) - 1; // drop the terminating NUL

	std::string result;
	result.reserve(num4ByteChars * bytesPerChar);

	for (size_t remaining = num4ByteChars; remaining != 0; --remaining)
		result.append(earthAfrica, bytesPerChar);

	return result;
}
1872 
createOpSourceGroup(tcu::TestContext & testCtx)1873 tcu::TestCaseGroup* createOpSourceGroup (tcu::TestContext& testCtx)
1874 {
1875 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsource", "Tests the OpSource & OpSourceContinued instruction"));
1876 	vector<CaseParameter>			cases;
1877 	de::Random						rnd				(deStringHash(group->getName()));
1878 	const int						numElements		= 100;
1879 	vector<float>					positiveFloats	(numElements, 0);
1880 	vector<float>					negativeFloats	(numElements, 0);
1881 	const StringTemplate			shaderTemplate	(
1882 		"OpCapability Shader\n"
1883 		"OpMemoryModel Logical GLSL450\n"
1884 
1885 		"OpEntryPoint GLCompute %main \"main\" %id\n"
1886 		"OpExecutionMode %main LocalSize 1 1 1\n"
1887 
1888 		"${SOURCE}\n"
1889 
1890 		"OpName %main           \"main\"\n"
1891 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1892 
1893 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1894 
1895 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1896 
1897 		"%id        = OpVariable %uvec3ptr Input\n"
1898 		"%zero      = OpConstant %i32 0\n"
1899 
1900 		"%main      = OpFunction %void None %voidf\n"
1901 		"%label     = OpLabel\n"
1902 		"%idval     = OpLoad %uvec3 %id\n"
1903 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1904 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1905 		"%inval     = OpLoad %f32 %inloc\n"
1906 		"%neg       = OpFNegate %f32 %inval\n"
1907 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
1908 		"             OpStore %outloc %neg\n"
1909 		"             OpReturn\n"
1910 		"             OpFunctionEnd\n");
1911 
1912 	cases.push_back(CaseParameter("unknown_source",							"OpSource Unknown 0"));
1913 	cases.push_back(CaseParameter("wrong_source",							"OpSource OpenCL_C 210"));
1914 	cases.push_back(CaseParameter("normal_filename",						"%fname = OpString \"filename\"\n"
1915 																			"OpSource GLSL 430 %fname"));
1916 	cases.push_back(CaseParameter("empty_filename",							"%fname = OpString \"\"\n"
1917 																			"OpSource GLSL 430 %fname"));
1918 	cases.push_back(CaseParameter("normal_source_code",						"%fname = OpString \"filename\"\n"
1919 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\""));
1920 	cases.push_back(CaseParameter("empty_source_code",						"%fname = OpString \"filename\"\n"
1921 																			"OpSource GLSL 430 %fname \"\""));
1922 	cases.push_back(CaseParameter("long_source_code",						"%fname = OpString \"filename\"\n"
1923 																			"OpSource GLSL 430 %fname \"" + makeLongUTF8String(65530) + "ccc\"")); // word count: 65535
1924 	cases.push_back(CaseParameter("utf8_source_code",						"%fname = OpString \"filename\"\n"
1925 																			"OpSource GLSL 430 %fname \"\xE2\x98\x82\xE2\x98\x85\"")); // umbrella & black star symbol
1926 	cases.push_back(CaseParameter("normal_sourcecontinued",					"%fname = OpString \"filename\"\n"
1927 																			"OpSource GLSL 430 %fname \"#version 430\nvo\"\n"
1928 																			"OpSourceContinued \"id main() {}\""));
1929 	cases.push_back(CaseParameter("empty_sourcecontinued",					"%fname = OpString \"filename\"\n"
1930 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
1931 																			"OpSourceContinued \"\""));
1932 	cases.push_back(CaseParameter("long_sourcecontinued",					"%fname = OpString \"filename\"\n"
1933 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
1934 																			"OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\"")); // word count: 65535
1935 	cases.push_back(CaseParameter("utf8_sourcecontinued",					"%fname = OpString \"filename\"\n"
1936 																			"OpSource GLSL 430 %fname \"#version 430\nvoid main() {}\"\n"
1937 																			"OpSourceContinued \"\xE2\x98\x8E\xE2\x9A\x91\"")); // white telephone & black flag symbol
1938 	cases.push_back(CaseParameter("multi_sourcecontinued",					"%fname = OpString \"filename\"\n"
1939 																			"OpSource GLSL 430 %fname \"#version 430\n\"\n"
1940 																			"OpSourceContinued \"void\"\n"
1941 																			"OpSourceContinued \"main()\"\n"
1942 																			"OpSourceContinued \"{}\""));
1943 	cases.push_back(CaseParameter("empty_source_before_sourcecontinued",	"%fname = OpString \"filename\"\n"
1944 																			"OpSource GLSL 430 %fname \"\"\n"
1945 																			"OpSourceContinued \"#version 430\nvoid main() {}\""));
1946 
1947 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
1948 
1949 	for (size_t ndx = 0; ndx < numElements; ++ndx)
1950 		negativeFloats[ndx] = -positiveFloats[ndx];
1951 
1952 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
1953 	{
1954 		map<string, string>		specializations;
1955 		ComputeShaderSpec		spec;
1956 
1957 		specializations["SOURCE"] = cases[caseNdx].param;
1958 		spec.assembly = shaderTemplate.specialize(specializations);
1959 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
1960 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
1961 		spec.numWorkGroups = IVec3(numElements, 1, 1);
1962 
1963 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
1964 	}
1965 
1966 	return group.release();
1967 }
1968 
createOpSourceExtensionGroup(tcu::TestContext & testCtx)1969 tcu::TestCaseGroup* createOpSourceExtensionGroup (tcu::TestContext& testCtx)
1970 {
1971 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opsourceextension", "Tests the OpSource instruction"));
1972 	vector<CaseParameter>			cases;
1973 	de::Random						rnd				(deStringHash(group->getName()));
1974 	const int						numElements		= 100;
1975 	vector<float>					inputFloats		(numElements, 0);
1976 	vector<float>					outputFloats	(numElements, 0);
1977 	const StringTemplate			shaderTemplate	(
1978 		string(s_ShaderPreamble) +
1979 
1980 		"OpSourceExtension \"${EXTENSION}\"\n"
1981 
1982 		"OpName %main           \"main\"\n"
1983 		"OpName %id             \"gl_GlobalInvocationID\"\n"
1984 
1985 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
1986 
1987 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
1988 
1989 		"%id        = OpVariable %uvec3ptr Input\n"
1990 		"%zero      = OpConstant %i32 0\n"
1991 
1992 		"%main      = OpFunction %void None %voidf\n"
1993 		"%label     = OpLabel\n"
1994 		"%idval     = OpLoad %uvec3 %id\n"
1995 		"%x         = OpCompositeExtract %u32 %idval 0\n"
1996 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
1997 		"%inval     = OpLoad %f32 %inloc\n"
1998 		"%neg       = OpFNegate %f32 %inval\n"
1999 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2000 		"             OpStore %outloc %neg\n"
2001 		"             OpReturn\n"
2002 		"             OpFunctionEnd\n");
2003 
2004 	cases.push_back(CaseParameter("empty_extension",	""));
2005 	cases.push_back(CaseParameter("real_extension",		"GL_ARB_texture_rectangle"));
2006 	cases.push_back(CaseParameter("fake_extension",		"GL_ARB_im_the_ultimate_extension"));
2007 	cases.push_back(CaseParameter("utf8_extension",		"GL_ARB_\xE2\x98\x82\xE2\x98\x85"));
2008 	cases.push_back(CaseParameter("long_extension",		makeLongUTF8String(65533) + "ccc")); // word count: 65535
2009 
2010 	fillRandomScalars(rnd, -200.f, 200.f, &inputFloats[0], numElements);
2011 
2012 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2013 		outputFloats[ndx] = -inputFloats[ndx];
2014 
2015 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2016 	{
2017 		map<string, string>		specializations;
2018 		ComputeShaderSpec		spec;
2019 
2020 		specializations["EXTENSION"] = cases[caseNdx].param;
2021 		spec.assembly = shaderTemplate.specialize(specializations);
2022 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
2023 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2024 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2025 
2026 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2027 	}
2028 
2029 	return group.release();
2030 }
2031 
2032 // Checks that a compute shader can generate a constant null value of various types, without exercising a computation on it.
createOpConstantNullGroup(tcu::TestContext & testCtx)2033 tcu::TestCaseGroup* createOpConstantNullGroup (tcu::TestContext& testCtx)
2034 {
2035 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnull", "Tests the OpConstantNull instruction"));
2036 	vector<CaseParameter>			cases;
2037 	de::Random						rnd				(deStringHash(group->getName()));
2038 	const int						numElements		= 100;
2039 	vector<float>					positiveFloats	(numElements, 0);
2040 	vector<float>					negativeFloats	(numElements, 0);
2041 	const StringTemplate			shaderTemplate	(
2042 		string(s_ShaderPreamble) +
2043 
2044 		"OpSource GLSL 430\n"
2045 		"OpName %main           \"main\"\n"
2046 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2047 
2048 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2049 
2050 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2051 
2052 		"${TYPE}\n"
2053 		"%null      = OpConstantNull %type\n"
2054 
2055 		"%id        = OpVariable %uvec3ptr Input\n"
2056 		"%zero      = OpConstant %i32 0\n"
2057 
2058 		"%main      = OpFunction %void None %voidf\n"
2059 		"%label     = OpLabel\n"
2060 		"%idval     = OpLoad %uvec3 %id\n"
2061 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2062 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2063 		"%inval     = OpLoad %f32 %inloc\n"
2064 		"%neg       = OpFNegate %f32 %inval\n"
2065 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2066 		"             OpStore %outloc %neg\n"
2067 		"             OpReturn\n"
2068 		"             OpFunctionEnd\n");
2069 
2070 	cases.push_back(CaseParameter("bool",			"%type = OpTypeBool"));
2071 	cases.push_back(CaseParameter("sint32",			"%type = OpTypeInt 32 1"));
2072 	cases.push_back(CaseParameter("uint32",			"%type = OpTypeInt 32 0"));
2073 	cases.push_back(CaseParameter("float32",		"%type = OpTypeFloat 32"));
2074 	cases.push_back(CaseParameter("vec4float32",	"%type = OpTypeVector %f32 4"));
2075 	cases.push_back(CaseParameter("vec3bool",		"%type = OpTypeVector %bool 3"));
2076 	cases.push_back(CaseParameter("vec2uint32",		"%type = OpTypeVector %u32 2"));
2077 	cases.push_back(CaseParameter("matrix",			"%type = OpTypeMatrix %fvec3 3"));
2078 	cases.push_back(CaseParameter("array",			"%100 = OpConstant %u32 100\n"
2079 													"%type = OpTypeArray %i32 %100"));
2080 	cases.push_back(CaseParameter("struct",			"%type = OpTypeStruct %f32 %i32 %u32"));
2081 	cases.push_back(CaseParameter("pointer",		"%type = OpTypePointer Function %i32"));
2082 
2083 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
2084 
2085 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2086 		negativeFloats[ndx] = -positiveFloats[ndx];
2087 
2088 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2089 	{
2090 		map<string, string>		specializations;
2091 		ComputeShaderSpec		spec;
2092 
2093 		specializations["TYPE"] = cases[caseNdx].param;
2094 		spec.assembly = shaderTemplate.specialize(specializations);
2095 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
2096 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
2097 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2098 
2099 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2100 	}
2101 
2102 	return group.release();
2103 }
2104 
2105 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpConstantCompositeGroup(tcu::TestContext & testCtx)2106 tcu::TestCaseGroup* createOpConstantCompositeGroup (tcu::TestContext& testCtx)
2107 {
2108 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "Tests the OpConstantComposite instruction"));
2109 	vector<CaseParameter>			cases;
2110 	de::Random						rnd				(deStringHash(group->getName()));
2111 	const int						numElements		= 100;
2112 	vector<float>					positiveFloats	(numElements, 0);
2113 	vector<float>					negativeFloats	(numElements, 0);
2114 	const StringTemplate			shaderTemplate	(
2115 		string(s_ShaderPreamble) +
2116 
2117 		"OpSource GLSL 430\n"
2118 		"OpName %main           \"main\"\n"
2119 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2120 
2121 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2122 
2123 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2124 
2125 		"%id        = OpVariable %uvec3ptr Input\n"
2126 		"%zero      = OpConstant %i32 0\n"
2127 
2128 		"${CONSTANT}\n"
2129 
2130 		"%main      = OpFunction %void None %voidf\n"
2131 		"%label     = OpLabel\n"
2132 		"%idval     = OpLoad %uvec3 %id\n"
2133 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2134 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2135 		"%inval     = OpLoad %f32 %inloc\n"
2136 		"%neg       = OpFNegate %f32 %inval\n"
2137 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2138 		"             OpStore %outloc %neg\n"
2139 		"             OpReturn\n"
2140 		"             OpFunctionEnd\n");
2141 
2142 	cases.push_back(CaseParameter("vector",			"%five = OpConstant %u32 5\n"
2143 													"%const = OpConstantComposite %uvec3 %five %zero %five"));
2144 	cases.push_back(CaseParameter("matrix",			"%m3fvec3 = OpTypeMatrix %fvec3 3\n"
2145 													"%ten = OpConstant %f32 10.\n"
2146 													"%fzero = OpConstant %f32 0.\n"
2147 													"%vec = OpConstantComposite %fvec3 %ten %fzero %ten\n"
2148 													"%mat = OpConstantComposite %m3fvec3 %vec %vec %vec"));
2149 	cases.push_back(CaseParameter("struct",			"%m2vec3 = OpTypeMatrix %fvec3 2\n"
2150 													"%struct = OpTypeStruct %i32 %f32 %fvec3 %m2vec3\n"
2151 													"%fzero = OpConstant %f32 0.\n"
2152 													"%one = OpConstant %f32 1.\n"
2153 													"%point5 = OpConstant %f32 0.5\n"
2154 													"%vec = OpConstantComposite %fvec3 %one %one %fzero\n"
2155 													"%mat = OpConstantComposite %m2vec3 %vec %vec\n"
2156 													"%const = OpConstantComposite %struct %zero %point5 %vec %mat"));
2157 	cases.push_back(CaseParameter("nested_struct",	"%st1 = OpTypeStruct %u32 %f32\n"
2158 													"%st2 = OpTypeStruct %i32 %i32\n"
2159 													"%struct = OpTypeStruct %st1 %st2\n"
2160 													"%point5 = OpConstant %f32 0.5\n"
2161 													"%one = OpConstant %u32 1\n"
2162 													"%ten = OpConstant %i32 10\n"
2163 													"%st1val = OpConstantComposite %st1 %one %point5\n"
2164 													"%st2val = OpConstantComposite %st2 %ten %ten\n"
2165 													"%const = OpConstantComposite %struct %st1val %st2val"));
2166 
2167 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
2168 
2169 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2170 		negativeFloats[ndx] = -positiveFloats[ndx];
2171 
2172 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2173 	{
2174 		map<string, string>		specializations;
2175 		ComputeShaderSpec		spec;
2176 
2177 		specializations["CONSTANT"] = cases[caseNdx].param;
2178 		spec.assembly = shaderTemplate.specialize(specializations);
2179 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
2180 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
2181 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2182 
2183 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2184 	}
2185 
2186 	return group.release();
2187 }
2188 
// Builds a normalized float equal to 1.<fraction> * 2^exponent, where <fraction> is taken
// from the significand argument. Only bits 23..1 of significand are used: bit 23 carries
// weight 2^-1 and bit 1 carries weight 2^-23. Bit 0 and every bit above bit 23 are ignored.
// This layout lets the argument be written like the fraction digits of a C99 hex-float.
// For example, 0x1.7f34p-12 is produced by constructNormalizedFloat(-12, 0x7f3400).
float constructNormalizedFloat (deInt32 exponent, deUint32 significand)
{
	// Start from the implicit leading one of a normalized number.
	float mantissa = 1.0f;

	// Accumulate from the most significant fraction bit downwards.
	for (deInt32 bit = 23; bit >= 1; --bit)
	{
		if (((significand >> bit) & 1u) != 0u)
			mantissa += std::ldexp(1.0f, bit - 24);
	}

	return std::ldexp(mantissa, exponent);
}
2208 
2209 // Compare instruction for the OpQuantizeF16 compute exact case.
2210 // Returns true if the output is what is expected from the test case.
compareOpQuantizeF16ComputeExactCase(const std::vector<BufferSp> &,const vector<AllocationSp> & outputAllocs,const std::vector<BufferSp> & expectedOutputs)2211 bool compareOpQuantizeF16ComputeExactCase (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
2212 {
2213 	if (outputAllocs.size() != 1)
2214 		return false;
2215 
2216 	// We really just need this for size because we cannot compare Nans.
2217 	const BufferSp&	expectedOutput	= expectedOutputs[0];
2218 	const float*	outputAsFloat	= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
2219 
2220 	if (expectedOutput->getNumBytes() != 4*sizeof(float)) {
2221 		return false;
2222 	}
2223 
2224 	if (*outputAsFloat != constructNormalizedFloat(8, 0x304000) &&
2225 		*outputAsFloat != constructNormalizedFloat(8, 0x300000)) {
2226 		return false;
2227 	}
2228 	outputAsFloat++;
2229 
2230 	if (*outputAsFloat != -constructNormalizedFloat(-7, 0x600000) &&
2231 		*outputAsFloat != -constructNormalizedFloat(-7, 0x604000)) {
2232 		return false;
2233 	}
2234 	outputAsFloat++;
2235 
2236 	if (*outputAsFloat != constructNormalizedFloat(2, 0x01C000) &&
2237 		*outputAsFloat != constructNormalizedFloat(2, 0x020000)) {
2238 		return false;
2239 	}
2240 	outputAsFloat++;
2241 
2242 	if (*outputAsFloat != constructNormalizedFloat(1, 0xFFC000) &&
2243 		*outputAsFloat != constructNormalizedFloat(2, 0x000000)) {
2244 		return false;
2245 	}
2246 
2247 	return true;
2248 }
2249 
2250 // Checks that every output from a test-case is a float NaN.
compareNan(const std::vector<BufferSp> &,const vector<AllocationSp> & outputAllocs,const std::vector<BufferSp> & expectedOutputs)2251 bool compareNan (const std::vector<BufferSp>&, const vector<AllocationSp>& outputAllocs, const std::vector<BufferSp>& expectedOutputs)
2252 {
2253 	if (outputAllocs.size() != 1)
2254 		return false;
2255 
2256 	// We really just need this for size because we cannot compare Nans.
2257 	const BufferSp& expectedOutput		= expectedOutputs[0];
2258 	const float* output_as_float		= static_cast<const float*>(outputAllocs[0]->getHostPtr());;
2259 
2260 	for (size_t idx = 0; idx < expectedOutput->getNumBytes() / sizeof(float); ++idx)
2261 	{
2262 		if (!isnan(output_as_float[idx]))
2263 		{
2264 			return false;
2265 		}
2266 	}
2267 
2268 	return true;
2269 }
2270 
2271 // Checks that a compute shader can generate a constant composite value of various types, without exercising a computation on it.
createOpQuantizeToF16Group(tcu::TestContext & testCtx)2272 tcu::TestCaseGroup* createOpQuantizeToF16Group (tcu::TestContext& testCtx)
2273 {
2274 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opquantize", "Tests the OpQuantizeToF16 instruction"));
2275 
2276 	const std::string shader (
2277 		string(s_ShaderPreamble) +
2278 
2279 		"OpSource GLSL 430\n"
2280 		"OpName %main           \"main\"\n"
2281 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2282 
2283 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2284 
2285 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2286 
2287 		"%id        = OpVariable %uvec3ptr Input\n"
2288 		"%zero      = OpConstant %i32 0\n"
2289 
2290 		"%main      = OpFunction %void None %voidf\n"
2291 		"%label     = OpLabel\n"
2292 		"%idval     = OpLoad %uvec3 %id\n"
2293 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2294 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2295 		"%inval     = OpLoad %f32 %inloc\n"
2296 		"%quant     = OpQuantizeToF16 %f32 %inval\n"
2297 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2298 		"             OpStore %outloc %quant\n"
2299 		"             OpReturn\n"
2300 		"             OpFunctionEnd\n");
2301 
2302 	{
2303 		ComputeShaderSpec	spec;
2304 		const deUint32		numElements		= 100;
2305 		vector<float>		infinities;
2306 		vector<float>		results;
2307 
2308 		infinities.reserve(numElements);
2309 		results.reserve(numElements);
2310 
2311 		for (size_t idx = 0; idx < numElements; ++idx)
2312 		{
2313 			switch(idx % 4)
2314 			{
2315 				case 0:
2316 					infinities.push_back(std::numeric_limits<float>::infinity());
2317 					results.push_back(std::numeric_limits<float>::infinity());
2318 					break;
2319 				case 1:
2320 					infinities.push_back(-std::numeric_limits<float>::infinity());
2321 					results.push_back(-std::numeric_limits<float>::infinity());
2322 					break;
2323 				case 2:
2324 					infinities.push_back(std::ldexp(1.0f, 16));
2325 					results.push_back(std::numeric_limits<float>::infinity());
2326 					break;
2327 				case 3:
2328 					infinities.push_back(std::ldexp(-1.0f, 32));
2329 					results.push_back(-std::numeric_limits<float>::infinity());
2330 					break;
2331 			}
2332 		}
2333 
2334 		spec.assembly = shader;
2335 		spec.inputs.push_back(BufferSp(new Float32Buffer(infinities)));
2336 		spec.outputs.push_back(BufferSp(new Float32Buffer(results)));
2337 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2338 
2339 		group->addChild(new SpvAsmComputeShaderCase(
2340 			testCtx, "infinities", "Check that infinities propagated and created", spec));
2341 	}
2342 
2343 	{
2344 		ComputeShaderSpec	spec;
2345 		vector<float>		nans;
2346 		const deUint32		numElements		= 100;
2347 
2348 		nans.reserve(numElements);
2349 
2350 		for (size_t idx = 0; idx < numElements; ++idx)
2351 		{
2352 			if (idx % 2 == 0)
2353 			{
2354 				nans.push_back(std::numeric_limits<float>::quiet_NaN());
2355 			}
2356 			else
2357 			{
2358 				nans.push_back(-std::numeric_limits<float>::quiet_NaN());
2359 			}
2360 		}
2361 
2362 		spec.assembly = shader;
2363 		spec.inputs.push_back(BufferSp(new Float32Buffer(nans)));
2364 		spec.outputs.push_back(BufferSp(new Float32Buffer(nans)));
2365 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2366 		spec.verifyIO = &compareNan;
2367 
2368 		group->addChild(new SpvAsmComputeShaderCase(
2369 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
2370 	}
2371 
2372 	{
2373 		ComputeShaderSpec	spec;
2374 		vector<float>		small;
2375 		vector<float>		zeros;
2376 		const deUint32		numElements		= 100;
2377 
2378 		small.reserve(numElements);
2379 		zeros.reserve(numElements);
2380 
2381 		for (size_t idx = 0; idx < numElements; ++idx)
2382 		{
2383 			switch(idx % 6)
2384 			{
2385 				case 0:
2386 					small.push_back(0.f);
2387 					zeros.push_back(0.f);
2388 					break;
2389 				case 1:
2390 					small.push_back(-0.f);
2391 					zeros.push_back(-0.f);
2392 					break;
2393 				case 2:
2394 					small.push_back(std::ldexp(1.0f, -16));
2395 					zeros.push_back(0.f);
2396 					break;
2397 				case 3:
2398 					small.push_back(std::ldexp(-1.0f, -32));
2399 					zeros.push_back(-0.f);
2400 					break;
2401 				case 4:
2402 					small.push_back(std::ldexp(1.0f, -127));
2403 					zeros.push_back(0.f);
2404 					break;
2405 				case 5:
2406 					small.push_back(-std::ldexp(1.0f, -128));
2407 					zeros.push_back(-0.f);
2408 					break;
2409 			}
2410 		}
2411 
2412 		spec.assembly = shader;
2413 		spec.inputs.push_back(BufferSp(new Float32Buffer(small)));
2414 		spec.outputs.push_back(BufferSp(new Float32Buffer(zeros)));
2415 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2416 
2417 		group->addChild(new SpvAsmComputeShaderCase(
2418 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
2419 	}
2420 
2421 	{
2422 		ComputeShaderSpec	spec;
2423 		vector<float>		exact;
2424 		const deUint32		numElements		= 200;
2425 
2426 		exact.reserve(numElements);
2427 
2428 		for (size_t idx = 0; idx < numElements; ++idx)
2429 			exact.push_back(static_cast<float>(static_cast<int>(idx) - 100));
2430 
2431 		spec.assembly = shader;
2432 		spec.inputs.push_back(BufferSp(new Float32Buffer(exact)));
2433 		spec.outputs.push_back(BufferSp(new Float32Buffer(exact)));
2434 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2435 
2436 		group->addChild(new SpvAsmComputeShaderCase(
2437 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
2438 	}
2439 
2440 	{
2441 		ComputeShaderSpec	spec;
2442 		vector<float>		inputs;
2443 		const deUint32		numElements		= 4;
2444 
2445 		inputs.push_back(constructNormalizedFloat(8,	0x300300));
2446 		inputs.push_back(-constructNormalizedFloat(-7,	0x600800));
2447 		inputs.push_back(constructNormalizedFloat(2,	0x01E000));
2448 		inputs.push_back(constructNormalizedFloat(1,	0xFFE000));
2449 
2450 		spec.assembly = shader;
2451 		spec.verifyIO = &compareOpQuantizeF16ComputeExactCase;
2452 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2453 		spec.outputs.push_back(BufferSp(new Float32Buffer(inputs)));
2454 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2455 
2456 		group->addChild(new SpvAsmComputeShaderCase(
2457 			testCtx, "rounded", "Check that are rounded when needed", spec));
2458 	}
2459 
2460 	return group.release();
2461 }
2462 
// Returns a Dest whose bytes are a copy of the bytes of source.
// Dest and Src must have the same size; this is checked at compile time.
template <typename Dest, typename Src>
Dest bitwiseCast(Src source)
{
	DE_STATIC_ASSERT(sizeof(Dest) == sizeof(Src));

	Dest result;
	deMemcpy(&result, &source, sizeof(Dest));
	return result;
}
2472 
createSpecConstantOpQuantizeToF16Group(tcu::TestContext & testCtx)2473 tcu::TestCaseGroup* createSpecConstantOpQuantizeToF16Group (tcu::TestContext& testCtx)
2474 {
2475 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opspecconstantop_opquantize", "Tests the OpQuantizeToF16 opcode for the OpSpecConstantOp instruction"));
2476 
2477 	const std::string shader (
2478 		string(s_ShaderPreamble) +
2479 
2480 		"OpName %main           \"main\"\n"
2481 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2482 
2483 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2484 
2485 		"OpDecorate %sc_0  SpecId 0\n"
2486 		"OpDecorate %sc_1  SpecId 1\n"
2487 		"OpDecorate %sc_2  SpecId 2\n"
2488 		"OpDecorate %sc_3  SpecId 3\n"
2489 		"OpDecorate %sc_4  SpecId 4\n"
2490 		"OpDecorate %sc_5  SpecId 5\n"
2491 
2492 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2493 
2494 		"%id        = OpVariable %uvec3ptr Input\n"
2495 		"%zero      = OpConstant %i32 0\n"
2496 		"%c_u32_6   = OpConstant %u32 6\n"
2497 
2498 		"%sc_0      = OpSpecConstant %f32 0.\n"
2499 		"%sc_1      = OpSpecConstant %f32 0.\n"
2500 		"%sc_2      = OpSpecConstant %f32 0.\n"
2501 		"%sc_3      = OpSpecConstant %f32 0.\n"
2502 		"%sc_4      = OpSpecConstant %f32 0.\n"
2503 		"%sc_5      = OpSpecConstant %f32 0.\n"
2504 
2505 		"%sc_0_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_0\n"
2506 		"%sc_1_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_1\n"
2507 		"%sc_2_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_2\n"
2508 		"%sc_3_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_3\n"
2509 		"%sc_4_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_4\n"
2510 		"%sc_5_quant = OpSpecConstantOp %f32 QuantizeToF16 %sc_5\n"
2511 
2512 		"%main      = OpFunction %void None %voidf\n"
2513 		"%label     = OpLabel\n"
2514 		"%idval     = OpLoad %uvec3 %id\n"
2515 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2516 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2517 		"%selector  = OpUMod %u32 %x %c_u32_6\n"
2518 		"            OpSelectionMerge %exit None\n"
2519 		"            OpSwitch %selector %exit 0 %case0 1 %case1 2 %case2 3 %case3 4 %case4 5 %case5\n"
2520 
2521 		"%case0     = OpLabel\n"
2522 		"             OpStore %outloc %sc_0_quant\n"
2523 		"             OpBranch %exit\n"
2524 
2525 		"%case1     = OpLabel\n"
2526 		"             OpStore %outloc %sc_1_quant\n"
2527 		"             OpBranch %exit\n"
2528 
2529 		"%case2     = OpLabel\n"
2530 		"             OpStore %outloc %sc_2_quant\n"
2531 		"             OpBranch %exit\n"
2532 
2533 		"%case3     = OpLabel\n"
2534 		"             OpStore %outloc %sc_3_quant\n"
2535 		"             OpBranch %exit\n"
2536 
2537 		"%case4     = OpLabel\n"
2538 		"             OpStore %outloc %sc_4_quant\n"
2539 		"             OpBranch %exit\n"
2540 
2541 		"%case5     = OpLabel\n"
2542 		"             OpStore %outloc %sc_5_quant\n"
2543 		"             OpBranch %exit\n"
2544 
2545 		"%exit      = OpLabel\n"
2546 		"             OpReturn\n"
2547 
2548 		"             OpFunctionEnd\n");
2549 
2550 	{
2551 		ComputeShaderSpec	spec;
2552 		const deUint8		numCases	= 4;
2553 		vector<float>		inputs		(numCases, 0.f);
2554 		vector<float>		outputs;
2555 
2556 		spec.assembly		= shader;
2557 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2558 
2559 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::numeric_limits<float>::infinity()));
2560 		spec.specConstants.push_back(bitwiseCast<deUint32>(-std::numeric_limits<float>::infinity()));
2561 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, 16)));
2562 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(-1.0f, 32)));
2563 
2564 		outputs.push_back(std::numeric_limits<float>::infinity());
2565 		outputs.push_back(-std::numeric_limits<float>::infinity());
2566 		outputs.push_back(std::numeric_limits<float>::infinity());
2567 		outputs.push_back(-std::numeric_limits<float>::infinity());
2568 
2569 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2570 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2571 
2572 		group->addChild(new SpvAsmComputeShaderCase(
2573 			testCtx, "infinities", "Check that infinities propagated and created", spec));
2574 	}
2575 
2576 	{
2577 		ComputeShaderSpec	spec;
2578 		const deUint8		numCases	= 2;
2579 		vector<float>		inputs		(numCases, 0.f);
2580 		vector<float>		outputs;
2581 
2582 		spec.assembly		= shader;
2583 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2584 		spec.verifyIO		= &compareNan;
2585 
2586 		outputs.push_back(std::numeric_limits<float>::quiet_NaN());
2587 		outputs.push_back(-std::numeric_limits<float>::quiet_NaN());
2588 
2589 		for (deUint8 idx = 0; idx < numCases; ++idx)
2590 			spec.specConstants.push_back(bitwiseCast<deUint32>(outputs[idx]));
2591 
2592 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2593 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2594 
2595 		group->addChild(new SpvAsmComputeShaderCase(
2596 			testCtx, "propagated_nans", "Check that nans are propagated", spec));
2597 	}
2598 
2599 	{
2600 		ComputeShaderSpec	spec;
2601 		const deUint8		numCases	= 6;
2602 		vector<float>		inputs		(numCases, 0.f);
2603 		vector<float>		outputs;
2604 
2605 		spec.assembly		= shader;
2606 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2607 
2608 		spec.specConstants.push_back(bitwiseCast<deUint32>(0.f));
2609 		spec.specConstants.push_back(bitwiseCast<deUint32>(-0.f));
2610 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, -16)));
2611 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(-1.0f, -32)));
2612 		spec.specConstants.push_back(bitwiseCast<deUint32>(std::ldexp(1.0f, -127)));
2613 		spec.specConstants.push_back(bitwiseCast<deUint32>(-std::ldexp(1.0f, -128)));
2614 
2615 		outputs.push_back(0.f);
2616 		outputs.push_back(-0.f);
2617 		outputs.push_back(0.f);
2618 		outputs.push_back(-0.f);
2619 		outputs.push_back(0.f);
2620 		outputs.push_back(-0.f);
2621 
2622 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2623 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2624 
2625 		group->addChild(new SpvAsmComputeShaderCase(
2626 			testCtx, "flush_to_zero", "Check that values are zeroed correctly", spec));
2627 	}
2628 
2629 	{
2630 		ComputeShaderSpec	spec;
2631 		const deUint8		numCases	= 6;
2632 		vector<float>		inputs		(numCases, 0.f);
2633 		vector<float>		outputs;
2634 
2635 		spec.assembly		= shader;
2636 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2637 
2638 		for (deUint8 idx = 0; idx < 6; ++idx)
2639 		{
2640 			const float f = static_cast<float>(idx * 10 - 30) / 4.f;
2641 			spec.specConstants.push_back(bitwiseCast<deUint32>(f));
2642 			outputs.push_back(f);
2643 		}
2644 
2645 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2646 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2647 
2648 		group->addChild(new SpvAsmComputeShaderCase(
2649 			testCtx, "exact", "Check that values exactly preserved where appropriate", spec));
2650 	}
2651 
2652 	{
2653 		ComputeShaderSpec	spec;
2654 		const deUint8		numCases	= 4;
2655 		vector<float>		inputs		(numCases, 0.f);
2656 		vector<float>		outputs;
2657 
2658 		spec.assembly		= shader;
2659 		spec.numWorkGroups	= IVec3(numCases, 1, 1);
2660 		spec.verifyIO		= &compareOpQuantizeF16ComputeExactCase;
2661 
2662 		outputs.push_back(constructNormalizedFloat(8, 0x300300));
2663 		outputs.push_back(-constructNormalizedFloat(-7, 0x600800));
2664 		outputs.push_back(constructNormalizedFloat(2, 0x01E000));
2665 		outputs.push_back(constructNormalizedFloat(1, 0xFFE000));
2666 
2667 		for (deUint8 idx = 0; idx < numCases; ++idx)
2668 			spec.specConstants.push_back(bitwiseCast<deUint32>(outputs[idx]));
2669 
2670 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputs)));
2671 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputs)));
2672 
2673 		group->addChild(new SpvAsmComputeShaderCase(
2674 			testCtx, "rounded", "Check that are rounded when needed", spec));
2675 	}
2676 
2677 	return group.release();
2678 }
2679 
2680 // Checks that constant null/composite values can be used in computation.
createOpConstantUsageGroup(tcu::TestContext & testCtx)2681 tcu::TestCaseGroup* createOpConstantUsageGroup (tcu::TestContext& testCtx)
2682 {
2683 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opconstantnullcomposite", "Spotcheck the OpConstantNull & OpConstantComposite instruction"));
2684 	ComputeShaderSpec				spec;
2685 	de::Random						rnd				(deStringHash(group->getName()));
2686 	const int						numElements		= 100;
2687 	vector<float>					positiveFloats	(numElements, 0);
2688 	vector<float>					negativeFloats	(numElements, 0);
2689 
2690 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
2691 
2692 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2693 		negativeFloats[ndx] = -positiveFloats[ndx];
2694 
2695 	spec.assembly =
2696 		"OpCapability Shader\n"
2697 		"%std450 = OpExtInstImport \"GLSL.std.450\"\n"
2698 		"OpMemoryModel Logical GLSL450\n"
2699 		"OpEntryPoint GLCompute %main \"main\" %id\n"
2700 		"OpExecutionMode %main LocalSize 1 1 1\n"
2701 
2702 		"OpSource GLSL 430\n"
2703 		"OpName %main           \"main\"\n"
2704 		"OpName %id             \"gl_GlobalInvocationID\"\n"
2705 
2706 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2707 
2708 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) +
2709 
2710 		"%fmat      = OpTypeMatrix %fvec3 3\n"
2711 		"%ten       = OpConstant %u32 10\n"
2712 		"%f32arr10  = OpTypeArray %f32 %ten\n"
2713 		"%fst       = OpTypeStruct %f32 %f32\n"
2714 
2715 		+ string(s_InputOutputBuffer) +
2716 
2717 		"%id        = OpVariable %uvec3ptr Input\n"
2718 		"%zero      = OpConstant %i32 0\n"
2719 
2720 		// Create a bunch of null values
2721 		"%unull     = OpConstantNull %u32\n"
2722 		"%fnull     = OpConstantNull %f32\n"
2723 		"%vnull     = OpConstantNull %fvec3\n"
2724 		"%mnull     = OpConstantNull %fmat\n"
2725 		"%anull     = OpConstantNull %f32arr10\n"
2726 		"%snull     = OpConstantComposite %fst %fnull %fnull\n"
2727 
2728 		"%main      = OpFunction %void None %voidf\n"
2729 		"%label     = OpLabel\n"
2730 		"%idval     = OpLoad %uvec3 %id\n"
2731 		"%x         = OpCompositeExtract %u32 %idval 0\n"
2732 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
2733 		"%inval     = OpLoad %f32 %inloc\n"
2734 		"%neg       = OpFNegate %f32 %inval\n"
2735 
2736 		// Get the abs() of (a certain element of) those null values
2737 		"%unull_cov = OpConvertUToF %f32 %unull\n"
2738 		"%unull_abs = OpExtInst %f32 %std450 FAbs %unull_cov\n"
2739 		"%fnull_abs = OpExtInst %f32 %std450 FAbs %fnull\n"
2740 		"%vnull_0   = OpCompositeExtract %f32 %vnull 0\n"
2741 		"%vnull_abs = OpExtInst %f32 %std450 FAbs %vnull_0\n"
2742 		"%mnull_12  = OpCompositeExtract %f32 %mnull 1 2\n"
2743 		"%mnull_abs = OpExtInst %f32 %std450 FAbs %mnull_12\n"
2744 		"%anull_3   = OpCompositeExtract %f32 %anull 3\n"
2745 		"%anull_abs = OpExtInst %f32 %std450 FAbs %anull_3\n"
2746 		"%snull_1   = OpCompositeExtract %f32 %snull 1\n"
2747 		"%snull_abs = OpExtInst %f32 %std450 FAbs %snull_1\n"
2748 
2749 		// Add them all
2750 		"%add1      = OpFAdd %f32 %neg  %unull_abs\n"
2751 		"%add2      = OpFAdd %f32 %add1 %fnull_abs\n"
2752 		"%add3      = OpFAdd %f32 %add2 %vnull_abs\n"
2753 		"%add4      = OpFAdd %f32 %add3 %mnull_abs\n"
2754 		"%add5      = OpFAdd %f32 %add4 %anull_abs\n"
2755 		"%final     = OpFAdd %f32 %add5 %snull_abs\n"
2756 
2757 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
2758 		"             OpStore %outloc %final\n" // write to output
2759 		"             OpReturn\n"
2760 		"             OpFunctionEnd\n";
2761 	spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
2762 	spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
2763 	spec.numWorkGroups = IVec3(numElements, 1, 1);
2764 
2765 	group->addChild(new SpvAsmComputeShaderCase(testCtx, "spotcheck", "Check that values constructed via OpConstantNull & OpConstantComposite can be used", spec));
2766 
2767 	return group.release();
2768 }
2769 
2770 // Assembly code used for testing loop control is based on GLSL source code:
2771 // #version 430
2772 //
2773 // layout(std140, set = 0, binding = 0) readonly buffer Input {
2774 //   float elements[];
2775 // } input_data;
2776 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
2777 //   float elements[];
2778 // } output_data;
2779 //
2780 // void main() {
2781 //   uint x = gl_GlobalInvocationID.x;
2782 //   output_data.elements[x] = input_data.elements[x];
2783 //   for (uint i = 0; i < 4; ++i)
2784 //     output_data.elements[x] += 1.f;
2785 // }
createLoopControlGroup(tcu::TestContext & testCtx)2786 tcu::TestCaseGroup* createLoopControlGroup (tcu::TestContext& testCtx)
2787 {
2788 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "loop_control", "Tests loop control cases"));
2789 	vector<CaseParameter>			cases;
2790 	de::Random						rnd				(deStringHash(group->getName()));
2791 	const int						numElements		= 100;
2792 	vector<float>					inputFloats		(numElements, 0);
2793 	vector<float>					outputFloats	(numElements, 0);
2794 	const StringTemplate			shaderTemplate	(
2795 		string(s_ShaderPreamble) +
2796 
2797 		"OpSource GLSL 430\n"
2798 		"OpName %main \"main\"\n"
2799 		"OpName %id \"gl_GlobalInvocationID\"\n"
2800 
2801 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2802 
2803 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2804 
2805 		"%u32ptr      = OpTypePointer Function %u32\n"
2806 
2807 		"%id          = OpVariable %uvec3ptr Input\n"
2808 		"%zero        = OpConstant %i32 0\n"
2809 		"%uzero       = OpConstant %u32 0\n"
2810 		"%one         = OpConstant %i32 1\n"
2811 		"%constf1     = OpConstant %f32 1.0\n"
2812 		"%four        = OpConstant %u32 4\n"
2813 
2814 		"%main        = OpFunction %void None %voidf\n"
2815 		"%entry       = OpLabel\n"
2816 		"%i           = OpVariable %u32ptr Function\n"
2817 		"               OpStore %i %uzero\n"
2818 
2819 		"%idval       = OpLoad %uvec3 %id\n"
2820 		"%x           = OpCompositeExtract %u32 %idval 0\n"
2821 		"%inloc       = OpAccessChain %f32ptr %indata %zero %x\n"
2822 		"%inval       = OpLoad %f32 %inloc\n"
2823 		"%outloc      = OpAccessChain %f32ptr %outdata %zero %x\n"
2824 		"               OpStore %outloc %inval\n"
2825 		"               OpBranch %loop_entry\n"
2826 
2827 		"%loop_entry  = OpLabel\n"
2828 		"%i_val       = OpLoad %u32 %i\n"
2829 		"%cmp_lt      = OpULessThan %bool %i_val %four\n"
2830 		"               OpLoopMerge %loop_merge %loop_entry ${CONTROL}\n"
2831 		"               OpBranchConditional %cmp_lt %loop_body %loop_merge\n"
2832 		"%loop_body   = OpLabel\n"
2833 		"%outval      = OpLoad %f32 %outloc\n"
2834 		"%addf1       = OpFAdd %f32 %outval %constf1\n"
2835 		"               OpStore %outloc %addf1\n"
2836 		"%new_i       = OpIAdd %u32 %i_val %one\n"
2837 		"               OpStore %i %new_i\n"
2838 		"               OpBranch %loop_entry\n"
2839 		"%loop_merge  = OpLabel\n"
2840 		"               OpReturn\n"
2841 		"               OpFunctionEnd\n");
2842 
2843 	cases.push_back(CaseParameter("none",				"None"));
2844 	cases.push_back(CaseParameter("unroll",				"Unroll"));
2845 	cases.push_back(CaseParameter("dont_unroll",		"DontUnroll"));
2846 	cases.push_back(CaseParameter("unroll_dont_unroll",	"Unroll|DontUnroll"));
2847 
2848 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
2849 
2850 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2851 		outputFloats[ndx] = inputFloats[ndx] + 4.f;
2852 
2853 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2854 	{
2855 		map<string, string>		specializations;
2856 		ComputeShaderSpec		spec;
2857 
2858 		specializations["CONTROL"] = cases[caseNdx].param;
2859 		spec.assembly = shaderTemplate.specialize(specializations);
2860 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
2861 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2862 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2863 
2864 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2865 	}
2866 
2867 	return group.release();
2868 }
2869 
2870 // Assembly code used for testing selection control is based on GLSL source code:
2871 // #version 430
2872 //
2873 // layout(std140, set = 0, binding = 0) readonly buffer Input {
2874 //   float elements[];
2875 // } input_data;
2876 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
2877 //   float elements[];
2878 // } output_data;
2879 //
2880 // void main() {
2881 //   uint x = gl_GlobalInvocationID.x;
2882 //   float val = input_data.elements[x];
2883 //   if (val > 10.f)
2884 //     output_data.elements[x] = val + 1.f;
2885 //   else
2886 //     output_data.elements[x] = val - 1.f;
2887 // }
createSelectionControlGroup(tcu::TestContext & testCtx)2888 tcu::TestCaseGroup* createSelectionControlGroup (tcu::TestContext& testCtx)
2889 {
2890 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "selection_control", "Tests selection control cases"));
2891 	vector<CaseParameter>			cases;
2892 	de::Random						rnd				(deStringHash(group->getName()));
2893 	const int						numElements		= 100;
2894 	vector<float>					inputFloats		(numElements, 0);
2895 	vector<float>					outputFloats	(numElements, 0);
2896 	const StringTemplate			shaderTemplate	(
2897 		string(s_ShaderPreamble) +
2898 
2899 		"OpSource GLSL 430\n"
2900 		"OpName %main \"main\"\n"
2901 		"OpName %id \"gl_GlobalInvocationID\"\n"
2902 
2903 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2904 
2905 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
2906 
2907 		"%id       = OpVariable %uvec3ptr Input\n"
2908 		"%zero     = OpConstant %i32 0\n"
2909 		"%constf1  = OpConstant %f32 1.0\n"
2910 		"%constf10 = OpConstant %f32 10.0\n"
2911 
2912 		"%main     = OpFunction %void None %voidf\n"
2913 		"%entry    = OpLabel\n"
2914 		"%idval    = OpLoad %uvec3 %id\n"
2915 		"%x        = OpCompositeExtract %u32 %idval 0\n"
2916 		"%inloc    = OpAccessChain %f32ptr %indata %zero %x\n"
2917 		"%inval    = OpLoad %f32 %inloc\n"
2918 		"%outloc   = OpAccessChain %f32ptr %outdata %zero %x\n"
2919 		"%cmp_gt   = OpFOrdGreaterThan %bool %inval %constf10\n"
2920 
2921 		"            OpSelectionMerge %if_end ${CONTROL}\n"
2922 		"            OpBranchConditional %cmp_gt %if_true %if_false\n"
2923 		"%if_true  = OpLabel\n"
2924 		"%addf1    = OpFAdd %f32 %inval %constf1\n"
2925 		"            OpStore %outloc %addf1\n"
2926 		"            OpBranch %if_end\n"
2927 		"%if_false = OpLabel\n"
2928 		"%subf1    = OpFSub %f32 %inval %constf1\n"
2929 		"            OpStore %outloc %subf1\n"
2930 		"            OpBranch %if_end\n"
2931 		"%if_end   = OpLabel\n"
2932 		"            OpReturn\n"
2933 		"            OpFunctionEnd\n");
2934 
2935 	cases.push_back(CaseParameter("none",					"None"));
2936 	cases.push_back(CaseParameter("flatten",				"Flatten"));
2937 	cases.push_back(CaseParameter("dont_flatten",			"DontFlatten"));
2938 	cases.push_back(CaseParameter("flatten_dont_flatten",	"DontFlatten|Flatten"));
2939 
2940 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
2941 
2942 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
2943 	floorAll(inputFloats);
2944 
2945 	for (size_t ndx = 0; ndx < numElements; ++ndx)
2946 		outputFloats[ndx] = inputFloats[ndx] + (inputFloats[ndx] > 10.f ? 1.f : -1.f);
2947 
2948 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
2949 	{
2950 		map<string, string>		specializations;
2951 		ComputeShaderSpec		spec;
2952 
2953 		specializations["CONTROL"] = cases[caseNdx].param;
2954 		spec.assembly = shaderTemplate.specialize(specializations);
2955 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
2956 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
2957 		spec.numWorkGroups = IVec3(numElements, 1, 1);
2958 
2959 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
2960 	}
2961 
2962 	return group.release();
2963 }
2964 
2965 // Assembly code used for testing function control is based on GLSL source code:
2966 //
2967 // #version 430
2968 //
2969 // layout(std140, set = 0, binding = 0) readonly buffer Input {
2970 //   float elements[];
2971 // } input_data;
2972 // layout(std140, set = 0, binding = 1) writeonly buffer Output {
2973 //   float elements[];
2974 // } output_data;
2975 //
2976 // float const10() { return 10.f; }
2977 //
2978 // void main() {
2979 //   uint x = gl_GlobalInvocationID.x;
2980 //   output_data.elements[x] = input_data.elements[x] + const10();
2981 // }
createFunctionControlGroup(tcu::TestContext & testCtx)2982 tcu::TestCaseGroup* createFunctionControlGroup (tcu::TestContext& testCtx)
2983 {
2984 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "function_control", "Tests function control cases"));
2985 	vector<CaseParameter>			cases;
2986 	de::Random						rnd				(deStringHash(group->getName()));
2987 	const int						numElements		= 100;
2988 	vector<float>					inputFloats		(numElements, 0);
2989 	vector<float>					outputFloats	(numElements, 0);
2990 	const StringTemplate			shaderTemplate	(
2991 		string(s_ShaderPreamble) +
2992 
2993 		"OpSource GLSL 430\n"
2994 		"OpName %main \"main\"\n"
2995 		"OpName %func_const10 \"const10(\"\n"
2996 		"OpName %id \"gl_GlobalInvocationID\"\n"
2997 
2998 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
2999 
3000 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
3001 
3002 		"%f32f = OpTypeFunction %f32\n"
3003 		"%id = OpVariable %uvec3ptr Input\n"
3004 		"%zero = OpConstant %i32 0\n"
3005 		"%constf10 = OpConstant %f32 10.0\n"
3006 
3007 		"%main         = OpFunction %void None %voidf\n"
3008 		"%entry        = OpLabel\n"
3009 		"%idval        = OpLoad %uvec3 %id\n"
3010 		"%x            = OpCompositeExtract %u32 %idval 0\n"
3011 		"%inloc        = OpAccessChain %f32ptr %indata %zero %x\n"
3012 		"%inval        = OpLoad %f32 %inloc\n"
3013 		"%ret_10       = OpFunctionCall %f32 %func_const10\n"
3014 		"%fadd         = OpFAdd %f32 %inval %ret_10\n"
3015 		"%outloc       = OpAccessChain %f32ptr %outdata %zero %x\n"
3016 		"                OpStore %outloc %fadd\n"
3017 		"                OpReturn\n"
3018 		"                OpFunctionEnd\n"
3019 
3020 		"%func_const10 = OpFunction %f32 ${CONTROL} %f32f\n"
3021 		"%label        = OpLabel\n"
3022 		"                OpReturnValue %constf10\n"
3023 		"                OpFunctionEnd\n");
3024 
3025 	cases.push_back(CaseParameter("none",						"None"));
3026 	cases.push_back(CaseParameter("inline",						"Inline"));
3027 	cases.push_back(CaseParameter("dont_inline",				"DontInline"));
3028 	cases.push_back(CaseParameter("pure",						"Pure"));
3029 	cases.push_back(CaseParameter("const",						"Const"));
3030 	cases.push_back(CaseParameter("inline_pure",				"Inline|Pure"));
3031 	cases.push_back(CaseParameter("const_dont_inline",			"Const|DontInline"));
3032 	cases.push_back(CaseParameter("inline_dont_inline",			"Inline|DontInline"));
3033 	cases.push_back(CaseParameter("pure_inline_dont_inline",	"Pure|Inline|DontInline"));
3034 
3035 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
3036 
3037 	// CPU might not use the same rounding mode as the GPU. Use whole numbers to avoid rounding differences.
3038 	floorAll(inputFloats);
3039 
3040 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3041 		outputFloats[ndx] = inputFloats[ndx] + 10.f;
3042 
3043 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3044 	{
3045 		map<string, string>		specializations;
3046 		ComputeShaderSpec		spec;
3047 
3048 		specializations["CONTROL"] = cases[caseNdx].param;
3049 		spec.assembly = shaderTemplate.specialize(specializations);
3050 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3051 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3052 		spec.numWorkGroups = IVec3(numElements, 1, 1);
3053 
3054 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
3055 	}
3056 
3057 	return group.release();
3058 }
3059 
createMemoryAccessGroup(tcu::TestContext & testCtx)3060 tcu::TestCaseGroup* createMemoryAccessGroup (tcu::TestContext& testCtx)
3061 {
3062 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "memory_access", "Tests memory access cases"));
3063 	vector<CaseParameter>			cases;
3064 	de::Random						rnd				(deStringHash(group->getName()));
3065 	const int						numElements		= 100;
3066 	vector<float>					inputFloats		(numElements, 0);
3067 	vector<float>					outputFloats	(numElements, 0);
3068 	const StringTemplate			shaderTemplate	(
3069 		string(s_ShaderPreamble) +
3070 
3071 		"OpSource GLSL 430\n"
3072 		"OpName %main           \"main\"\n"
3073 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3074 
3075 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3076 
3077 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
3078 
3079 		"%f32ptr_f  = OpTypePointer Function %f32\n"
3080 
3081 		"%id        = OpVariable %uvec3ptr Input\n"
3082 		"%zero      = OpConstant %i32 0\n"
3083 		"%four      = OpConstant %i32 4\n"
3084 
3085 		"%main      = OpFunction %void None %voidf\n"
3086 		"%label     = OpLabel\n"
3087 		"%copy      = OpVariable %f32ptr_f Function\n"
3088 		"%idval     = OpLoad %uvec3 %id ${ACCESS}\n"
3089 		"%x         = OpCompositeExtract %u32 %idval 0\n"
3090 		"%inloc     = OpAccessChain %f32ptr %indata  %zero %x\n"
3091 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3092 		"             OpCopyMemory %copy %inloc ${ACCESS}\n"
3093 		"%val1      = OpLoad %f32 %copy\n"
3094 		"%val2      = OpLoad %f32 %inloc\n"
3095 		"%add       = OpFAdd %f32 %val1 %val2\n"
3096 		"             OpStore %outloc %add ${ACCESS}\n"
3097 		"             OpReturn\n"
3098 		"             OpFunctionEnd\n");
3099 
3100 	cases.push_back(CaseParameter("null",					""));
3101 	cases.push_back(CaseParameter("none",					"None"));
3102 	cases.push_back(CaseParameter("volatile",				"Volatile"));
3103 	cases.push_back(CaseParameter("aligned",				"Aligned 4"));
3104 	cases.push_back(CaseParameter("nontemporal",			"Nontemporal"));
3105 	cases.push_back(CaseParameter("aligned_nontemporal",	"Aligned|Nontemporal 4"));
3106 	cases.push_back(CaseParameter("aligned_volatile",		"Volatile|Aligned 4"));
3107 
3108 	fillRandomScalars(rnd, -100.f, 100.f, &inputFloats[0], numElements);
3109 
3110 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3111 		outputFloats[ndx] = inputFloats[ndx] + inputFloats[ndx];
3112 
3113 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3114 	{
3115 		map<string, string>		specializations;
3116 		ComputeShaderSpec		spec;
3117 
3118 		specializations["ACCESS"] = cases[caseNdx].param;
3119 		spec.assembly = shaderTemplate.specialize(specializations);
3120 		spec.inputs.push_back(BufferSp(new Float32Buffer(inputFloats)));
3121 		spec.outputs.push_back(BufferSp(new Float32Buffer(outputFloats)));
3122 		spec.numWorkGroups = IVec3(numElements, 1, 1);
3123 
3124 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
3125 	}
3126 
3127 	return group.release();
3128 }
3129 
3130 // Checks that we can get undefined values for various types, without exercising a computation with it.
createOpUndefGroup(tcu::TestContext & testCtx)3131 tcu::TestCaseGroup* createOpUndefGroup (tcu::TestContext& testCtx)
3132 {
3133 	de::MovePtr<tcu::TestCaseGroup>	group			(new tcu::TestCaseGroup(testCtx, "opundef", "Tests the OpUndef instruction"));
3134 	vector<CaseParameter>			cases;
3135 	de::Random						rnd				(deStringHash(group->getName()));
3136 	const int						numElements		= 100;
3137 	vector<float>					positiveFloats	(numElements, 0);
3138 	vector<float>					negativeFloats	(numElements, 0);
3139 	const StringTemplate			shaderTemplate	(
3140 		string(s_ShaderPreamble) +
3141 
3142 		"OpSource GLSL 430\n"
3143 		"OpName %main           \"main\"\n"
3144 		"OpName %id             \"gl_GlobalInvocationID\"\n"
3145 
3146 		"OpDecorate %id BuiltIn GlobalInvocationId\n"
3147 
3148 		+ string(s_InputOutputBufferTraits) + string(s_CommonTypes) + string(s_InputOutputBuffer) +
3149 
3150 		"${TYPE}\n"
3151 
3152 		"%id        = OpVariable %uvec3ptr Input\n"
3153 		"%zero      = OpConstant %i32 0\n"
3154 
3155 		"%main      = OpFunction %void None %voidf\n"
3156 		"%label     = OpLabel\n"
3157 
3158 		"%undef     = OpUndef %type\n"
3159 
3160 		"%idval     = OpLoad %uvec3 %id\n"
3161 		"%x         = OpCompositeExtract %u32 %idval 0\n"
3162 
3163 		"%inloc     = OpAccessChain %f32ptr %indata %zero %x\n"
3164 		"%inval     = OpLoad %f32 %inloc\n"
3165 		"%neg       = OpFNegate %f32 %inval\n"
3166 		"%outloc    = OpAccessChain %f32ptr %outdata %zero %x\n"
3167 		"             OpStore %outloc %neg\n"
3168 		"             OpReturn\n"
3169 		"             OpFunctionEnd\n");
3170 
3171 	cases.push_back(CaseParameter("bool",			"%type = OpTypeBool"));
3172 	cases.push_back(CaseParameter("sint32",			"%type = OpTypeInt 32 1"));
3173 	cases.push_back(CaseParameter("uint32",			"%type = OpTypeInt 32 0"));
3174 	cases.push_back(CaseParameter("float32",		"%type = OpTypeFloat 32"));
3175 	cases.push_back(CaseParameter("vec4float32",	"%type = OpTypeVector %f32 4"));
3176 	cases.push_back(CaseParameter("vec2uint32",		"%type = OpTypeVector %u32 2"));
3177 	cases.push_back(CaseParameter("matrix",			"%type = OpTypeMatrix %fvec3 3"));
3178 	cases.push_back(CaseParameter("image",			"%type = OpTypeImage %f32 2D 0 0 0 1 Unknown"));
3179 	cases.push_back(CaseParameter("sampler",		"%type = OpTypeSampler"));
3180 	cases.push_back(CaseParameter("sampledimage",	"%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n"
3181 													"%type = OpTypeSampledImage %img"));
3182 	cases.push_back(CaseParameter("array",			"%100 = OpConstant %u32 100\n"
3183 													"%type = OpTypeArray %i32 %100"));
3184 	cases.push_back(CaseParameter("runtimearray",	"%type = OpTypeRuntimeArray %f32"));
3185 	cases.push_back(CaseParameter("struct",			"%type = OpTypeStruct %f32 %i32 %u32"));
3186 	cases.push_back(CaseParameter("pointer",		"%type = OpTypePointer Function %i32"));
3187 
3188 	fillRandomScalars(rnd, 1.f, 100.f, &positiveFloats[0], numElements);
3189 
3190 	for (size_t ndx = 0; ndx < numElements; ++ndx)
3191 		negativeFloats[ndx] = -positiveFloats[ndx];
3192 
3193 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
3194 	{
3195 		map<string, string>		specializations;
3196 		ComputeShaderSpec		spec;
3197 
3198 		specializations["TYPE"] = cases[caseNdx].param;
3199 		spec.assembly = shaderTemplate.specialize(specializations);
3200 		spec.inputs.push_back(BufferSp(new Float32Buffer(positiveFloats)));
3201 		spec.outputs.push_back(BufferSp(new Float32Buffer(negativeFloats)));
3202 		spec.numWorkGroups = IVec3(numElements, 1, 1);
3203 
3204 		group->addChild(new SpvAsmComputeShaderCase(testCtx, cases[caseNdx].name, cases[caseNdx].name, spec));
3205 	}
3206 
3207 		return group.release();
3208 }
// An entry-point name paired with the shader stage that entry point is used for.
typedef std::pair<std::string, VkShaderStageFlagBits>	EntryToStage;
// Module name -> the entry points (with their stages) used from that module.
typedef map<string, vector<EntryToStage> >				ModuleMap;
// Shader stage -> list of deInt32 values; stored as InstanceContext::specConstants.
// NOTE(review): presumably the specialization constant values for that stage - confirm at the use site.
typedef map<VkShaderStageFlagBits, vector<deInt32> >	StageToSpecConstantMap;
3212 
3213 // Context for a specific test instantiation. For example, an instantiation
3214 // may test colors yellow/magenta/cyan/mauve in a tesselation shader
3215 // with an entry point named 'main_to_the_main'
3216 struct InstanceContext
3217 {
3218 	// Map of modules to what entry_points we care to use from those modules.
3219 	ModuleMap				moduleMap;
3220 	RGBA					inputColors[4];
3221 	RGBA					outputColors[4];
3222 	// Concrete SPIR-V code to test via boilerplate specialization.
3223 	map<string, string>		testCodeFragments;
3224 	StageToSpecConstantMap	specConstants;
3225 	bool					hasTessellation;
3226 	VkShaderStageFlagBits	requiredStages;
3227 
InstanceContextvkt::SpirVAssembly::__anon889ef7250111::InstanceContext3228 	InstanceContext (const RGBA (&inputs)[4], const RGBA (&outputs)[4], const map<string, string>& testCodeFragments_, const StageToSpecConstantMap& specConstants_)
3229 		: testCodeFragments		(testCodeFragments_)
3230 		, specConstants			(specConstants_)
3231 		, hasTessellation		(false)
3232 		, requiredStages		(static_cast<VkShaderStageFlagBits>(0))
3233 	{
3234 		inputColors[0]		= inputs[0];
3235 		inputColors[1]		= inputs[1];
3236 		inputColors[2]		= inputs[2];
3237 		inputColors[3]		= inputs[3];
3238 
3239 		outputColors[0]		= outputs[0];
3240 		outputColors[1]		= outputs[1];
3241 		outputColors[2]		= outputs[2];
3242 		outputColors[3]		= outputs[3];
3243 	}
3244 
InstanceContextvkt::SpirVAssembly::__anon889ef7250111::InstanceContext3245 	InstanceContext (const InstanceContext& other)
3246 		: moduleMap			(other.moduleMap)
3247 		, testCodeFragments	(other.testCodeFragments)
3248 		, specConstants		(other.specConstants)
3249 		, hasTessellation	(other.hasTessellation)
3250 		, requiredStages    (other.requiredStages)
3251 	{
3252 		inputColors[0]		= other.inputColors[0];
3253 		inputColors[1]		= other.inputColors[1];
3254 		inputColors[2]		= other.inputColors[2];
3255 		inputColors[3]		= other.inputColors[3];
3256 
3257 		outputColors[0]		= other.outputColors[0];
3258 		outputColors[1]		= other.outputColors[1];
3259 		outputColors[2]		= other.outputColors[2];
3260 		outputColors[3]		= other.outputColors[3];
3261 	}
3262 };
3263 
3264 // A description of a shader to be used for a single stage of the graphics pipeline.
3265 struct ShaderElement
3266 {
3267 	// The module that contains this shader entrypoint.
3268 	string					moduleName;
3269 
3270 	// The name of the entrypoint.
3271 	string					entryName;
3272 
3273 	// Which shader stage this entry point represents.
3274 	VkShaderStageFlagBits	stage;
3275 
ShaderElementvkt::SpirVAssembly::__anon889ef7250111::ShaderElement3276 	ShaderElement (const string& moduleName_, const string& entryPoint_, VkShaderStageFlagBits shaderStage_)
3277 		: moduleName(moduleName_)
3278 		, entryName(entryPoint_)
3279 		, stage(shaderStage_)
3280 	{
3281 	}
3282 };
3283 
getDefaultColors(RGBA (& colors)[4])3284 void getDefaultColors (RGBA (&colors)[4])
3285 {
3286 	colors[0] = RGBA::white();
3287 	colors[1] = RGBA::red();
3288 	colors[2] = RGBA::green();
3289 	colors[3] = RGBA::blue();
3290 }
3291 
getHalfColorsFullAlpha(RGBA (& colors)[4])3292 void getHalfColorsFullAlpha (RGBA (&colors)[4])
3293 {
3294 	colors[0] = RGBA(127, 127, 127, 255);
3295 	colors[1] = RGBA(127, 0,   0,	255);
3296 	colors[2] = RGBA(0,	  127, 0,	255);
3297 	colors[3] = RGBA(0,	  0,   127, 255);
3298 }
3299 
getInvertedDefaultColors(RGBA (& colors)[4])3300 void getInvertedDefaultColors (RGBA (&colors)[4])
3301 {
3302 	colors[0] = RGBA(0,		0,		0,		255);
3303 	colors[1] = RGBA(0,		255,	255,	255);
3304 	colors[2] = RGBA(255,	0,		255,	255);
3305 	colors[3] = RGBA(255,	255,	0,		255);
3306 }
3307 
3308 // Turns a statically sized array of ShaderElements into an instance-context
3309 // by setting up the mapping of modules to their contained shaders and stages.
3310 // The inputs and expected outputs are given by inputColors and outputColors
3311 template<size_t N>
createInstanceContext(const ShaderElement (& elements)[N],const RGBA (& inputColors)[4],const RGBA (& outputColors)[4],const map<string,string> & testCodeFragments,const StageToSpecConstantMap & specConstants)3312 InstanceContext createInstanceContext (const ShaderElement (&elements)[N], const RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments, const StageToSpecConstantMap& specConstants)
3313 {
3314 	InstanceContext ctx (inputColors, outputColors, testCodeFragments, specConstants);
3315 	for (size_t i = 0; i < N; ++i)
3316 	{
3317 		ctx.moduleMap[elements[i].moduleName].push_back(std::make_pair(elements[i].entryName, elements[i].stage));
3318 		ctx.requiredStages = static_cast<VkShaderStageFlagBits>(ctx.requiredStages | elements[i].stage);
3319 	}
3320 	return ctx;
3321 }
3322 
3323 template<size_t N>
createInstanceContext(const ShaderElement (& elements)[N],RGBA (& inputColors)[4],const RGBA (& outputColors)[4],const map<string,string> & testCodeFragments)3324 inline InstanceContext createInstanceContext (const ShaderElement (&elements)[N], RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments)
3325 {
3326 	return createInstanceContext(elements, inputColors, outputColors, testCodeFragments, StageToSpecConstantMap());
3327 }
3328 
3329 // The same as createInstanceContext above, but with default colors.
3330 template<size_t N>
createInstanceContext(const ShaderElement (& elements)[N],const map<string,string> & testCodeFragments)3331 InstanceContext createInstanceContext (const ShaderElement (&elements)[N], const map<string, string>& testCodeFragments)
3332 {
3333 	RGBA defaultColors[4];
3334 	getDefaultColors(defaultColors);
3335 	return createInstanceContext(elements, defaultColors, defaultColors, testCodeFragments);
3336 }
3337 
3338 // For the current InstanceContext, constructs the required modules and shader stage create infos.
createPipelineShaderStages(const DeviceInterface & vk,const VkDevice vkDevice,InstanceContext & instance,Context & context,vector<ModuleHandleSp> & modules,vector<VkPipelineShaderStageCreateInfo> & createInfos)3339 void createPipelineShaderStages (const DeviceInterface& vk, const VkDevice vkDevice, InstanceContext& instance, Context& context, vector<ModuleHandleSp>& modules, vector<VkPipelineShaderStageCreateInfo>& createInfos)
3340 {
3341 	for (ModuleMap::const_iterator moduleNdx = instance.moduleMap.begin(); moduleNdx != instance.moduleMap.end(); ++moduleNdx)
3342 	{
3343 		const ModuleHandleSp mod(new Unique<VkShaderModule>(createShaderModule(vk, vkDevice, context.getBinaryCollection().get(moduleNdx->first), 0)));
3344 		modules.push_back(ModuleHandleSp(mod));
3345 		for (vector<EntryToStage>::const_iterator shaderNdx = moduleNdx->second.begin(); shaderNdx != moduleNdx->second.end(); ++shaderNdx)
3346 		{
3347 			const EntryToStage&						stage			= *shaderNdx;
3348 			const VkPipelineShaderStageCreateInfo	shaderParam		=
3349 			{
3350 				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	//	VkStructureType			sType;
3351 				DE_NULL,												//	const void*				pNext;
3352 				(VkPipelineShaderStageCreateFlags)0,
3353 				stage.second,											//	VkShaderStageFlagBits	stage;
3354 				**modules.back(),										//	VkShaderModule			module;
3355 				stage.first.c_str(),									//	const char*				pName;
3356 				(const VkSpecializationInfo*)DE_NULL,
3357 			};
3358 			createInfos.push_back(shaderParam);
3359 		}
3360 	}
3361 }
3362 
// SPIR-V assembly text shared by the graphics shader boilerplates below.
// Declares %void, %bool, the 32-bit signed/unsigned int and float types, the
// float and bool vector types, two function types (%v4f32_function takes and
// returns a %v4f32; %fun returns void and takes nothing), and pointer types for
// the Input (%ip_*), Output (%op_*) and Function (%fp_*) storage classes.
#define SPIRV_ASSEMBLY_TYPES																	\
	"%void = OpTypeVoid\n"																		\
	"%bool = OpTypeBool\n"																		\
																								\
	"%i32 = OpTypeInt 32 1\n"																	\
	"%u32 = OpTypeInt 32 0\n"																	\
																								\
	"%f32 = OpTypeFloat 32\n"																	\
	"%v3f32 = OpTypeVector %f32 3\n"															\
	"%v4f32 = OpTypeVector %f32 4\n"															\
	"%v4bool = OpTypeVector %bool 4\n"															\
																								\
	"%v4f32_function = OpTypeFunction %v4f32 %v4f32\n"											\
	"%fun = OpTypeFunction %void\n"																\
																								\
	"%ip_f32 = OpTypePointer Input %f32\n"														\
	"%ip_i32 = OpTypePointer Input %i32\n"														\
	"%ip_v3f32 = OpTypePointer Input %v3f32\n"													\
	"%ip_v4f32 = OpTypePointer Input %v4f32\n"													\
																								\
	"%op_f32 = OpTypePointer Output %f32\n"														\
	"%op_v4f32 = OpTypePointer Output %v4f32\n"													\
																								\
	"%fp_f32   = OpTypePointer Function %f32\n"													\
	"%fp_i32   = OpTypePointer Function %i32\n"													\
	"%fp_v4f32 = OpTypePointer Function %v4f32\n"
3389 
// SPIR-V assembly text declaring commonly used float, signed int and unsigned
// int scalar constants plus a few %v4f32 composite constants.
// The declarations refer to %f32, %i32, %u32 and %v4f32, so this text has to
// be emitted after SPIRV_ASSEMBLY_TYPES.
#define SPIRV_ASSEMBLY_CONSTANTS																\
	"%c_f32_1 = OpConstant %f32 1.0\n"															\
	"%c_f32_0 = OpConstant %f32 0.0\n"															\
	"%c_f32_0_5 = OpConstant %f32 0.5\n"														\
	"%c_f32_n1  = OpConstant %f32 -1.\n"														\
	"%c_f32_7 = OpConstant %f32 7.0\n"															\
	"%c_f32_8 = OpConstant %f32 8.0\n"															\
	"%c_i32_0 = OpConstant %i32 0\n"															\
	"%c_i32_1 = OpConstant %i32 1\n"															\
	"%c_i32_2 = OpConstant %i32 2\n"															\
	"%c_i32_3 = OpConstant %i32 3\n"															\
	"%c_i32_4 = OpConstant %i32 4\n"															\
	"%c_u32_0 = OpConstant %u32 0\n"															\
	"%c_u32_1 = OpConstant %u32 1\n"															\
	"%c_u32_2 = OpConstant %u32 2\n"															\
	"%c_u32_3 = OpConstant %u32 3\n"															\
	"%c_u32_32 = OpConstant %u32 32\n"															\
	"%c_u32_4 = OpConstant %u32 4\n"															\
	"%c_u32_31_bits = OpConstant %u32 0x7FFFFFFF\n"												\
	"%c_v4f32_1_1_1_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"		\
	"%c_v4f32_1_0_0_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_1\n"		\
	"%c_v4f32_0_5_0_5_0_5_0_5 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5\n"
3412 
// SPIR-V assembly text declaring fixed-size array types of %f32 and %v4f32
// together with Input/Output pointer types to some of them.
// Array lengths are given by the %c_u32_* constants, so this text has to be
// emitted after SPIRV_ASSEMBLY_CONSTANTS (and SPIRV_ASSEMBLY_TYPES).
#define SPIRV_ASSEMBLY_ARRAYS																	\
	"%a1f32 = OpTypeArray %f32 %c_u32_1\n"														\
	"%a2f32 = OpTypeArray %f32 %c_u32_2\n"														\
	"%a3v4f32 = OpTypeArray %v4f32 %c_u32_3\n"													\
	"%a4f32 = OpTypeArray %f32 %c_u32_4\n"														\
	"%a32v4f32 = OpTypeArray %v4f32 %c_u32_32\n"												\
	"%ip_a3v4f32 = OpTypePointer Input %a3v4f32\n"												\
	"%ip_a32v4f32 = OpTypePointer Input %a32v4f32\n"											\
	"%op_a2f32 = OpTypePointer Output %a2f32\n"													\
	"%op_a3v4f32 = OpTypePointer Output %a3v4f32\n"												\
	"%op_a4f32 = OpTypePointer Output %a4f32\n"
3424 
3425 // Creates vertex-shader assembly by specializing a boilerplate StringTemplate
3426 // on fragments, which must (at least) map "testfun" to an OpFunction definition
3427 // for %test_code that takes and returns a %v4f32.  Boilerplate IDs are prefixed
3428 // with "BP_" to avoid collisions with fragments.
3429 //
3430 // It corresponds roughly to this GLSL:
3431 //;
3432 // layout(location = 0) in vec4 position;
3433 // layout(location = 1) in vec4 color;
3434 // layout(location = 1) out highp vec4 vtxColor;
3435 // void main (void) { gl_Position = position; vtxColor = test_func(color); }
makeVertexShaderAssembly(const map<string,string> & fragments)3436 string makeVertexShaderAssembly(const map<string, string>& fragments)
3437 {
3438 // \todo [2015-11-23 awoloszyn] Remove OpName once these have stabalized
3439 	static const char vertexShaderBoilerplate[] =
3440 		"OpCapability Shader\n"
3441 		"OpCapability ClipDistance\n"
3442 		"OpCapability CullDistance\n"
3443 		"OpMemoryModel Logical GLSL450\n"
3444 		"OpEntryPoint Vertex %main \"main\" %BP_stream %BP_position %BP_vtx_color %BP_color %BP_gl_VertexIndex %BP_gl_InstanceIndex\n"
3445 		"${debug:opt}\n"
3446 		"OpName %main \"main\"\n"
3447 		"OpName %BP_gl_PerVertex \"gl_PerVertex\"\n"
3448 		"OpMemberName %BP_gl_PerVertex 0 \"gl_Position\"\n"
3449 		"OpMemberName %BP_gl_PerVertex 1 \"gl_PointSize\"\n"
3450 		"OpMemberName %BP_gl_PerVertex 2 \"gl_ClipDistance\"\n"
3451 		"OpMemberName %BP_gl_PerVertex 3 \"gl_CullDistance\"\n"
3452 		"OpName %test_code \"testfun(vf4;\"\n"
3453 		"OpName %BP_stream \"\"\n"
3454 		"OpName %BP_position \"position\"\n"
3455 		"OpName %BP_vtx_color \"vtxColor\"\n"
3456 		"OpName %BP_color \"color\"\n"
3457 		"OpName %BP_gl_VertexIndex \"gl_VertexIndex\"\n"
3458 		"OpName %BP_gl_InstanceIndex \"gl_InstanceIndex\"\n"
3459 		"OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3460 		"OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3461 		"OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3462 		"OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3463 		"OpDecorate %BP_gl_PerVertex Block\n"
3464 		"OpDecorate %BP_position Location 0\n"
3465 		"OpDecorate %BP_vtx_color Location 1\n"
3466 		"OpDecorate %BP_color Location 1\n"
3467 		"OpDecorate %BP_gl_VertexIndex BuiltIn VertexIndex\n"
3468 		"OpDecorate %BP_gl_InstanceIndex BuiltIn InstanceIndex\n"
3469 		"${decoration:opt}\n"
3470 		SPIRV_ASSEMBLY_TYPES
3471 		SPIRV_ASSEMBLY_CONSTANTS
3472 		SPIRV_ASSEMBLY_ARRAYS
3473 		"%BP_gl_PerVertex = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3474 		"%BP_op_gl_PerVertex = OpTypePointer Output %BP_gl_PerVertex\n"
3475 		"%BP_stream = OpVariable %BP_op_gl_PerVertex Output\n"
3476 		"%BP_position = OpVariable %ip_v4f32 Input\n"
3477 		"%BP_vtx_color = OpVariable %op_v4f32 Output\n"
3478 		"%BP_color = OpVariable %ip_v4f32 Input\n"
3479 		"%BP_gl_VertexIndex = OpVariable %ip_i32 Input\n"
3480 		"%BP_gl_InstanceIndex = OpVariable %ip_i32 Input\n"
3481 		"${pre_main:opt}\n"
3482 		"%main = OpFunction %void None %fun\n"
3483 		"%BP_label = OpLabel\n"
3484 		"%BP_pos = OpLoad %v4f32 %BP_position\n"
3485 		"%BP_gl_pos = OpAccessChain %op_v4f32 %BP_stream %c_i32_0\n"
3486 		"OpStore %BP_gl_pos %BP_pos\n"
3487 		"%BP_col = OpLoad %v4f32 %BP_color\n"
3488 		"%BP_col_transformed = OpFunctionCall %v4f32 %test_code %BP_col\n"
3489 		"OpStore %BP_vtx_color %BP_col_transformed\n"
3490 		"OpReturn\n"
3491 		"OpFunctionEnd\n"
3492 		"${testfun}\n";
3493 	return tcu::StringTemplate(vertexShaderBoilerplate).specialize(fragments);
3494 }
3495 
3496 // Creates tess-control-shader assembly by specializing a boilerplate
3497 // StringTemplate on fragments, which must (at least) map "testfun" to an
3498 // OpFunction definition for %test_code that takes and returns a %v4f32.
3499 // Boilerplate IDs are prefixed with "BP_" to avoid collisions with fragments.
3500 //
3501 // It roughly corresponds to the following GLSL.
3502 //
3503 // #version 450
3504 // layout(vertices = 3) out;
3505 // layout(location = 1) in vec4 in_color[];
3506 // layout(location = 1) out vec4 out_color[];
3507 //
3508 // void main() {
3509 //   out_color[gl_InvocationID] = testfun(in_color[gl_InvocationID]);
3510 //   gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;
3511 //   if (gl_InvocationID == 0) {
3512 //     gl_TessLevelOuter[0] = 1.0;
3513 //     gl_TessLevelOuter[1] = 1.0;
3514 //     gl_TessLevelOuter[2] = 1.0;
3515 //     gl_TessLevelInner[0] = 1.0;
3516 //   }
3517 // }
makeTessControlShaderAssembly(const map<string,string> & fragments)3518 string makeTessControlShaderAssembly (const map<string, string>& fragments)
3519 {
3520 	static const char tessControlShaderBoilerplate[] =
3521 		"OpCapability Tessellation\n"
3522 		"OpCapability ClipDistance\n"
3523 		"OpCapability CullDistance\n"
3524 		"OpMemoryModel Logical GLSL450\n"
3525 		"OpEntryPoint TessellationControl %BP_main \"main\" %BP_out_color %BP_gl_InvocationID %BP_in_color %BP_gl_out %BP_gl_in %BP_gl_TessLevelOuter %BP_gl_TessLevelInner\n"
3526 		"OpExecutionMode %BP_main OutputVertices 3\n"
3527 		"${debug:opt}\n"
3528 		"OpName %BP_main \"main\"\n"
3529 		"OpName %test_code \"testfun(vf4;\"\n"
3530 		"OpName %BP_out_color \"out_color\"\n"
3531 		"OpName %BP_gl_InvocationID \"gl_InvocationID\"\n"
3532 		"OpName %BP_in_color \"in_color\"\n"
3533 		"OpName %BP_gl_PerVertex \"gl_PerVertex\"\n"
3534 		"OpMemberName %BP_gl_PerVertex 0 \"gl_Position\"\n"
3535 		"OpMemberName %BP_gl_PerVertex 1 \"gl_PointSize\"\n"
3536 		"OpMemberName %BP_gl_PerVertex 2 \"gl_ClipDistance\"\n"
3537 		"OpMemberName %BP_gl_PerVertex 3 \"gl_CullDistance\"\n"
3538 		"OpName %BP_gl_out \"gl_out\"\n"
3539 		"OpName %BP_gl_PVOut \"gl_PerVertex\"\n"
3540 		"OpMemberName %BP_gl_PVOut 0 \"gl_Position\"\n"
3541 		"OpMemberName %BP_gl_PVOut 1 \"gl_PointSize\"\n"
3542 		"OpMemberName %BP_gl_PVOut 2 \"gl_ClipDistance\"\n"
3543 		"OpMemberName %BP_gl_PVOut 3 \"gl_CullDistance\"\n"
3544 		"OpName %BP_gl_in \"gl_in\"\n"
3545 		"OpName %BP_gl_TessLevelOuter \"gl_TessLevelOuter\"\n"
3546 		"OpName %BP_gl_TessLevelInner \"gl_TessLevelInner\"\n"
3547 		"OpDecorate %BP_out_color Location 1\n"
3548 		"OpDecorate %BP_gl_InvocationID BuiltIn InvocationId\n"
3549 		"OpDecorate %BP_in_color Location 1\n"
3550 		"OpMemberDecorate %BP_gl_PerVertex 0 BuiltIn Position\n"
3551 		"OpMemberDecorate %BP_gl_PerVertex 1 BuiltIn PointSize\n"
3552 		"OpMemberDecorate %BP_gl_PerVertex 2 BuiltIn ClipDistance\n"
3553 		"OpMemberDecorate %BP_gl_PerVertex 3 BuiltIn CullDistance\n"
3554 		"OpDecorate %BP_gl_PerVertex Block\n"
3555 		"OpMemberDecorate %BP_gl_PVOut 0 BuiltIn Position\n"
3556 		"OpMemberDecorate %BP_gl_PVOut 1 BuiltIn PointSize\n"
3557 		"OpMemberDecorate %BP_gl_PVOut 2 BuiltIn ClipDistance\n"
3558 		"OpMemberDecorate %BP_gl_PVOut 3 BuiltIn CullDistance\n"
3559 		"OpDecorate %BP_gl_PVOut Block\n"
3560 		"OpDecorate %BP_gl_TessLevelOuter Patch\n"
3561 		"OpDecorate %BP_gl_TessLevelOuter BuiltIn TessLevelOuter\n"
3562 		"OpDecorate %BP_gl_TessLevelInner Patch\n"
3563 		"OpDecorate %BP_gl_TessLevelInner BuiltIn TessLevelInner\n"
3564 		"${decoration:opt}\n"
3565 		SPIRV_ASSEMBLY_TYPES
3566 		SPIRV_ASSEMBLY_CONSTANTS
3567 		SPIRV_ASSEMBLY_ARRAYS
3568 		"%BP_out_color = OpVariable %op_a3v4f32 Output\n"
3569 		"%BP_gl_InvocationID = OpVariable %ip_i32 Input\n"
3570 		"%BP_in_color = OpVariable %ip_a32v4f32 Input\n"
3571 		"%BP_gl_PerVertex = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3572 		"%BP_a3_gl_PerVertex = OpTypeArray %BP_gl_PerVertex %c_u32_3\n"
3573 		"%BP_op_a3_gl_PerVertex = OpTypePointer Output %BP_a3_gl_PerVertex\n"
3574 		"%BP_gl_out = OpVariable %BP_op_a3_gl_PerVertex Output\n"
3575 		"%BP_gl_PVOut = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3576 		"%BP_a32_gl_PVOut = OpTypeArray %BP_gl_PVOut %c_u32_32\n"
3577 		"%BP_ip_a32_gl_PVOut = OpTypePointer Input %BP_a32_gl_PVOut\n"
3578 		"%BP_gl_in = OpVariable %BP_ip_a32_gl_PVOut Input\n"
3579 		"%BP_gl_TessLevelOuter = OpVariable %op_a4f32 Output\n"
3580 		"%BP_gl_TessLevelInner = OpVariable %op_a2f32 Output\n"
3581 		"${pre_main:opt}\n"
3582 
3583 		"%BP_main = OpFunction %void None %fun\n"
3584 		"%BP_label = OpLabel\n"
3585 
3586 		"%BP_gl_Invoc = OpLoad %i32 %BP_gl_InvocationID\n"
3587 
3588 		"%BP_in_col_loc = OpAccessChain %ip_v4f32 %BP_in_color %BP_gl_Invoc\n"
3589 		"%BP_out_col_loc = OpAccessChain %op_v4f32 %BP_out_color %BP_gl_Invoc\n"
3590 		"%BP_in_col_val = OpLoad %v4f32 %BP_in_col_loc\n"
3591 		"%BP_clr_transformed = OpFunctionCall %v4f32 %test_code %BP_in_col_val\n"
3592 		"OpStore %BP_out_col_loc %BP_clr_transformed\n"
3593 
3594 		"%BP_in_pos_loc = OpAccessChain %ip_v4f32 %BP_gl_in %BP_gl_Invoc %c_i32_0\n"
3595 		"%BP_out_pos_loc = OpAccessChain %op_v4f32 %BP_gl_out %BP_gl_Invoc %c_i32_0\n"
3596 		"%BP_in_pos_val = OpLoad %v4f32 %BP_in_pos_loc\n"
3597 		"OpStore %BP_out_pos_loc %BP_in_pos_val\n"
3598 
3599 		"%BP_cmp = OpIEqual %bool %BP_gl_Invoc %c_i32_0\n"
3600 		"OpSelectionMerge %BP_merge_label None\n"
3601 		"OpBranchConditional %BP_cmp %BP_if_label %BP_merge_label\n"
3602 		"%BP_if_label = OpLabel\n"
3603 		"%BP_gl_TessLevelOuterPos_0 = OpAccessChain %op_f32 %BP_gl_TessLevelOuter %c_i32_0\n"
3604 		"%BP_gl_TessLevelOuterPos_1 = OpAccessChain %op_f32 %BP_gl_TessLevelOuter %c_i32_1\n"
3605 		"%BP_gl_TessLevelOuterPos_2 = OpAccessChain %op_f32 %BP_gl_TessLevelOuter %c_i32_2\n"
3606 		"%BP_gl_TessLevelInnerPos_0 = OpAccessChain %op_f32 %BP_gl_TessLevelInner %c_i32_0\n"
3607 		"OpStore %BP_gl_TessLevelOuterPos_0 %c_f32_1\n"
3608 		"OpStore %BP_gl_TessLevelOuterPos_1 %c_f32_1\n"
3609 		"OpStore %BP_gl_TessLevelOuterPos_2 %c_f32_1\n"
3610 		"OpStore %BP_gl_TessLevelInnerPos_0 %c_f32_1\n"
3611 		"OpBranch %BP_merge_label\n"
3612 		"%BP_merge_label = OpLabel\n"
3613 		"OpReturn\n"
3614 		"OpFunctionEnd\n"
3615 		"${testfun}\n";
3616 	return tcu::StringTemplate(tessControlShaderBoilerplate).specialize(fragments);
3617 }
3618 
3619 // Creates tess-evaluation-shader assembly by specializing a boilerplate
3620 // StringTemplate on fragments, which must (at least) map "testfun" to an
3621 // OpFunction definition for %test_code that takes and returns a %v4f32.
3622 // Boilerplate IDs are prefixed with "BP_" to avoid collisions with fragments.
3623 //
3624 // It roughly corresponds to the following glsl.
3625 //
3626 // #version 450
3627 //
3628 // layout(triangles, equal_spacing, ccw) in;
3629 // layout(location = 1) in vec4 in_color[];
3630 // layout(location = 1) out vec4 out_color;
3631 //
3632 // #define interpolate(val)
3633 //   vec4(gl_TessCoord.x) * val[0] + vec4(gl_TessCoord.y) * val[1] +
3634 //          vec4(gl_TessCoord.z) * val[2]
3635 //
3636 // void main() {
3637 //   gl_Position = vec4(gl_TessCoord.x) * gl_in[0].gl_Position +
3638 //                  vec4(gl_TessCoord.y) * gl_in[1].gl_Position +
3639 //                  vec4(gl_TessCoord.z) * gl_in[2].gl_Position;
3640 //   out_color = testfun(interpolate(in_color));
3641 // }
makeTessEvalShaderAssembly(const map<string,string> & fragments)3642 string makeTessEvalShaderAssembly(const map<string, string>& fragments)
3643 {
3644 	static const char tessEvalBoilerplate[] =
3645 		"OpCapability Tessellation\n"
3646 		"OpCapability ClipDistance\n"
3647 		"OpCapability CullDistance\n"
3648 		"OpMemoryModel Logical GLSL450\n"
3649 		"OpEntryPoint TessellationEvaluation %BP_main \"main\" %BP_stream %BP_gl_TessCoord %BP_gl_in %BP_out_color %BP_in_color\n"
3650 		"OpExecutionMode %BP_main Triangles\n"
3651 		"OpExecutionMode %BP_main SpacingEqual\n"
3652 		"OpExecutionMode %BP_main VertexOrderCcw\n"
3653 		"${debug:opt}\n"
3654 		"OpName %BP_main \"main\"\n"
3655 		"OpName %test_code \"testfun(vf4;\"\n"
3656 		"OpName %BP_gl_PerVertexOut \"gl_PerVertex\"\n"
3657 		"OpMemberName %BP_gl_PerVertexOut 0 \"gl_Position\"\n"
3658 		"OpMemberName %BP_gl_PerVertexOut 1 \"gl_PointSize\"\n"
3659 		"OpMemberName %BP_gl_PerVertexOut 2 \"gl_ClipDistance\"\n"
3660 		"OpMemberName %BP_gl_PerVertexOut 3 \"gl_CullDistance\"\n"
3661 		"OpName %BP_stream \"\"\n"
3662 		"OpName %BP_gl_TessCoord \"gl_TessCoord\"\n"
3663 		"OpName %BP_gl_PerVertexIn \"gl_PerVertex\"\n"
3664 		"OpMemberName %BP_gl_PerVertexIn 0 \"gl_Position\"\n"
3665 		"OpMemberName %BP_gl_PerVertexIn 1 \"gl_PointSize\"\n"
3666 		"OpMemberName %BP_gl_PerVertexIn 2 \"gl_ClipDistance\"\n"
3667 		"OpMemberName %BP_gl_PerVertexIn 3 \"gl_CullDistance\"\n"
3668 		"OpName %BP_gl_in \"gl_in\"\n"
3669 		"OpName %BP_out_color \"out_color\"\n"
3670 		"OpName %BP_in_color \"in_color\"\n"
3671 		"OpMemberDecorate %BP_gl_PerVertexOut 0 BuiltIn Position\n"
3672 		"OpMemberDecorate %BP_gl_PerVertexOut 1 BuiltIn PointSize\n"
3673 		"OpMemberDecorate %BP_gl_PerVertexOut 2 BuiltIn ClipDistance\n"
3674 		"OpMemberDecorate %BP_gl_PerVertexOut 3 BuiltIn CullDistance\n"
3675 		"OpDecorate %BP_gl_PerVertexOut Block\n"
3676 		"OpDecorate %BP_gl_TessCoord BuiltIn TessCoord\n"
3677 		"OpMemberDecorate %BP_gl_PerVertexIn 0 BuiltIn Position\n"
3678 		"OpMemberDecorate %BP_gl_PerVertexIn 1 BuiltIn PointSize\n"
3679 		"OpMemberDecorate %BP_gl_PerVertexIn 2 BuiltIn ClipDistance\n"
3680 		"OpMemberDecorate %BP_gl_PerVertexIn 3 BuiltIn CullDistance\n"
3681 		"OpDecorate %BP_gl_PerVertexIn Block\n"
3682 		"OpDecorate %BP_out_color Location 1\n"
3683 		"OpDecorate %BP_in_color Location 1\n"
3684 		"${decoration:opt}\n"
3685 		SPIRV_ASSEMBLY_TYPES
3686 		SPIRV_ASSEMBLY_CONSTANTS
3687 		SPIRV_ASSEMBLY_ARRAYS
3688 		"%BP_gl_PerVertexOut = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3689 		"%BP_op_gl_PerVertexOut = OpTypePointer Output %BP_gl_PerVertexOut\n"
3690 		"%BP_stream = OpVariable %BP_op_gl_PerVertexOut Output\n"
3691 		"%BP_gl_TessCoord = OpVariable %ip_v3f32 Input\n"
3692 		"%BP_gl_PerVertexIn = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3693 		"%BP_a32_gl_PerVertexIn = OpTypeArray %BP_gl_PerVertexIn %c_u32_32\n"
3694 		"%BP_ip_a32_gl_PerVertexIn = OpTypePointer Input %BP_a32_gl_PerVertexIn\n"
3695 		"%BP_gl_in = OpVariable %BP_ip_a32_gl_PerVertexIn Input\n"
3696 		"%BP_out_color = OpVariable %op_v4f32 Output\n"
3697 		"%BP_in_color = OpVariable %ip_a32v4f32 Input\n"
3698 		"${pre_main:opt}\n"
3699 		"%BP_main = OpFunction %void None %fun\n"
3700 		"%BP_label = OpLabel\n"
3701 		"%BP_gl_TC_0 = OpAccessChain %ip_f32 %BP_gl_TessCoord %c_u32_0\n"
3702 		"%BP_gl_TC_1 = OpAccessChain %ip_f32 %BP_gl_TessCoord %c_u32_1\n"
3703 		"%BP_gl_TC_2 = OpAccessChain %ip_f32 %BP_gl_TessCoord %c_u32_2\n"
3704 		"%BP_gl_in_gl_Pos_0 = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_0 %c_i32_0\n"
3705 		"%BP_gl_in_gl_Pos_1 = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_1 %c_i32_0\n"
3706 		"%BP_gl_in_gl_Pos_2 = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_2 %c_i32_0\n"
3707 
3708 		"%BP_gl_OPos = OpAccessChain %op_v4f32 %BP_stream %c_i32_0\n"
3709 		"%BP_in_color_0 = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_0\n"
3710 		"%BP_in_color_1 = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_1\n"
3711 		"%BP_in_color_2 = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_2\n"
3712 
3713 		"%BP_TC_W_0 = OpLoad %f32 %BP_gl_TC_0\n"
3714 		"%BP_TC_W_1 = OpLoad %f32 %BP_gl_TC_1\n"
3715 		"%BP_TC_W_2 = OpLoad %f32 %BP_gl_TC_2\n"
3716 		"%BP_v4f32_TC_0 = OpCompositeConstruct %v4f32 %BP_TC_W_0 %BP_TC_W_0 %BP_TC_W_0 %BP_TC_W_0\n"
3717 		"%BP_v4f32_TC_1 = OpCompositeConstruct %v4f32 %BP_TC_W_1 %BP_TC_W_1 %BP_TC_W_1 %BP_TC_W_1\n"
3718 		"%BP_v4f32_TC_2 = OpCompositeConstruct %v4f32 %BP_TC_W_2 %BP_TC_W_2 %BP_TC_W_2 %BP_TC_W_2\n"
3719 
3720 		"%BP_gl_IP_0 = OpLoad %v4f32 %BP_gl_in_gl_Pos_0\n"
3721 		"%BP_gl_IP_1 = OpLoad %v4f32 %BP_gl_in_gl_Pos_1\n"
3722 		"%BP_gl_IP_2 = OpLoad %v4f32 %BP_gl_in_gl_Pos_2\n"
3723 
3724 		"%BP_IP_W_0 = OpFMul %v4f32 %BP_v4f32_TC_0 %BP_gl_IP_0\n"
3725 		"%BP_IP_W_1 = OpFMul %v4f32 %BP_v4f32_TC_1 %BP_gl_IP_1\n"
3726 		"%BP_IP_W_2 = OpFMul %v4f32 %BP_v4f32_TC_2 %BP_gl_IP_2\n"
3727 
3728 		"%BP_pos_sum_0 = OpFAdd %v4f32 %BP_IP_W_0 %BP_IP_W_1\n"
3729 		"%BP_pos_sum_1 = OpFAdd %v4f32 %BP_pos_sum_0 %BP_IP_W_2\n"
3730 
3731 		"OpStore %BP_gl_OPos %BP_pos_sum_1\n"
3732 
3733 		"%BP_IC_0 = OpLoad %v4f32 %BP_in_color_0\n"
3734 		"%BP_IC_1 = OpLoad %v4f32 %BP_in_color_1\n"
3735 		"%BP_IC_2 = OpLoad %v4f32 %BP_in_color_2\n"
3736 
3737 		"%BP_IC_W_0 = OpFMul %v4f32 %BP_v4f32_TC_0 %BP_IC_0\n"
3738 		"%BP_IC_W_1 = OpFMul %v4f32 %BP_v4f32_TC_1 %BP_IC_1\n"
3739 		"%BP_IC_W_2 = OpFMul %v4f32 %BP_v4f32_TC_2 %BP_IC_2\n"
3740 
3741 		"%BP_col_sum_0 = OpFAdd %v4f32 %BP_IC_W_0 %BP_IC_W_1\n"
3742 		"%BP_col_sum_1 = OpFAdd %v4f32 %BP_col_sum_0 %BP_IC_W_2\n"
3743 
3744 		"%BP_clr_transformed = OpFunctionCall %v4f32 %test_code %BP_col_sum_1\n"
3745 
3746 		"OpStore %BP_out_color %BP_clr_transformed\n"
3747 		"OpReturn\n"
3748 		"OpFunctionEnd\n"
3749 		"${testfun}\n";
3750 	return tcu::StringTemplate(tessEvalBoilerplate).specialize(fragments);
3751 }
3752 
3753 // Creates geometry-shader assembly by specializing a boilerplate StringTemplate
3754 // on fragments, which must (at least) map "testfun" to an OpFunction definition
3755 // for %test_code that takes and returns a %v4f32.  Boilerplate IDs are prefixed
3756 // with "BP_" to avoid collisions with fragments.
3757 //
3758 // Derived from this GLSL:
3759 //
3760 // #version 450
3761 // layout(triangles) in;
3762 // layout(triangle_strip, max_vertices = 3) out;
3763 //
3764 // layout(location = 1) in vec4 in_color[];
3765 // layout(location = 1) out vec4 out_color;
3766 //
3767 // void main() {
3768 //   gl_Position = gl_in[0].gl_Position;
3769 //   out_color = test_fun(in_color[0]);
3770 //   EmitVertex();
3771 //   gl_Position = gl_in[1].gl_Position;
3772 //   out_color = test_fun(in_color[1]);
3773 //   EmitVertex();
3774 //   gl_Position = gl_in[2].gl_Position;
3775 //   out_color = test_fun(in_color[2]);
3776 //   EmitVertex();
3777 //   EndPrimitive();
3778 // }
makeGeometryShaderAssembly(const map<string,string> & fragments)3779 string makeGeometryShaderAssembly(const map<string, string>& fragments)
3780 {
3781 	static const char geometryShaderBoilerplate[] =
3782 		"OpCapability Geometry\n"
3783 		"OpCapability ClipDistance\n"
3784 		"OpCapability CullDistance\n"
3785 		"OpMemoryModel Logical GLSL450\n"
3786 		"OpEntryPoint Geometry %BP_main \"main\" %BP_out_gl_position %BP_gl_in %BP_out_color %BP_in_color\n"
3787 		"OpExecutionMode %BP_main Triangles\n"
3788 		"OpExecutionMode %BP_main OutputTriangleStrip\n"
3789 		"OpExecutionMode %BP_main OutputVertices 3\n"
3790 		"${debug:opt}\n"
3791 		"OpName %BP_main \"main\"\n"
3792 		"OpName %BP_per_vertex_in \"gl_PerVertex\"\n"
3793 		"OpMemberName %BP_per_vertex_in 0 \"gl_Position\"\n"
3794 		"OpMemberName %BP_per_vertex_in 1 \"gl_PointSize\"\n"
3795 		"OpMemberName %BP_per_vertex_in 2 \"gl_ClipDistance\"\n"
3796 		"OpMemberName %BP_per_vertex_in 3 \"gl_CullDistance\"\n"
3797 		"OpName %BP_gl_in \"gl_in\"\n"
3798 		"OpName %BP_out_color \"out_color\"\n"
3799 		"OpName %BP_in_color \"in_color\"\n"
3800 		"OpName %test_code \"testfun(vf4;\"\n"
3801 		"OpDecorate %BP_out_gl_position BuiltIn Position\n"
3802 		"OpMemberDecorate %BP_per_vertex_in 0 BuiltIn Position\n"
3803 		"OpMemberDecorate %BP_per_vertex_in 1 BuiltIn PointSize\n"
3804 		"OpMemberDecorate %BP_per_vertex_in 2 BuiltIn ClipDistance\n"
3805 		"OpMemberDecorate %BP_per_vertex_in 3 BuiltIn CullDistance\n"
3806 		"OpDecorate %BP_per_vertex_in Block\n"
3807 		"OpDecorate %BP_out_color Location 1\n"
3808 		"OpDecorate %BP_in_color Location 1\n"
3809 		"${decoration:opt}\n"
3810 		SPIRV_ASSEMBLY_TYPES
3811 		SPIRV_ASSEMBLY_CONSTANTS
3812 		SPIRV_ASSEMBLY_ARRAYS
3813 		"%BP_per_vertex_in = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
3814 		"%BP_a3_per_vertex_in = OpTypeArray %BP_per_vertex_in %c_u32_3\n"
3815 		"%BP_ip_a3_per_vertex_in = OpTypePointer Input %BP_a3_per_vertex_in\n"
3816 
3817 		"%BP_gl_in = OpVariable %BP_ip_a3_per_vertex_in Input\n"
3818 		"%BP_out_color = OpVariable %op_v4f32 Output\n"
3819 		"%BP_in_color = OpVariable %ip_a3v4f32 Input\n"
3820 		"%BP_out_gl_position = OpVariable %op_v4f32 Output\n"
3821 		"${pre_main:opt}\n"
3822 
3823 		"%BP_main = OpFunction %void None %fun\n"
3824 		"%BP_label = OpLabel\n"
3825 		"%BP_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_0 %c_i32_0\n"
3826 		"%BP_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_1 %c_i32_0\n"
3827 		"%BP_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %BP_gl_in %c_i32_2 %c_i32_0\n"
3828 
3829 		"%BP_in_position_0 = OpLoad %v4f32 %BP_gl_in_0_gl_position\n"
3830 		"%BP_in_position_1 = OpLoad %v4f32 %BP_gl_in_1_gl_position\n"
3831 		"%BP_in_position_2 = OpLoad %v4f32 %BP_gl_in_2_gl_position \n"
3832 
3833 		"%BP_in_color_0_ptr = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_0\n"
3834 		"%BP_in_color_1_ptr = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_1\n"
3835 		"%BP_in_color_2_ptr = OpAccessChain %ip_v4f32 %BP_in_color %c_i32_2\n"
3836 
3837 		"%BP_in_color_0 = OpLoad %v4f32 %BP_in_color_0_ptr\n"
3838 		"%BP_in_color_1 = OpLoad %v4f32 %BP_in_color_1_ptr\n"
3839 		"%BP_in_color_2 = OpLoad %v4f32 %BP_in_color_2_ptr\n"
3840 
3841 		"%BP_transformed_in_color_0 = OpFunctionCall %v4f32 %test_code %BP_in_color_0\n"
3842 		"%BP_transformed_in_color_1 = OpFunctionCall %v4f32 %test_code %BP_in_color_1\n"
3843 		"%BP_transformed_in_color_2 = OpFunctionCall %v4f32 %test_code %BP_in_color_2\n"
3844 
3845 
3846 		"OpStore %BP_out_gl_position %BP_in_position_0\n"
3847 		"OpStore %BP_out_color %BP_transformed_in_color_0\n"
3848 		"OpEmitVertex\n"
3849 
3850 		"OpStore %BP_out_gl_position %BP_in_position_1\n"
3851 		"OpStore %BP_out_color %BP_transformed_in_color_1\n"
3852 		"OpEmitVertex\n"
3853 
3854 		"OpStore %BP_out_gl_position %BP_in_position_2\n"
3855 		"OpStore %BP_out_color %BP_transformed_in_color_2\n"
3856 		"OpEmitVertex\n"
3857 
3858 		"OpEndPrimitive\n"
3859 		"OpReturn\n"
3860 		"OpFunctionEnd\n"
3861 		"${testfun}\n";
3862 	return tcu::StringTemplate(geometryShaderBoilerplate).specialize(fragments);
3863 }
3864 
3865 // Creates fragment-shader assembly by specializing a boilerplate StringTemplate
3866 // on fragments, which must (at least) map "testfun" to an OpFunction definition
3867 // for %test_code that takes and returns a %v4f32.  Boilerplate IDs are prefixed
3868 // with "BP_" to avoid collisions with fragments.
3869 //
3870 // Derived from this GLSL:
3871 //
3872 // layout(location = 1) in highp vec4 vtxColor;
3873 // layout(location = 0) out highp vec4 fragColor;
3874 // highp vec4 testfun(highp vec4 x) { return x; }
3875 // void main(void) { fragColor = testfun(vtxColor); }
3876 //
3877 // with modifications including passing vtxColor by value and ripping out
3878 // testfun() definition.
makeFragmentShaderAssembly(const map<string,string> & fragments)3879 string makeFragmentShaderAssembly(const map<string, string>& fragments)
3880 {
3881 	static const char fragmentShaderBoilerplate[] =
3882 		"OpCapability Shader\n"
3883 		"OpMemoryModel Logical GLSL450\n"
3884 		"OpEntryPoint Fragment %BP_main \"main\" %BP_vtxColor %BP_fragColor\n"
3885 		"OpExecutionMode %BP_main OriginUpperLeft\n"
3886 		"${debug:opt}\n"
3887 		"OpName %BP_main \"main\"\n"
3888 		"OpName %BP_fragColor \"fragColor\"\n"
3889 		"OpName %BP_vtxColor \"vtxColor\"\n"
3890 		"OpName %test_code \"testfun(vf4;\"\n"
3891 		"OpDecorate %BP_fragColor Location 0\n"
3892 		"OpDecorate %BP_vtxColor Location 1\n"
3893 		"${decoration:opt}\n"
3894 		SPIRV_ASSEMBLY_TYPES
3895 		SPIRV_ASSEMBLY_CONSTANTS
3896 		SPIRV_ASSEMBLY_ARRAYS
3897 		"%BP_fragColor = OpVariable %op_v4f32 Output\n"
3898 		"%BP_vtxColor = OpVariable %ip_v4f32 Input\n"
3899 		"${pre_main:opt}\n"
3900 		"%BP_main = OpFunction %void None %fun\n"
3901 		"%BP_label_main = OpLabel\n"
3902 		"%BP_tmp1 = OpLoad %v4f32 %BP_vtxColor\n"
3903 		"%BP_tmp2 = OpFunctionCall %v4f32 %test_code %BP_tmp1\n"
3904 		"OpStore %BP_fragColor %BP_tmp2\n"
3905 		"OpReturn\n"
3906 		"OpFunctionEnd\n"
3907 		"${testfun}\n";
3908 	return tcu::StringTemplate(fragmentShaderBoilerplate).specialize(fragments);
3909 }
3910 
// Returns the fragment map for a simple pass-through shader (of any kind):
// its only entry, "testfun", defines a %test_code function that returns its
// %v4f32 argument unchanged.
map<string, string> passthruFragments(void)
{
	// Identity %test_code: hands its single parameter straight back.
	static const char identityTestFun[] =
		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
		"%param1 = OpFunctionParameter %v4f32\n"
		"%label_testfun = OpLabel\n"
		"OpReturnValue %param1\n"
		"OpFunctionEnd\n";

	map<string, string> result;
	result["testfun"] = identityTestFun;
	return result;
}
3924 
3925 // Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3926 // Vertex shader gets custom code from context, the rest are pass-through.
addShaderCodeCustomVertex(vk::SourceCollections & dst,InstanceContext context)3927 void addShaderCodeCustomVertex(vk::SourceCollections& dst, InstanceContext context)
3928 {
3929 	map<string, string> passthru = passthruFragments();
3930 	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(context.testCodeFragments);
3931 	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3932 }
3933 
3934 // Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3935 // Tessellation control shader gets custom code from context, the rest are
3936 // pass-through.
addShaderCodeCustomTessControl(vk::SourceCollections & dst,InstanceContext context)3937 void addShaderCodeCustomTessControl(vk::SourceCollections& dst, InstanceContext context)
3938 {
3939 	map<string, string> passthru = passthruFragments();
3940 	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3941 	dst.spirvAsmSources.add("tessc") << makeTessControlShaderAssembly(context.testCodeFragments);
3942 	dst.spirvAsmSources.add("tesse") << makeTessEvalShaderAssembly(passthru);
3943 	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3944 }
3945 
3946 // Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3947 // Tessellation evaluation shader gets custom code from context, the rest are
3948 // pass-through.
addShaderCodeCustomTessEval(vk::SourceCollections & dst,InstanceContext context)3949 void addShaderCodeCustomTessEval(vk::SourceCollections& dst, InstanceContext context)
3950 {
3951 	map<string, string> passthru = passthruFragments();
3952 	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3953 	dst.spirvAsmSources.add("tessc") << makeTessControlShaderAssembly(passthru);
3954 	dst.spirvAsmSources.add("tesse") << makeTessEvalShaderAssembly(context.testCodeFragments);
3955 	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3956 }
3957 
3958 // Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3959 // Geometry shader gets custom code from context, the rest are pass-through.
addShaderCodeCustomGeometry(vk::SourceCollections & dst,InstanceContext context)3960 void addShaderCodeCustomGeometry(vk::SourceCollections& dst, InstanceContext context)
3961 {
3962 	map<string, string> passthru = passthruFragments();
3963 	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3964 	dst.spirvAsmSources.add("geom") << makeGeometryShaderAssembly(context.testCodeFragments);
3965 	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(passthru);
3966 }
3967 
3968 // Adds shader assembly text to dst.spirvAsmSources for all shader kinds.
3969 // Fragment shader gets custom code from context, the rest are pass-through.
addShaderCodeCustomFragment(vk::SourceCollections & dst,InstanceContext context)3970 void addShaderCodeCustomFragment(vk::SourceCollections& dst, InstanceContext context)
3971 {
3972 	map<string, string> passthru = passthruFragments();
3973 	dst.spirvAsmSources.add("vert") << makeVertexShaderAssembly(passthru);
3974 	dst.spirvAsmSources.add("frag") << makeFragmentShaderAssembly(context.testCodeFragments);
3975 }
3976 
createCombinedModule(vk::SourceCollections & dst,InstanceContext)3977 void createCombinedModule(vk::SourceCollections& dst, InstanceContext)
3978 {
3979 	// \todo [2015-12-07 awoloszyn] Make tessellation / geometry conditional
3980 	// \todo [2015-12-07 awoloszyn] Remove OpName and OpMemberName at some point
3981 	dst.spirvAsmSources.add("module") <<
3982 		"OpCapability Shader\n"
3983 		"OpCapability ClipDistance\n"
3984 		"OpCapability CullDistance\n"
3985 		"OpCapability Geometry\n"
3986 		"OpCapability Tessellation\n"
3987 		"OpMemoryModel Logical GLSL450\n"
3988 
3989 		"OpEntryPoint Vertex %vert_main \"main\" %vert_Position %vert_vtxColor %vert_color %vert_vtxPosition %vert_vertex_id %vert_instance_id\n"
3990 		"OpEntryPoint Geometry %geom_main \"main\" %geom_out_gl_position %geom_gl_in %geom_out_color %geom_in_color\n"
3991 		"OpEntryPoint TessellationControl %tessc_main \"main\" %tessc_out_color %tessc_gl_InvocationID %tessc_in_color %tessc_out_position %tessc_in_position %tessc_gl_TessLevelOuter %tessc_gl_TessLevelInner\n"
3992 		"OpEntryPoint TessellationEvaluation %tesse_main \"main\" %tesse_stream %tesse_gl_tessCoord %tesse_in_position %tesse_out_color %tesse_in_color \n"
3993 		"OpEntryPoint Fragment %frag_main \"main\" %frag_vtxColor %frag_fragColor\n"
3994 
3995 		"OpExecutionMode %geom_main Triangles\n"
3996 		"OpExecutionMode %geom_main OutputTriangleStrip\n"
3997 		"OpExecutionMode %geom_main OutputVertices 3\n"
3998 
3999 		"OpExecutionMode %tessc_main OutputVertices 3\n"
4000 
4001 		"OpExecutionMode %tesse_main Triangles\n"
4002 
4003 		"OpExecutionMode %frag_main OriginUpperLeft\n"
4004 
4005 		"OpName %vert_main \"main\"\n"
4006 		"OpName %vert_vtxPosition \"vtxPosition\"\n"
4007 		"OpName %vert_Position \"position\"\n"
4008 		"OpName %vert_vtxColor \"vtxColor\"\n"
4009 		"OpName %vert_color \"color\"\n"
4010 		"OpName %vert_vertex_id \"gl_VertexIndex\"\n"
4011 		"OpName %vert_instance_id \"gl_InstanceIndex\"\n"
4012 		"OpName %geom_main \"main\"\n"
4013 		"OpName %geom_per_vertex_in \"gl_PerVertex\"\n"
4014 		"OpMemberName %geom_per_vertex_in 0 \"gl_Position\"\n"
4015 		"OpMemberName %geom_per_vertex_in 1 \"gl_PointSize\"\n"
4016 		"OpMemberName %geom_per_vertex_in 2 \"gl_ClipDistance\"\n"
4017 		"OpMemberName %geom_per_vertex_in 3 \"gl_CullDistance\"\n"
4018 		"OpName %geom_gl_in \"gl_in\"\n"
4019 		"OpName %geom_out_color \"out_color\"\n"
4020 		"OpName %geom_in_color \"in_color\"\n"
4021 		"OpName %tessc_main \"main\"\n"
4022 		"OpName %tessc_out_color \"out_color\"\n"
4023 		"OpName %tessc_gl_InvocationID \"gl_InvocationID\"\n"
4024 		"OpName %tessc_in_color \"in_color\"\n"
4025 		"OpName %tessc_out_position \"out_position\"\n"
4026 		"OpName %tessc_in_position \"in_position\"\n"
4027 		"OpName %tessc_gl_TessLevelOuter \"gl_TessLevelOuter\"\n"
4028 		"OpName %tessc_gl_TessLevelInner \"gl_TessLevelInner\"\n"
4029 		"OpName %tesse_main \"main\"\n"
4030 		"OpName %tesse_per_vertex_out \"gl_PerVertex\"\n"
4031 		"OpMemberName %tesse_per_vertex_out 0 \"gl_Position\"\n"
4032 		"OpMemberName %tesse_per_vertex_out 1 \"gl_PointSize\"\n"
4033 		"OpMemberName %tesse_per_vertex_out 2 \"gl_ClipDistance\"\n"
4034 		"OpMemberName %tesse_per_vertex_out 3 \"gl_CullDistance\"\n"
4035 		"OpName %tesse_stream \"\"\n"
4036 		"OpName %tesse_gl_tessCoord \"gl_TessCoord\"\n"
4037 		"OpName %tesse_in_position \"in_position\"\n"
4038 		"OpName %tesse_out_color \"out_color\"\n"
4039 		"OpName %tesse_in_color \"in_color\"\n"
4040 		"OpName %frag_main \"main\"\n"
4041 		"OpName %frag_fragColor \"fragColor\"\n"
4042 		"OpName %frag_vtxColor \"vtxColor\"\n"
4043 
4044 		"; Vertex decorations\n"
4045 		"OpDecorate %vert_vtxPosition Location 2\n"
4046 		"OpDecorate %vert_Position Location 0\n"
4047 		"OpDecorate %vert_vtxColor Location 1\n"
4048 		"OpDecorate %vert_color Location 1\n"
4049 		"OpDecorate %vert_vertex_id BuiltIn VertexIndex\n"
4050 		"OpDecorate %vert_instance_id BuiltIn InstanceIndex\n"
4051 
4052 		"; Geometry decorations\n"
4053 		"OpDecorate %geom_out_gl_position BuiltIn Position\n"
4054 		"OpMemberDecorate %geom_per_vertex_in 0 BuiltIn Position\n"
4055 		"OpMemberDecorate %geom_per_vertex_in 1 BuiltIn PointSize\n"
4056 		"OpMemberDecorate %geom_per_vertex_in 2 BuiltIn ClipDistance\n"
4057 		"OpMemberDecorate %geom_per_vertex_in 3 BuiltIn CullDistance\n"
4058 		"OpDecorate %geom_per_vertex_in Block\n"
4059 		"OpDecorate %geom_out_color Location 1\n"
4060 		"OpDecorate %geom_in_color Location 1\n"
4061 
4062 		"; Tessellation Control decorations\n"
4063 		"OpDecorate %tessc_out_color Location 1\n"
4064 		"OpDecorate %tessc_gl_InvocationID BuiltIn InvocationId\n"
4065 		"OpDecorate %tessc_in_color Location 1\n"
4066 		"OpDecorate %tessc_out_position Location 2\n"
4067 		"OpDecorate %tessc_in_position Location 2\n"
4068 		"OpDecorate %tessc_gl_TessLevelOuter Patch\n"
4069 		"OpDecorate %tessc_gl_TessLevelOuter BuiltIn TessLevelOuter\n"
4070 		"OpDecorate %tessc_gl_TessLevelInner Patch\n"
4071 		"OpDecorate %tessc_gl_TessLevelInner BuiltIn TessLevelInner\n"
4072 
4073 		"; Tessellation Evaluation decorations\n"
4074 		"OpMemberDecorate %tesse_per_vertex_out 0 BuiltIn Position\n"
4075 		"OpMemberDecorate %tesse_per_vertex_out 1 BuiltIn PointSize\n"
4076 		"OpMemberDecorate %tesse_per_vertex_out 2 BuiltIn ClipDistance\n"
4077 		"OpMemberDecorate %tesse_per_vertex_out 3 BuiltIn CullDistance\n"
4078 		"OpDecorate %tesse_per_vertex_out Block\n"
4079 		"OpDecorate %tesse_gl_tessCoord BuiltIn TessCoord\n"
4080 		"OpDecorate %tesse_in_position Location 2\n"
4081 		"OpDecorate %tesse_out_color Location 1\n"
4082 		"OpDecorate %tesse_in_color Location 1\n"
4083 
4084 		"; Fragment decorations\n"
4085 		"OpDecorate %frag_fragColor Location 0\n"
4086 		"OpDecorate %frag_vtxColor Location 1\n"
4087 
4088 		SPIRV_ASSEMBLY_TYPES
4089 		SPIRV_ASSEMBLY_CONSTANTS
4090 		SPIRV_ASSEMBLY_ARRAYS
4091 
4092 		"; Vertex Variables\n"
4093 		"%vert_vtxPosition = OpVariable %op_v4f32 Output\n"
4094 		"%vert_Position = OpVariable %ip_v4f32 Input\n"
4095 		"%vert_vtxColor = OpVariable %op_v4f32 Output\n"
4096 		"%vert_color = OpVariable %ip_v4f32 Input\n"
4097 		"%vert_vertex_id = OpVariable %ip_i32 Input\n"
4098 		"%vert_instance_id = OpVariable %ip_i32 Input\n"
4099 
4100 		"; Geometry Variables\n"
4101 		"%geom_per_vertex_in = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4102 		"%geom_a3_per_vertex_in = OpTypeArray %geom_per_vertex_in %c_u32_3\n"
4103 		"%geom_ip_a3_per_vertex_in = OpTypePointer Input %geom_a3_per_vertex_in\n"
4104 		"%geom_gl_in = OpVariable %geom_ip_a3_per_vertex_in Input\n"
4105 		"%geom_out_color = OpVariable %op_v4f32 Output\n"
4106 		"%geom_in_color = OpVariable %ip_a3v4f32 Input\n"
4107 		"%geom_out_gl_position = OpVariable %op_v4f32 Output\n"
4108 
4109 		"; Tessellation Control Variables\n"
4110 		"%tessc_out_color = OpVariable %op_a3v4f32 Output\n"
4111 		"%tessc_gl_InvocationID = OpVariable %ip_i32 Input\n"
4112 		"%tessc_in_color = OpVariable %ip_a32v4f32 Input\n"
4113 		"%tessc_out_position = OpVariable %op_a3v4f32 Output\n"
4114 		"%tessc_in_position = OpVariable %ip_a32v4f32 Input\n"
4115 		"%tessc_gl_TessLevelOuter = OpVariable %op_a4f32 Output\n"
4116 		"%tessc_gl_TessLevelInner = OpVariable %op_a2f32 Output\n"
4117 
4118 		"; Tessellation Evaluation Decorations\n"
4119 		"%tesse_per_vertex_out = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4120 		"%tesse_op_per_vertex_out = OpTypePointer Output %tesse_per_vertex_out\n"
4121 		"%tesse_stream = OpVariable %tesse_op_per_vertex_out Output\n"
4122 		"%tesse_gl_tessCoord = OpVariable %ip_v3f32 Input\n"
4123 		"%tesse_in_position = OpVariable %ip_a32v4f32 Input\n"
4124 		"%tesse_out_color = OpVariable %op_v4f32 Output\n"
4125 		"%tesse_in_color = OpVariable %ip_a32v4f32 Input\n"
4126 
4127 		"; Fragment Variables\n"
4128 		"%frag_fragColor = OpVariable %op_v4f32 Output\n"
4129 		"%frag_vtxColor = OpVariable %ip_v4f32 Input\n"
4130 
4131 		"; Vertex Entry\n"
4132 		"%vert_main = OpFunction %void None %fun\n"
4133 		"%vert_label = OpLabel\n"
4134 		"%vert_tmp_position = OpLoad %v4f32 %vert_Position\n"
4135 		"OpStore %vert_vtxPosition %vert_tmp_position\n"
4136 		"%vert_tmp_color = OpLoad %v4f32 %vert_color\n"
4137 		"OpStore %vert_vtxColor %vert_tmp_color\n"
4138 		"OpReturn\n"
4139 		"OpFunctionEnd\n"
4140 
4141 		"; Geometry Entry\n"
4142 		"%geom_main = OpFunction %void None %fun\n"
4143 		"%geom_label = OpLabel\n"
4144 		"%geom_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %geom_gl_in %c_i32_0 %c_i32_0\n"
4145 		"%geom_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %geom_gl_in %c_i32_1 %c_i32_0\n"
4146 		"%geom_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %geom_gl_in %c_i32_2 %c_i32_0\n"
4147 		"%geom_in_position_0 = OpLoad %v4f32 %geom_gl_in_0_gl_position\n"
4148 		"%geom_in_position_1 = OpLoad %v4f32 %geom_gl_in_1_gl_position\n"
4149 		"%geom_in_position_2 = OpLoad %v4f32 %geom_gl_in_2_gl_position \n"
4150 		"%geom_in_color_0_ptr = OpAccessChain %ip_v4f32 %geom_in_color %c_i32_0\n"
4151 		"%geom_in_color_1_ptr = OpAccessChain %ip_v4f32 %geom_in_color %c_i32_1\n"
4152 		"%geom_in_color_2_ptr = OpAccessChain %ip_v4f32 %geom_in_color %c_i32_2\n"
4153 		"%geom_in_color_0 = OpLoad %v4f32 %geom_in_color_0_ptr\n"
4154 		"%geom_in_color_1 = OpLoad %v4f32 %geom_in_color_1_ptr\n"
4155 		"%geom_in_color_2 = OpLoad %v4f32 %geom_in_color_2_ptr\n"
4156 		"OpStore %geom_out_gl_position %geom_in_position_0\n"
4157 		"OpStore %geom_out_color %geom_in_color_0\n"
4158 		"OpEmitVertex\n"
4159 		"OpStore %geom_out_gl_position %geom_in_position_1\n"
4160 		"OpStore %geom_out_color %geom_in_color_1\n"
4161 		"OpEmitVertex\n"
4162 		"OpStore %geom_out_gl_position %geom_in_position_2\n"
4163 		"OpStore %geom_out_color %geom_in_color_2\n"
4164 		"OpEmitVertex\n"
4165 		"OpEndPrimitive\n"
4166 		"OpReturn\n"
4167 		"OpFunctionEnd\n"
4168 
4169 		"; Tessellation Control Entry\n"
4170 		"%tessc_main = OpFunction %void None %fun\n"
4171 		"%tessc_label = OpLabel\n"
4172 		"%tessc_invocation_id = OpLoad %i32 %tessc_gl_InvocationID\n"
4173 		"%tessc_in_color_ptr = OpAccessChain %ip_v4f32 %tessc_in_color %tessc_invocation_id\n"
4174 		"%tessc_in_position_ptr = OpAccessChain %ip_v4f32 %tessc_in_position %tessc_invocation_id\n"
4175 		"%tessc_in_color_val = OpLoad %v4f32 %tessc_in_color_ptr\n"
4176 		"%tessc_in_position_val = OpLoad %v4f32 %tessc_in_position_ptr\n"
4177 		"%tessc_out_color_ptr = OpAccessChain %op_v4f32 %tessc_out_color %tessc_invocation_id\n"
4178 		"%tessc_out_position_ptr = OpAccessChain %op_v4f32 %tessc_out_position %tessc_invocation_id\n"
4179 		"OpStore %tessc_out_color_ptr %tessc_in_color_val\n"
4180 		"OpStore %tessc_out_position_ptr %tessc_in_position_val\n"
4181 		"%tessc_is_first_invocation = OpIEqual %bool %tessc_invocation_id %c_i32_0\n"
4182 		"OpSelectionMerge %tessc_merge_label None\n"
4183 		"OpBranchConditional %tessc_is_first_invocation %tessc_first_invocation %tessc_merge_label\n"
4184 		"%tessc_first_invocation = OpLabel\n"
4185 		"%tessc_tess_outer_0 = OpAccessChain %op_f32 %tessc_gl_TessLevelOuter %c_i32_0\n"
4186 		"%tessc_tess_outer_1 = OpAccessChain %op_f32 %tessc_gl_TessLevelOuter %c_i32_1\n"
4187 		"%tessc_tess_outer_2 = OpAccessChain %op_f32 %tessc_gl_TessLevelOuter %c_i32_2\n"
4188 		"%tessc_tess_inner = OpAccessChain %op_f32 %tessc_gl_TessLevelInner %c_i32_0\n"
4189 		"OpStore %tessc_tess_outer_0 %c_f32_1\n"
4190 		"OpStore %tessc_tess_outer_1 %c_f32_1\n"
4191 		"OpStore %tessc_tess_outer_2 %c_f32_1\n"
4192 		"OpStore %tessc_tess_inner %c_f32_1\n"
4193 		"OpBranch %tessc_merge_label\n"
4194 		"%tessc_merge_label = OpLabel\n"
4195 		"OpReturn\n"
4196 		"OpFunctionEnd\n"
4197 
4198 		"; Tessellation Evaluation Entry\n"
4199 		"%tesse_main = OpFunction %void None %fun\n"
4200 		"%tesse_label = OpLabel\n"
4201 		"%tesse_tc_0_ptr = OpAccessChain %ip_f32 %tesse_gl_tessCoord %c_u32_0\n"
4202 		"%tesse_tc_1_ptr = OpAccessChain %ip_f32 %tesse_gl_tessCoord %c_u32_1\n"
4203 		"%tesse_tc_2_ptr = OpAccessChain %ip_f32 %tesse_gl_tessCoord %c_u32_2\n"
4204 		"%tesse_tc_0 = OpLoad %f32 %tesse_tc_0_ptr\n"
4205 		"%tesse_tc_1 = OpLoad %f32 %tesse_tc_1_ptr\n"
4206 		"%tesse_tc_2 = OpLoad %f32 %tesse_tc_2_ptr\n"
4207 		"%tesse_in_pos_0_ptr = OpAccessChain %ip_v4f32 %tesse_in_position %c_i32_0\n"
4208 		"%tesse_in_pos_1_ptr = OpAccessChain %ip_v4f32 %tesse_in_position %c_i32_1\n"
4209 		"%tesse_in_pos_2_ptr = OpAccessChain %ip_v4f32 %tesse_in_position %c_i32_2\n"
4210 		"%tesse_in_pos_0 = OpLoad %v4f32 %tesse_in_pos_0_ptr\n"
4211 		"%tesse_in_pos_1 = OpLoad %v4f32 %tesse_in_pos_1_ptr\n"
4212 		"%tesse_in_pos_2 = OpLoad %v4f32 %tesse_in_pos_2_ptr\n"
4213 		"%tesse_in_pos_0_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_0 %tesse_in_pos_0\n"
4214 		"%tesse_in_pos_1_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_1 %tesse_in_pos_1\n"
4215 		"%tesse_in_pos_2_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_2 %tesse_in_pos_2\n"
4216 		"%tesse_out_pos_ptr = OpAccessChain %op_v4f32 %tesse_stream %c_i32_0\n"
4217 		"%tesse_in_pos_0_plus_pos_1 = OpFAdd %v4f32 %tesse_in_pos_0_weighted %tesse_in_pos_1_weighted\n"
4218 		"%tesse_computed_out = OpFAdd %v4f32 %tesse_in_pos_0_plus_pos_1 %tesse_in_pos_2_weighted\n"
4219 		"OpStore %tesse_out_pos_ptr %tesse_computed_out\n"
4220 		"%tesse_in_clr_0_ptr = OpAccessChain %ip_v4f32 %tesse_in_color %c_i32_0\n"
4221 		"%tesse_in_clr_1_ptr = OpAccessChain %ip_v4f32 %tesse_in_color %c_i32_1\n"
4222 		"%tesse_in_clr_2_ptr = OpAccessChain %ip_v4f32 %tesse_in_color %c_i32_2\n"
4223 		"%tesse_in_clr_0 = OpLoad %v4f32 %tesse_in_clr_0_ptr\n"
4224 		"%tesse_in_clr_1 = OpLoad %v4f32 %tesse_in_clr_1_ptr\n"
4225 		"%tesse_in_clr_2 = OpLoad %v4f32 %tesse_in_clr_2_ptr\n"
4226 		"%tesse_in_clr_0_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_0 %tesse_in_clr_0\n"
4227 		"%tesse_in_clr_1_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_1 %tesse_in_clr_1\n"
4228 		"%tesse_in_clr_2_weighted = OpVectorTimesScalar %v4f32 %tesse_tc_2 %tesse_in_clr_2\n"
4229 		"%tesse_in_clr_0_plus_col_1 = OpFAdd %v4f32 %tesse_in_clr_0_weighted %tesse_in_clr_1_weighted\n"
4230 		"%tesse_computed_clr = OpFAdd %v4f32 %tesse_in_clr_0_plus_col_1 %tesse_in_clr_2_weighted\n"
4231 		"OpStore %tesse_out_color %tesse_computed_clr\n"
4232 		"OpReturn\n"
4233 		"OpFunctionEnd\n"
4234 
4235 		"; Fragment Entry\n"
4236 		"%frag_main = OpFunction %void None %fun\n"
4237 		"%frag_label_main = OpLabel\n"
4238 		"%frag_tmp1 = OpLoad %v4f32 %frag_vtxColor\n"
4239 		"OpStore %frag_fragColor %frag_tmp1\n"
4240 		"OpReturn\n"
4241 		"OpFunctionEnd\n";
4242 }
4243 
4244 // This has two shaders of each stage. The first
4245 // is a passthrough, the second inverts the color.
createMultipleEntries(vk::SourceCollections & dst,InstanceContext)4246 void createMultipleEntries(vk::SourceCollections& dst, InstanceContext)
4247 {
4248 	dst.spirvAsmSources.add("vert") <<
4249 	// This module contains 2 vertex shaders. One that is a passthrough
4250 	// and a second that inverts the color of the output (1.0 - color).
4251 		"OpCapability Shader\n"
4252 		"OpMemoryModel Logical GLSL450\n"
4253 		"OpEntryPoint Vertex %main \"vert1\" %Position %vtxColor %color %vtxPosition %vertex_id %instance_id\n"
4254 		"OpEntryPoint Vertex %main2 \"vert2\" %Position %vtxColor %color %vtxPosition %vertex_id %instance_id\n"
4255 
4256 		"OpName %main \"vert1\"\n"
4257 		"OpName %main2 \"vert2\"\n"
4258 		"OpName %vtxPosition \"vtxPosition\"\n"
4259 		"OpName %Position \"position\"\n"
4260 		"OpName %vtxColor \"vtxColor\"\n"
4261 		"OpName %color \"color\"\n"
4262 		"OpName %vertex_id \"gl_VertexIndex\"\n"
4263 		"OpName %instance_id \"gl_InstanceIndex\"\n"
4264 
4265 		"OpDecorate %vtxPosition Location 2\n"
4266 		"OpDecorate %Position Location 0\n"
4267 		"OpDecorate %vtxColor Location 1\n"
4268 		"OpDecorate %color Location 1\n"
4269 		"OpDecorate %vertex_id BuiltIn VertexIndex\n"
4270 		"OpDecorate %instance_id BuiltIn InstanceIndex\n"
4271 		SPIRV_ASSEMBLY_TYPES
4272 		SPIRV_ASSEMBLY_CONSTANTS
4273 		SPIRV_ASSEMBLY_ARRAYS
4274 		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4275 		"%vtxPosition = OpVariable %op_v4f32 Output\n"
4276 		"%Position = OpVariable %ip_v4f32 Input\n"
4277 		"%vtxColor = OpVariable %op_v4f32 Output\n"
4278 		"%color = OpVariable %ip_v4f32 Input\n"
4279 		"%vertex_id = OpVariable %ip_i32 Input\n"
4280 		"%instance_id = OpVariable %ip_i32 Input\n"
4281 
4282 		"%main = OpFunction %void None %fun\n"
4283 		"%label = OpLabel\n"
4284 		"%tmp_position = OpLoad %v4f32 %Position\n"
4285 		"OpStore %vtxPosition %tmp_position\n"
4286 		"%tmp_color = OpLoad %v4f32 %color\n"
4287 		"OpStore %vtxColor %tmp_color\n"
4288 		"OpReturn\n"
4289 		"OpFunctionEnd\n"
4290 
4291 		"%main2 = OpFunction %void None %fun\n"
4292 		"%label2 = OpLabel\n"
4293 		"%tmp_position2 = OpLoad %v4f32 %Position\n"
4294 		"OpStore %vtxPosition %tmp_position2\n"
4295 		"%tmp_color2 = OpLoad %v4f32 %color\n"
4296 		"%tmp_color3 = OpFSub %v4f32 %cval %tmp_color2\n"
4297 		"%tmp_color4 = OpVectorInsertDynamic %v4f32 %tmp_color3 %c_f32_1 %c_i32_3\n"
4298 		"OpStore %vtxColor %tmp_color4\n"
4299 		"OpReturn\n"
4300 		"OpFunctionEnd\n";
4301 
4302 	dst.spirvAsmSources.add("frag") <<
4303 		// This is a single module that contains 2 fragment shaders.
4304 		// One that passes color through and the other that inverts the output
4305 		// color (1.0 - color).
4306 		"OpCapability Shader\n"
4307 		"OpMemoryModel Logical GLSL450\n"
4308 		"OpEntryPoint Fragment %main \"frag1\" %vtxColor %fragColor\n"
4309 		"OpEntryPoint Fragment %main2 \"frag2\" %vtxColor %fragColor\n"
4310 		"OpExecutionMode %main OriginUpperLeft\n"
4311 		"OpExecutionMode %main2 OriginUpperLeft\n"
4312 
4313 		"OpName %main \"frag1\"\n"
4314 		"OpName %main2 \"frag2\"\n"
4315 		"OpName %fragColor \"fragColor\"\n"
4316 		"OpName %vtxColor \"vtxColor\"\n"
4317 		"OpDecorate %fragColor Location 0\n"
4318 		"OpDecorate %vtxColor Location 1\n"
4319 		SPIRV_ASSEMBLY_TYPES
4320 		SPIRV_ASSEMBLY_CONSTANTS
4321 		SPIRV_ASSEMBLY_ARRAYS
4322 		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4323 		"%fragColor = OpVariable %op_v4f32 Output\n"
4324 		"%vtxColor = OpVariable %ip_v4f32 Input\n"
4325 
4326 		"%main = OpFunction %void None %fun\n"
4327 		"%label_main = OpLabel\n"
4328 		"%tmp1 = OpLoad %v4f32 %vtxColor\n"
4329 		"OpStore %fragColor %tmp1\n"
4330 		"OpReturn\n"
4331 		"OpFunctionEnd\n"
4332 
4333 		"%main2 = OpFunction %void None %fun\n"
4334 		"%label_main2 = OpLabel\n"
4335 		"%tmp2 = OpLoad %v4f32 %vtxColor\n"
4336 		"%tmp3 = OpFSub %v4f32 %cval %tmp2\n"
4337 		"%tmp4 = OpVectorInsertDynamic %v4f32 %tmp3 %c_f32_1 %c_i32_3\n"
4338 		"OpStore %fragColor %tmp4\n"
4339 		"OpReturn\n"
4340 		"OpFunctionEnd\n";
4341 
4342 	dst.spirvAsmSources.add("geom") <<
4343 		"OpCapability Geometry\n"
4344 		"OpCapability ClipDistance\n"
4345 		"OpCapability CullDistance\n"
4346 		"OpMemoryModel Logical GLSL450\n"
4347 		"OpEntryPoint Geometry %geom1_main \"geom1\" %out_gl_position %gl_in %out_color %in_color\n"
4348 		"OpEntryPoint Geometry %geom2_main \"geom2\" %out_gl_position %gl_in %out_color %in_color\n"
4349 		"OpExecutionMode %geom1_main Triangles\n"
4350 		"OpExecutionMode %geom2_main Triangles\n"
4351 		"OpExecutionMode %geom1_main OutputTriangleStrip\n"
4352 		"OpExecutionMode %geom2_main OutputTriangleStrip\n"
4353 		"OpExecutionMode %geom1_main OutputVertices 3\n"
4354 		"OpExecutionMode %geom2_main OutputVertices 3\n"
4355 		"OpName %geom1_main \"geom1\"\n"
4356 		"OpName %geom2_main \"geom2\"\n"
4357 		"OpName %per_vertex_in \"gl_PerVertex\"\n"
4358 		"OpMemberName %per_vertex_in 0 \"gl_Position\"\n"
4359 		"OpMemberName %per_vertex_in 1 \"gl_PointSize\"\n"
4360 		"OpMemberName %per_vertex_in 2 \"gl_ClipDistance\"\n"
4361 		"OpMemberName %per_vertex_in 3 \"gl_CullDistance\"\n"
4362 		"OpName %gl_in \"gl_in\"\n"
4363 		"OpName %out_color \"out_color\"\n"
4364 		"OpName %in_color \"in_color\"\n"
4365 		"OpDecorate %out_gl_position BuiltIn Position\n"
4366 		"OpMemberDecorate %per_vertex_in 0 BuiltIn Position\n"
4367 		"OpMemberDecorate %per_vertex_in 1 BuiltIn PointSize\n"
4368 		"OpMemberDecorate %per_vertex_in 2 BuiltIn ClipDistance\n"
4369 		"OpMemberDecorate %per_vertex_in 3 BuiltIn CullDistance\n"
4370 		"OpDecorate %per_vertex_in Block\n"
4371 		"OpDecorate %out_color Location 1\n"
4372 		"OpDecorate %in_color Location 1\n"
4373 		SPIRV_ASSEMBLY_TYPES
4374 		SPIRV_ASSEMBLY_CONSTANTS
4375 		SPIRV_ASSEMBLY_ARRAYS
4376 		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4377 		"%per_vertex_in = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4378 		"%a3_per_vertex_in = OpTypeArray %per_vertex_in %c_u32_3\n"
4379 		"%ip_a3_per_vertex_in = OpTypePointer Input %a3_per_vertex_in\n"
4380 		"%gl_in = OpVariable %ip_a3_per_vertex_in Input\n"
4381 		"%out_color = OpVariable %op_v4f32 Output\n"
4382 		"%in_color = OpVariable %ip_a3v4f32 Input\n"
4383 		"%out_gl_position = OpVariable %op_v4f32 Output\n"
4384 
4385 		"%geom1_main = OpFunction %void None %fun\n"
4386 		"%geom1_label = OpLabel\n"
4387 		"%geom1_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_0 %c_i32_0\n"
4388 		"%geom1_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_1 %c_i32_0\n"
4389 		"%geom1_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_2 %c_i32_0\n"
4390 		"%geom1_in_position_0 = OpLoad %v4f32 %geom1_gl_in_0_gl_position\n"
4391 		"%geom1_in_position_1 = OpLoad %v4f32 %geom1_gl_in_1_gl_position\n"
4392 		"%geom1_in_position_2 = OpLoad %v4f32 %geom1_gl_in_2_gl_position \n"
4393 		"%geom1_in_color_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4394 		"%geom1_in_color_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4395 		"%geom1_in_color_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4396 		"%geom1_in_color_0 = OpLoad %v4f32 %geom1_in_color_0_ptr\n"
4397 		"%geom1_in_color_1 = OpLoad %v4f32 %geom1_in_color_1_ptr\n"
4398 		"%geom1_in_color_2 = OpLoad %v4f32 %geom1_in_color_2_ptr\n"
4399 		"OpStore %out_gl_position %geom1_in_position_0\n"
4400 		"OpStore %out_color %geom1_in_color_0\n"
4401 		"OpEmitVertex\n"
4402 		"OpStore %out_gl_position %geom1_in_position_1\n"
4403 		"OpStore %out_color %geom1_in_color_1\n"
4404 		"OpEmitVertex\n"
4405 		"OpStore %out_gl_position %geom1_in_position_2\n"
4406 		"OpStore %out_color %geom1_in_color_2\n"
4407 		"OpEmitVertex\n"
4408 		"OpEndPrimitive\n"
4409 		"OpReturn\n"
4410 		"OpFunctionEnd\n"
4411 
4412 		"%geom2_main = OpFunction %void None %fun\n"
4413 		"%geom2_label = OpLabel\n"
4414 		"%geom2_gl_in_0_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_0 %c_i32_0\n"
4415 		"%geom2_gl_in_1_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_1 %c_i32_0\n"
4416 		"%geom2_gl_in_2_gl_position = OpAccessChain %ip_v4f32 %gl_in %c_i32_2 %c_i32_0\n"
4417 		"%geom2_in_position_0 = OpLoad %v4f32 %geom2_gl_in_0_gl_position\n"
4418 		"%geom2_in_position_1 = OpLoad %v4f32 %geom2_gl_in_1_gl_position\n"
4419 		"%geom2_in_position_2 = OpLoad %v4f32 %geom2_gl_in_2_gl_position \n"
4420 		"%geom2_in_color_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4421 		"%geom2_in_color_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4422 		"%geom2_in_color_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4423 		"%geom2_in_color_0 = OpLoad %v4f32 %geom2_in_color_0_ptr\n"
4424 		"%geom2_in_color_1 = OpLoad %v4f32 %geom2_in_color_1_ptr\n"
4425 		"%geom2_in_color_2 = OpLoad %v4f32 %geom2_in_color_2_ptr\n"
4426 		"%geom2_transformed_in_color_0 = OpFSub %v4f32 %cval %geom2_in_color_0\n"
4427 		"%geom2_transformed_in_color_1 = OpFSub %v4f32 %cval %geom2_in_color_1\n"
4428 		"%geom2_transformed_in_color_2 = OpFSub %v4f32 %cval %geom2_in_color_2\n"
4429 		"%geom2_transformed_in_color_0_a = OpVectorInsertDynamic %v4f32 %geom2_transformed_in_color_0 %c_f32_1 %c_i32_3\n"
4430 		"%geom2_transformed_in_color_1_a = OpVectorInsertDynamic %v4f32 %geom2_transformed_in_color_1 %c_f32_1 %c_i32_3\n"
4431 		"%geom2_transformed_in_color_2_a = OpVectorInsertDynamic %v4f32 %geom2_transformed_in_color_2 %c_f32_1 %c_i32_3\n"
4432 		"OpStore %out_gl_position %geom2_in_position_0\n"
4433 		"OpStore %out_color %geom2_transformed_in_color_0_a\n"
4434 		"OpEmitVertex\n"
4435 		"OpStore %out_gl_position %geom2_in_position_1\n"
4436 		"OpStore %out_color %geom2_transformed_in_color_1_a\n"
4437 		"OpEmitVertex\n"
4438 		"OpStore %out_gl_position %geom2_in_position_2\n"
4439 		"OpStore %out_color %geom2_transformed_in_color_2_a\n"
4440 		"OpEmitVertex\n"
4441 		"OpEndPrimitive\n"
4442 		"OpReturn\n"
4443 		"OpFunctionEnd\n";
4444 
4445 	dst.spirvAsmSources.add("tessc") <<
4446 		"OpCapability Tessellation\n"
4447 		"OpMemoryModel Logical GLSL450\n"
4448 		"OpEntryPoint TessellationControl %tessc1_main \"tessc1\" %out_color %gl_InvocationID %in_color %out_position %in_position %gl_TessLevelOuter %gl_TessLevelInner\n"
4449 		"OpEntryPoint TessellationControl %tessc2_main \"tessc2\" %out_color %gl_InvocationID %in_color %out_position %in_position %gl_TessLevelOuter %gl_TessLevelInner\n"
4450 		"OpExecutionMode %tessc1_main OutputVertices 3\n"
4451 		"OpExecutionMode %tessc2_main OutputVertices 3\n"
4452 		"OpName %tessc1_main \"tessc1\"\n"
4453 		"OpName %tessc2_main \"tessc2\"\n"
4454 		"OpName %out_color \"out_color\"\n"
4455 		"OpName %gl_InvocationID \"gl_InvocationID\"\n"
4456 		"OpName %in_color \"in_color\"\n"
4457 		"OpName %out_position \"out_position\"\n"
4458 		"OpName %in_position \"in_position\"\n"
4459 		"OpName %gl_TessLevelOuter \"gl_TessLevelOuter\"\n"
4460 		"OpName %gl_TessLevelInner \"gl_TessLevelInner\"\n"
4461 		"OpDecorate %out_color Location 1\n"
4462 		"OpDecorate %gl_InvocationID BuiltIn InvocationId\n"
4463 		"OpDecorate %in_color Location 1\n"
4464 		"OpDecorate %out_position Location 2\n"
4465 		"OpDecorate %in_position Location 2\n"
4466 		"OpDecorate %gl_TessLevelOuter Patch\n"
4467 		"OpDecorate %gl_TessLevelOuter BuiltIn TessLevelOuter\n"
4468 		"OpDecorate %gl_TessLevelInner Patch\n"
4469 		"OpDecorate %gl_TessLevelInner BuiltIn TessLevelInner\n"
4470 		SPIRV_ASSEMBLY_TYPES
4471 		SPIRV_ASSEMBLY_CONSTANTS
4472 		SPIRV_ASSEMBLY_ARRAYS
4473 		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4474 		"%out_color = OpVariable %op_a3v4f32 Output\n"
4475 		"%gl_InvocationID = OpVariable %ip_i32 Input\n"
4476 		"%in_color = OpVariable %ip_a32v4f32 Input\n"
4477 		"%out_position = OpVariable %op_a3v4f32 Output\n"
4478 		"%in_position = OpVariable %ip_a32v4f32 Input\n"
4479 		"%gl_TessLevelOuter = OpVariable %op_a4f32 Output\n"
4480 		"%gl_TessLevelInner = OpVariable %op_a2f32 Output\n"
4481 
4482 		"%tessc1_main = OpFunction %void None %fun\n"
4483 		"%tessc1_label = OpLabel\n"
4484 		"%tessc1_invocation_id = OpLoad %i32 %gl_InvocationID\n"
4485 		"%tessc1_in_color_ptr = OpAccessChain %ip_v4f32 %in_color %tessc1_invocation_id\n"
4486 		"%tessc1_in_position_ptr = OpAccessChain %ip_v4f32 %in_position %tessc1_invocation_id\n"
4487 		"%tessc1_in_color_val = OpLoad %v4f32 %tessc1_in_color_ptr\n"
4488 		"%tessc1_in_position_val = OpLoad %v4f32 %tessc1_in_position_ptr\n"
4489 		"%tessc1_out_color_ptr = OpAccessChain %op_v4f32 %out_color %tessc1_invocation_id\n"
4490 		"%tessc1_out_position_ptr = OpAccessChain %op_v4f32 %out_position %tessc1_invocation_id\n"
4491 		"OpStore %tessc1_out_color_ptr %tessc1_in_color_val\n"
4492 		"OpStore %tessc1_out_position_ptr %tessc1_in_position_val\n"
4493 		"%tessc1_is_first_invocation = OpIEqual %bool %tessc1_invocation_id %c_i32_0\n"
4494 		"OpSelectionMerge %tessc1_merge_label None\n"
4495 		"OpBranchConditional %tessc1_is_first_invocation %tessc1_first_invocation %tessc1_merge_label\n"
4496 		"%tessc1_first_invocation = OpLabel\n"
4497 		"%tessc1_tess_outer_0 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_0\n"
4498 		"%tessc1_tess_outer_1 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_1\n"
4499 		"%tessc1_tess_outer_2 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_2\n"
4500 		"%tessc1_tess_inner = OpAccessChain %op_f32 %gl_TessLevelInner %c_i32_0\n"
4501 		"OpStore %tessc1_tess_outer_0 %c_f32_1\n"
4502 		"OpStore %tessc1_tess_outer_1 %c_f32_1\n"
4503 		"OpStore %tessc1_tess_outer_2 %c_f32_1\n"
4504 		"OpStore %tessc1_tess_inner %c_f32_1\n"
4505 		"OpBranch %tessc1_merge_label\n"
4506 		"%tessc1_merge_label = OpLabel\n"
4507 		"OpReturn\n"
4508 		"OpFunctionEnd\n"
4509 
4510 		"%tessc2_main = OpFunction %void None %fun\n"
4511 		"%tessc2_label = OpLabel\n"
4512 		"%tessc2_invocation_id = OpLoad %i32 %gl_InvocationID\n"
4513 		"%tessc2_in_color_ptr = OpAccessChain %ip_v4f32 %in_color %tessc2_invocation_id\n"
4514 		"%tessc2_in_position_ptr = OpAccessChain %ip_v4f32 %in_position %tessc2_invocation_id\n"
4515 		"%tessc2_in_color_val = OpLoad %v4f32 %tessc2_in_color_ptr\n"
4516 		"%tessc2_in_position_val = OpLoad %v4f32 %tessc2_in_position_ptr\n"
4517 		"%tessc2_out_color_ptr = OpAccessChain %op_v4f32 %out_color %tessc2_invocation_id\n"
4518 		"%tessc2_out_position_ptr = OpAccessChain %op_v4f32 %out_position %tessc2_invocation_id\n"
4519 		"%tessc2_transformed_color = OpFSub %v4f32 %cval %tessc2_in_color_val\n"
4520 		"%tessc2_transformed_color_a = OpVectorInsertDynamic %v4f32 %tessc2_transformed_color %c_f32_1 %c_i32_3\n"
4521 		"OpStore %tessc2_out_color_ptr %tessc2_transformed_color_a\n"
4522 		"OpStore %tessc2_out_position_ptr %tessc2_in_position_val\n"
4523 		"%tessc2_is_first_invocation = OpIEqual %bool %tessc2_invocation_id %c_i32_0\n"
4524 		"OpSelectionMerge %tessc2_merge_label None\n"
4525 		"OpBranchConditional %tessc2_is_first_invocation %tessc2_first_invocation %tessc2_merge_label\n"
4526 		"%tessc2_first_invocation = OpLabel\n"
4527 		"%tessc2_tess_outer_0 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_0\n"
4528 		"%tessc2_tess_outer_1 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_1\n"
4529 		"%tessc2_tess_outer_2 = OpAccessChain %op_f32 %gl_TessLevelOuter %c_i32_2\n"
4530 		"%tessc2_tess_inner = OpAccessChain %op_f32 %gl_TessLevelInner %c_i32_0\n"
4531 		"OpStore %tessc2_tess_outer_0 %c_f32_1\n"
4532 		"OpStore %tessc2_tess_outer_1 %c_f32_1\n"
4533 		"OpStore %tessc2_tess_outer_2 %c_f32_1\n"
4534 		"OpStore %tessc2_tess_inner %c_f32_1\n"
4535 		"OpBranch %tessc2_merge_label\n"
4536 		"%tessc2_merge_label = OpLabel\n"
4537 		"OpReturn\n"
4538 		"OpFunctionEnd\n";
4539 
4540 	dst.spirvAsmSources.add("tesse") <<
4541 		"OpCapability Tessellation\n"
4542 		"OpCapability ClipDistance\n"
4543 		"OpCapability CullDistance\n"
4544 		"OpMemoryModel Logical GLSL450\n"
4545 		"OpEntryPoint TessellationEvaluation %tesse1_main \"tesse1\" %stream %gl_tessCoord %in_position %out_color %in_color \n"
4546 		"OpEntryPoint TessellationEvaluation %tesse2_main \"tesse2\" %stream %gl_tessCoord %in_position %out_color %in_color \n"
4547 		"OpExecutionMode %tesse1_main Triangles\n"
4548 		"OpExecutionMode %tesse2_main Triangles\n"
4549 		"OpName %tesse1_main \"tesse1\"\n"
4550 		"OpName %tesse2_main \"tesse2\"\n"
4551 		"OpName %per_vertex_out \"gl_PerVertex\"\n"
4552 		"OpMemberName %per_vertex_out 0 \"gl_Position\"\n"
4553 		"OpMemberName %per_vertex_out 1 \"gl_PointSize\"\n"
4554 		"OpMemberName %per_vertex_out 2 \"gl_ClipDistance\"\n"
4555 		"OpMemberName %per_vertex_out 3 \"gl_CullDistance\"\n"
4556 		"OpName %stream \"\"\n"
4557 		"OpName %gl_tessCoord \"gl_TessCoord\"\n"
4558 		"OpName %in_position \"in_position\"\n"
4559 		"OpName %out_color \"out_color\"\n"
4560 		"OpName %in_color \"in_color\"\n"
4561 		"OpMemberDecorate %per_vertex_out 0 BuiltIn Position\n"
4562 		"OpMemberDecorate %per_vertex_out 1 BuiltIn PointSize\n"
4563 		"OpMemberDecorate %per_vertex_out 2 BuiltIn ClipDistance\n"
4564 		"OpMemberDecorate %per_vertex_out 3 BuiltIn CullDistance\n"
4565 		"OpDecorate %per_vertex_out Block\n"
4566 		"OpDecorate %gl_tessCoord BuiltIn TessCoord\n"
4567 		"OpDecorate %in_position Location 2\n"
4568 		"OpDecorate %out_color Location 1\n"
4569 		"OpDecorate %in_color Location 1\n"
4570 		SPIRV_ASSEMBLY_TYPES
4571 		SPIRV_ASSEMBLY_CONSTANTS
4572 		SPIRV_ASSEMBLY_ARRAYS
4573 		"%cval = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
4574 		"%per_vertex_out = OpTypeStruct %v4f32 %f32 %a1f32 %a1f32\n"
4575 		"%op_per_vertex_out = OpTypePointer Output %per_vertex_out\n"
4576 		"%stream = OpVariable %op_per_vertex_out Output\n"
4577 		"%gl_tessCoord = OpVariable %ip_v3f32 Input\n"
4578 		"%in_position = OpVariable %ip_a32v4f32 Input\n"
4579 		"%out_color = OpVariable %op_v4f32 Output\n"
4580 		"%in_color = OpVariable %ip_a32v4f32 Input\n"
4581 
4582 		"%tesse1_main = OpFunction %void None %fun\n"
4583 		"%tesse1_label = OpLabel\n"
4584 		"%tesse1_tc_0_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_0\n"
4585 		"%tesse1_tc_1_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_1\n"
4586 		"%tesse1_tc_2_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_2\n"
4587 		"%tesse1_tc_0 = OpLoad %f32 %tesse1_tc_0_ptr\n"
4588 		"%tesse1_tc_1 = OpLoad %f32 %tesse1_tc_1_ptr\n"
4589 		"%tesse1_tc_2 = OpLoad %f32 %tesse1_tc_2_ptr\n"
4590 		"%tesse1_in_pos_0_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_0\n"
4591 		"%tesse1_in_pos_1_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_1\n"
4592 		"%tesse1_in_pos_2_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_2\n"
4593 		"%tesse1_in_pos_0 = OpLoad %v4f32 %tesse1_in_pos_0_ptr\n"
4594 		"%tesse1_in_pos_1 = OpLoad %v4f32 %tesse1_in_pos_1_ptr\n"
4595 		"%tesse1_in_pos_2 = OpLoad %v4f32 %tesse1_in_pos_2_ptr\n"
4596 		"%tesse1_in_pos_0_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_0 %tesse1_in_pos_0\n"
4597 		"%tesse1_in_pos_1_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_1 %tesse1_in_pos_1\n"
4598 		"%tesse1_in_pos_2_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_2 %tesse1_in_pos_2\n"
4599 		"%tesse1_out_pos_ptr = OpAccessChain %op_v4f32 %stream %c_i32_0\n"
4600 		"%tesse1_in_pos_0_plus_pos_1 = OpFAdd %v4f32 %tesse1_in_pos_0_weighted %tesse1_in_pos_1_weighted\n"
4601 		"%tesse1_computed_out = OpFAdd %v4f32 %tesse1_in_pos_0_plus_pos_1 %tesse1_in_pos_2_weighted\n"
4602 		"OpStore %tesse1_out_pos_ptr %tesse1_computed_out\n"
4603 		"%tesse1_in_clr_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4604 		"%tesse1_in_clr_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4605 		"%tesse1_in_clr_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4606 		"%tesse1_in_clr_0 = OpLoad %v4f32 %tesse1_in_clr_0_ptr\n"
4607 		"%tesse1_in_clr_1 = OpLoad %v4f32 %tesse1_in_clr_1_ptr\n"
4608 		"%tesse1_in_clr_2 = OpLoad %v4f32 %tesse1_in_clr_2_ptr\n"
4609 		"%tesse1_in_clr_0_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_0 %tesse1_in_clr_0\n"
4610 		"%tesse1_in_clr_1_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_1 %tesse1_in_clr_1\n"
4611 		"%tesse1_in_clr_2_weighted = OpVectorTimesScalar %v4f32 %tesse1_tc_2 %tesse1_in_clr_2\n"
4612 		"%tesse1_in_clr_0_plus_col_1 = OpFAdd %v4f32 %tesse1_in_clr_0_weighted %tesse1_in_clr_1_weighted\n"
4613 		"%tesse1_computed_clr = OpFAdd %v4f32 %tesse1_in_clr_0_plus_col_1 %tesse1_in_clr_2_weighted\n"
4614 		"OpStore %out_color %tesse1_computed_clr\n"
4615 		"OpReturn\n"
4616 		"OpFunctionEnd\n"
4617 
4618 		"%tesse2_main = OpFunction %void None %fun\n"
4619 		"%tesse2_label = OpLabel\n"
4620 		"%tesse2_tc_0_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_0\n"
4621 		"%tesse2_tc_1_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_1\n"
4622 		"%tesse2_tc_2_ptr = OpAccessChain %ip_f32 %gl_tessCoord %c_u32_2\n"
4623 		"%tesse2_tc_0 = OpLoad %f32 %tesse2_tc_0_ptr\n"
4624 		"%tesse2_tc_1 = OpLoad %f32 %tesse2_tc_1_ptr\n"
4625 		"%tesse2_tc_2 = OpLoad %f32 %tesse2_tc_2_ptr\n"
4626 		"%tesse2_in_pos_0_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_0\n"
4627 		"%tesse2_in_pos_1_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_1\n"
4628 		"%tesse2_in_pos_2_ptr = OpAccessChain %ip_v4f32 %in_position %c_i32_2\n"
4629 		"%tesse2_in_pos_0 = OpLoad %v4f32 %tesse2_in_pos_0_ptr\n"
4630 		"%tesse2_in_pos_1 = OpLoad %v4f32 %tesse2_in_pos_1_ptr\n"
4631 		"%tesse2_in_pos_2 = OpLoad %v4f32 %tesse2_in_pos_2_ptr\n"
4632 		"%tesse2_in_pos_0_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_0 %tesse2_in_pos_0\n"
4633 		"%tesse2_in_pos_1_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_1 %tesse2_in_pos_1\n"
4634 		"%tesse2_in_pos_2_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_2 %tesse2_in_pos_2\n"
4635 		"%tesse2_out_pos_ptr = OpAccessChain %op_v4f32 %stream %c_i32_0\n"
4636 		"%tesse2_in_pos_0_plus_pos_1 = OpFAdd %v4f32 %tesse2_in_pos_0_weighted %tesse2_in_pos_1_weighted\n"
4637 		"%tesse2_computed_out = OpFAdd %v4f32 %tesse2_in_pos_0_plus_pos_1 %tesse2_in_pos_2_weighted\n"
4638 		"OpStore %tesse2_out_pos_ptr %tesse2_computed_out\n"
4639 		"%tesse2_in_clr_0_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_0\n"
4640 		"%tesse2_in_clr_1_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_1\n"
4641 		"%tesse2_in_clr_2_ptr = OpAccessChain %ip_v4f32 %in_color %c_i32_2\n"
4642 		"%tesse2_in_clr_0 = OpLoad %v4f32 %tesse2_in_clr_0_ptr\n"
4643 		"%tesse2_in_clr_1 = OpLoad %v4f32 %tesse2_in_clr_1_ptr\n"
4644 		"%tesse2_in_clr_2 = OpLoad %v4f32 %tesse2_in_clr_2_ptr\n"
4645 		"%tesse2_in_clr_0_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_0 %tesse2_in_clr_0\n"
4646 		"%tesse2_in_clr_1_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_1 %tesse2_in_clr_1\n"
4647 		"%tesse2_in_clr_2_weighted = OpVectorTimesScalar %v4f32 %tesse2_tc_2 %tesse2_in_clr_2\n"
4648 		"%tesse2_in_clr_0_plus_col_1 = OpFAdd %v4f32 %tesse2_in_clr_0_weighted %tesse2_in_clr_1_weighted\n"
4649 		"%tesse2_computed_clr = OpFAdd %v4f32 %tesse2_in_clr_0_plus_col_1 %tesse2_in_clr_2_weighted\n"
4650 		"%tesse2_clr_transformed = OpFSub %v4f32 %cval %tesse2_computed_clr\n"
4651 		"%tesse2_clr_transformed_a = OpVectorInsertDynamic %v4f32 %tesse2_clr_transformed %c_f32_1 %c_i32_3\n"
4652 		"OpStore %out_color %tesse2_clr_transformed_a\n"
4653 		"OpReturn\n"
4654 		"OpFunctionEnd\n";
4655 }
4656 
4657 // Sets up and runs a Vulkan pipeline, then spot-checks the resulting image.
4658 // Feeds the pipeline a set of colored triangles, which then must occur in the
4659 // rendered image.  The surface is cleared before executing the pipeline, so
4660 // whatever the shaders draw can be directly spot-checked.
runAndVerifyDefaultPipeline(Context & context,InstanceContext instance)4661 TestStatus runAndVerifyDefaultPipeline (Context& context, InstanceContext instance)
4662 {
4663 	const VkDevice								vkDevice				= context.getDevice();
4664 	const DeviceInterface&						vk						= context.getDeviceInterface();
4665 	const VkQueue								queue					= context.getUniversalQueue();
4666 	const deUint32								queueFamilyIndex		= context.getUniversalQueueFamilyIndex();
4667 	const tcu::UVec2							renderSize				(256, 256);
4668 	vector<ModuleHandleSp>						modules;
4669 	map<VkShaderStageFlagBits, VkShaderModule>	moduleByStage;
4670 	const int									testSpecificSeed		= 31354125;
4671 	const int									seed					= context.getTestContext().getCommandLine().getBaseSeed() ^ testSpecificSeed;
4672 	bool										supportsGeometry		= false;
4673 	bool										supportsTessellation	= false;
4674 	bool										hasTessellation         = false;
4675 
4676 	const VkPhysicalDeviceFeatures&				features				= context.getDeviceFeatures();
4677 	supportsGeometry		= features.geometryShader == VK_TRUE;
4678 	supportsTessellation	= features.tessellationShader == VK_TRUE;
4679 	hasTessellation			= (instance.requiredStages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ||
4680 								(instance.requiredStages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
4681 
4682 	if (hasTessellation && !supportsTessellation)
4683 	{
4684 		throw tcu::NotSupportedError(std::string("Tessellation not supported"));
4685 	}
4686 
4687 	if ((instance.requiredStages & VK_SHADER_STAGE_GEOMETRY_BIT) &&
4688 		!supportsGeometry)
4689 	{
4690 		throw tcu::NotSupportedError(std::string("Geometry not supported"));
4691 	}
4692 
4693 	de::Random(seed).shuffle(instance.inputColors, instance.inputColors+4);
4694 	de::Random(seed).shuffle(instance.outputColors, instance.outputColors+4);
4695 	const Vec4								vertexData[]			=
4696 	{
4697 		// Upper left corner:
4698 		Vec4(-1.0f, -1.0f, 0.0f, 1.0f), instance.inputColors[0].toVec(),
4699 		Vec4(-0.5f, -1.0f, 0.0f, 1.0f), instance.inputColors[0].toVec(),
4700 		Vec4(-1.0f, -0.5f, 0.0f, 1.0f), instance.inputColors[0].toVec(),
4701 
4702 		// Upper right corner:
4703 		Vec4(+0.5f, -1.0f, 0.0f, 1.0f), instance.inputColors[1].toVec(),
4704 		Vec4(+1.0f, -1.0f, 0.0f, 1.0f), instance.inputColors[1].toVec(),
4705 		Vec4(+1.0f, -0.5f, 0.0f, 1.0f), instance.inputColors[1].toVec(),
4706 
4707 		// Lower left corner:
4708 		Vec4(-1.0f, +0.5f, 0.0f, 1.0f), instance.inputColors[2].toVec(),
4709 		Vec4(-0.5f, +1.0f, 0.0f, 1.0f), instance.inputColors[2].toVec(),
4710 		Vec4(-1.0f, +1.0f, 0.0f, 1.0f), instance.inputColors[2].toVec(),
4711 
4712 		// Lower right corner:
4713 		Vec4(+1.0f, +0.5f, 0.0f, 1.0f), instance.inputColors[3].toVec(),
4714 		Vec4(+1.0f, +1.0f, 0.0f, 1.0f), instance.inputColors[3].toVec(),
4715 		Vec4(+0.5f, +1.0f, 0.0f, 1.0f), instance.inputColors[3].toVec()
4716 	};
4717 	const size_t							singleVertexDataSize	= 2 * sizeof(Vec4);
4718 	const size_t							vertexCount				= sizeof(vertexData) / singleVertexDataSize;
4719 
4720 	const VkBufferCreateInfo				vertexBufferParams		=
4721 	{
4722 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	//	VkStructureType		sType;
4723 		DE_NULL,								//	const void*			pNext;
4724 		0u,										//	VkBufferCreateFlags	flags;
4725 		(VkDeviceSize)sizeof(vertexData),		//	VkDeviceSize		size;
4726 		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,		//	VkBufferUsageFlags	usage;
4727 		VK_SHARING_MODE_EXCLUSIVE,				//	VkSharingMode		sharingMode;
4728 		1u,										//	deUint32			queueFamilyCount;
4729 		&queueFamilyIndex,						//	const deUint32*		pQueueFamilyIndices;
4730 	};
4731 	const Unique<VkBuffer>					vertexBuffer			(createBuffer(vk, vkDevice, &vertexBufferParams));
4732 	const UniquePtr<Allocation>				vertexBufferMemory		(context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *vertexBuffer), MemoryRequirement::HostVisible));
4733 
4734 	VK_CHECK(vk.bindBufferMemory(vkDevice, *vertexBuffer, vertexBufferMemory->getMemory(), vertexBufferMemory->getOffset()));
4735 
4736 	const VkDeviceSize						imageSizeBytes			= (VkDeviceSize)(sizeof(deUint32)*renderSize.x()*renderSize.y());
4737 	const VkBufferCreateInfo				readImageBufferParams	=
4738 	{
4739 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		//	VkStructureType		sType;
4740 		DE_NULL,									//	const void*			pNext;
4741 		0u,											//	VkBufferCreateFlags	flags;
4742 		imageSizeBytes,								//	VkDeviceSize		size;
4743 		VK_BUFFER_USAGE_TRANSFER_DST_BIT,			//	VkBufferUsageFlags	usage;
4744 		VK_SHARING_MODE_EXCLUSIVE,					//	VkSharingMode		sharingMode;
4745 		1u,											//	deUint32			queueFamilyCount;
4746 		&queueFamilyIndex,							//	const deUint32*		pQueueFamilyIndices;
4747 	};
4748 	const Unique<VkBuffer>					readImageBuffer			(createBuffer(vk, vkDevice, &readImageBufferParams));
4749 	const UniquePtr<Allocation>				readImageBufferMemory	(context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
4750 
4751 	VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
4752 
4753 	const VkImageCreateInfo					imageParams				=
4754 	{
4755 		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,									//	VkStructureType		sType;
4756 		DE_NULL,																//	const void*			pNext;
4757 		0u,																		//	VkImageCreateFlags	flags;
4758 		VK_IMAGE_TYPE_2D,														//	VkImageType			imageType;
4759 		VK_FORMAT_R8G8B8A8_UNORM,												//	VkFormat			format;
4760 		{ renderSize.x(), renderSize.y(), 1 },									//	VkExtent3D			extent;
4761 		1u,																		//	deUint32			mipLevels;
4762 		1u,																		//	deUint32			arraySize;
4763 		VK_SAMPLE_COUNT_1_BIT,													//	deUint32			samples;
4764 		VK_IMAGE_TILING_OPTIMAL,												//	VkImageTiling		tiling;
4765 		VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT|VK_IMAGE_USAGE_TRANSFER_SRC_BIT,	//	VkImageUsageFlags	usage;
4766 		VK_SHARING_MODE_EXCLUSIVE,												//	VkSharingMode		sharingMode;
4767 		1u,																		//	deUint32			queueFamilyCount;
4768 		&queueFamilyIndex,														//	const deUint32*		pQueueFamilyIndices;
4769 		VK_IMAGE_LAYOUT_UNDEFINED,												//	VkImageLayout		initialLayout;
4770 	};
4771 
4772 	const Unique<VkImage>					image					(createImage(vk, vkDevice, &imageParams));
4773 	const UniquePtr<Allocation>				imageMemory				(context.getDefaultAllocator().allocate(getImageMemoryRequirements(vk, vkDevice, *image), MemoryRequirement::Any));
4774 
4775 	VK_CHECK(vk.bindImageMemory(vkDevice, *image, imageMemory->getMemory(), imageMemory->getOffset()));
4776 
4777 	const VkAttachmentDescription			colorAttDesc			=
4778 	{
4779 		0u,												//	VkAttachmentDescriptionFlags	flags;
4780 		VK_FORMAT_R8G8B8A8_UNORM,						//	VkFormat						format;
4781 		VK_SAMPLE_COUNT_1_BIT,							//	deUint32						samples;
4782 		VK_ATTACHMENT_LOAD_OP_CLEAR,					//	VkAttachmentLoadOp				loadOp;
4783 		VK_ATTACHMENT_STORE_OP_STORE,					//	VkAttachmentStoreOp				storeOp;
4784 		VK_ATTACHMENT_LOAD_OP_DONT_CARE,				//	VkAttachmentLoadOp				stencilLoadOp;
4785 		VK_ATTACHMENT_STORE_OP_DONT_CARE,				//	VkAttachmentStoreOp				stencilStoreOp;
4786 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		//	VkImageLayout					initialLayout;
4787 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		//	VkImageLayout					finalLayout;
4788 	};
4789 	const VkAttachmentReference				colorAttRef				=
4790 	{
4791 		0u,												//	deUint32		attachment;
4792 		VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,		//	VkImageLayout	layout;
4793 	};
4794 	const VkSubpassDescription				subpassDesc				=
4795 	{
4796 		0u,												//	VkSubpassDescriptionFlags		flags;
4797 		VK_PIPELINE_BIND_POINT_GRAPHICS,				//	VkPipelineBindPoint				pipelineBindPoint;
4798 		0u,												//	deUint32						inputCount;
4799 		DE_NULL,										//	const VkAttachmentReference*	pInputAttachments;
4800 		1u,												//	deUint32						colorCount;
4801 		&colorAttRef,									//	const VkAttachmentReference*	pColorAttachments;
4802 		DE_NULL,										//	const VkAttachmentReference*	pResolveAttachments;
4803 		DE_NULL,										//	const VkAttachmentReference*	pDepthStencilAttachment;
4804 		0u,												//	deUint32						preserveCount;
4805 		DE_NULL,										//	const VkAttachmentReference*	pPreserveAttachments;
4806 
4807 	};
4808 	const VkRenderPassCreateInfo			renderPassParams		=
4809 	{
4810 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,		//	VkStructureType					sType;
4811 		DE_NULL,										//	const void*						pNext;
4812 		(VkRenderPassCreateFlags)0,
4813 		1u,												//	deUint32						attachmentCount;
4814 		&colorAttDesc,									//	const VkAttachmentDescription*	pAttachments;
4815 		1u,												//	deUint32						subpassCount;
4816 		&subpassDesc,									//	const VkSubpassDescription*		pSubpasses;
4817 		0u,												//	deUint32						dependencyCount;
4818 		DE_NULL,										//	const VkSubpassDependency*		pDependencies;
4819 	};
4820 	const Unique<VkRenderPass>				renderPass				(createRenderPass(vk, vkDevice, &renderPassParams));
4821 
4822 	const VkImageViewCreateInfo				colorAttViewParams		=
4823 	{
4824 		VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,		//	VkStructureType				sType;
4825 		DE_NULL,										//	const void*					pNext;
4826 		0u,												//	VkImageViewCreateFlags		flags;
4827 		*image,											//	VkImage						image;
4828 		VK_IMAGE_VIEW_TYPE_2D,							//	VkImageViewType				viewType;
4829 		VK_FORMAT_R8G8B8A8_UNORM,						//	VkFormat					format;
4830 		{
4831 			VK_COMPONENT_SWIZZLE_R,
4832 			VK_COMPONENT_SWIZZLE_G,
4833 			VK_COMPONENT_SWIZZLE_B,
4834 			VK_COMPONENT_SWIZZLE_A
4835 		},												//	VkChannelMapping			channels;
4836 		{
4837 			VK_IMAGE_ASPECT_COLOR_BIT,						//	VkImageAspectFlags	aspectMask;
4838 			0u,												//	deUint32			baseMipLevel;
4839 			1u,												//	deUint32			mipLevels;
4840 			0u,												//	deUint32			baseArrayLayer;
4841 			1u,												//	deUint32			arraySize;
4842 		},												//	VkImageSubresourceRange		subresourceRange;
4843 	};
4844 	const Unique<VkImageView>				colorAttView			(createImageView(vk, vkDevice, &colorAttViewParams));
4845 
4846 
4847 	// Pipeline layout
4848 	const VkPipelineLayoutCreateInfo		pipelineLayoutParams	=
4849 	{
4850 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,			//	VkStructureType					sType;
4851 		DE_NULL,												//	const void*						pNext;
4852 		(VkPipelineLayoutCreateFlags)0,
4853 		0u,														//	deUint32						descriptorSetCount;
4854 		DE_NULL,												//	const VkDescriptorSetLayout*	pSetLayouts;
4855 		0u,														//	deUint32						pushConstantRangeCount;
4856 		DE_NULL,												//	const VkPushConstantRange*		pPushConstantRanges;
4857 	};
4858 	const Unique<VkPipelineLayout>			pipelineLayout			(createPipelineLayout(vk, vkDevice, &pipelineLayoutParams));
4859 
4860 	// Pipeline
4861 	vector<VkPipelineShaderStageCreateInfo>		shaderStageParams;
4862 	// We need these vectors to make sure that information about specialization constants for each stage can outlive createGraphicsPipeline().
4863 	vector<vector<VkSpecializationMapEntry> >	specConstantEntries;
4864 	vector<VkSpecializationInfo>				specializationInfos;
4865 	createPipelineShaderStages(vk, vkDevice, instance, context, modules, shaderStageParams);
4866 
4867 	// And we don't want the reallocation of these vectors to invalidate pointers pointing to their contents.
4868 	specConstantEntries.reserve(shaderStageParams.size());
4869 	specializationInfos.reserve(shaderStageParams.size());
4870 
4871 	// Patch the specialization info field in PipelineShaderStageCreateInfos.
4872 	for (vector<VkPipelineShaderStageCreateInfo>::iterator stageInfo = shaderStageParams.begin(); stageInfo != shaderStageParams.end(); ++stageInfo)
4873 	{
4874 		const StageToSpecConstantMap::const_iterator stageIt = instance.specConstants.find(stageInfo->stage);
4875 
4876 		if (stageIt != instance.specConstants.end())
4877 		{
4878 			const size_t						numSpecConstants	= stageIt->second.size();
4879 			vector<VkSpecializationMapEntry>	entries;
4880 			VkSpecializationInfo				specInfo;
4881 
4882 			entries.resize(numSpecConstants);
4883 
4884 			// Only support 32-bit integers as spec constants now. And their constant IDs are numbered sequentially starting from 0.
4885 			for (size_t ndx = 0; ndx < numSpecConstants; ++ndx)
4886 			{
4887 				entries[ndx].constantID	= (deUint32)ndx;
4888 				entries[ndx].offset		= deUint32(ndx * sizeof(deInt32));
4889 				entries[ndx].size		= sizeof(deInt32);
4890 			}
4891 
4892 			specConstantEntries.push_back(entries);
4893 
4894 			specInfo.mapEntryCount	= (deUint32)numSpecConstants;
4895 			specInfo.pMapEntries	= specConstantEntries.back().data();
4896 			specInfo.dataSize		= numSpecConstants * sizeof(deInt32);
4897 			specInfo.pData			= stageIt->second.data();
4898 			specializationInfos.push_back(specInfo);
4899 
4900 			stageInfo->pSpecializationInfo = &specializationInfos.back();
4901 		}
4902 	}
4903 	const VkPipelineDepthStencilStateCreateInfo	depthStencilParams		=
4904 	{
4905 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,	//	VkStructureType		sType;
4906 		DE_NULL,													//	const void*			pNext;
4907 		(VkPipelineDepthStencilStateCreateFlags)0,
4908 		DE_FALSE,													//	deUint32			depthTestEnable;
4909 		DE_FALSE,													//	deUint32			depthWriteEnable;
4910 		VK_COMPARE_OP_ALWAYS,										//	VkCompareOp			depthCompareOp;
4911 		DE_FALSE,													//	deUint32			depthBoundsTestEnable;
4912 		DE_FALSE,													//	deUint32			stencilTestEnable;
4913 		{
4914 			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilFailOp;
4915 			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilPassOp;
4916 			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilDepthFailOp;
4917 			VK_COMPARE_OP_ALWAYS,										//	VkCompareOp	stencilCompareOp;
4918 			0u,															//	deUint32	stencilCompareMask;
4919 			0u,															//	deUint32	stencilWriteMask;
4920 			0u,															//	deUint32	stencilReference;
4921 		},															//	VkStencilOpState	front;
4922 		{
4923 			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilFailOp;
4924 			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilPassOp;
4925 			VK_STENCIL_OP_KEEP,											//	VkStencilOp	stencilDepthFailOp;
4926 			VK_COMPARE_OP_ALWAYS,										//	VkCompareOp	stencilCompareOp;
4927 			0u,															//	deUint32	stencilCompareMask;
4928 			0u,															//	deUint32	stencilWriteMask;
4929 			0u,															//	deUint32	stencilReference;
4930 		},															//	VkStencilOpState	back;
4931 		-1.0f,														//	float				minDepthBounds;
4932 		+1.0f,														//	float				maxDepthBounds;
4933 	};
4934 	const VkViewport						viewport0				=
4935 	{
4936 		0.0f,														//	float	originX;
4937 		0.0f,														//	float	originY;
4938 		(float)renderSize.x(),										//	float	width;
4939 		(float)renderSize.y(),										//	float	height;
4940 		0.0f,														//	float	minDepth;
4941 		1.0f,														//	float	maxDepth;
4942 	};
4943 	const VkRect2D							scissor0				=
4944 	{
4945 		{
4946 			0u,															//	deInt32	x;
4947 			0u,															//	deInt32	y;
4948 		},															//	VkOffset2D	offset;
4949 		{
4950 			renderSize.x(),												//	deInt32	width;
4951 			renderSize.y(),												//	deInt32	height;
4952 		},															//	VkExtent2D	extent;
4953 	};
4954 	const VkPipelineViewportStateCreateInfo		viewportParams			=
4955 	{
4956 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,		//	VkStructureType		sType;
4957 		DE_NULL,													//	const void*			pNext;
4958 		(VkPipelineViewportStateCreateFlags)0,
4959 		1u,															//	deUint32			viewportCount;
4960 		&viewport0,
4961 		1u,
4962 		&scissor0
4963 	};
4964 	const VkSampleMask							sampleMask				= ~0u;
4965 	const VkPipelineMultisampleStateCreateInfo	multisampleParams		=
4966 	{
4967 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,	//	VkStructureType			sType;
4968 		DE_NULL,													//	const void*				pNext;
4969 		(VkPipelineMultisampleStateCreateFlags)0,
4970 		VK_SAMPLE_COUNT_1_BIT,										//	VkSampleCountFlagBits	rasterSamples;
4971 		DE_FALSE,													//	deUint32				sampleShadingEnable;
4972 		0.0f,														//	float					minSampleShading;
4973 		&sampleMask,												//	const VkSampleMask*		pSampleMask;
4974 		DE_FALSE,													//	VkBool32				alphaToCoverageEnable;
4975 		DE_FALSE,													//	VkBool32				alphaToOneEnable;
4976 	};
4977 	const VkPipelineRasterizationStateCreateInfo	rasterParams		=
4978 	{
4979 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,	//	VkStructureType	sType;
4980 		DE_NULL,													//	const void*		pNext;
4981 		(VkPipelineRasterizationStateCreateFlags)0,
4982 		DE_TRUE,													//	deUint32		depthClipEnable;
4983 		DE_FALSE,													//	deUint32		rasterizerDiscardEnable;
4984 		VK_POLYGON_MODE_FILL,										//	VkFillMode		fillMode;
4985 		VK_CULL_MODE_NONE,											//	VkCullMode		cullMode;
4986 		VK_FRONT_FACE_COUNTER_CLOCKWISE,							//	VkFrontFace		frontFace;
4987 		VK_FALSE,													//	VkBool32		depthBiasEnable;
4988 		0.0f,														//	float			depthBias;
4989 		0.0f,														//	float			depthBiasClamp;
4990 		0.0f,														//	float			slopeScaledDepthBias;
4991 		1.0f,														//	float			lineWidth;
4992 	};
4993 	const VkPrimitiveTopology topology = hasTessellation? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST: VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST;
4994 	const VkPipelineInputAssemblyStateCreateInfo	inputAssemblyParams	=
4995 	{
4996 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,	//	VkStructureType		sType;
4997 		DE_NULL,														//	const void*			pNext;
4998 		(VkPipelineInputAssemblyStateCreateFlags)0,
4999 		topology,														//	VkPrimitiveTopology	topology;
5000 		DE_FALSE,														//	deUint32			primitiveRestartEnable;
5001 	};
5002 	const VkVertexInputBindingDescription		vertexBinding0 =
5003 	{
5004 		0u,									// deUint32					binding;
5005 		deUint32(singleVertexDataSize),		// deUint32					strideInBytes;
5006 		VK_VERTEX_INPUT_RATE_VERTEX			// VkVertexInputStepRate	stepRate;
5007 	};
5008 	const VkVertexInputAttributeDescription		vertexAttrib0[2] =
5009 	{
5010 		{
5011 			0u,									// deUint32	location;
5012 			0u,									// deUint32	binding;
5013 			VK_FORMAT_R32G32B32A32_SFLOAT,		// VkFormat	format;
5014 			0u									// deUint32	offsetInBytes;
5015 		},
5016 		{
5017 			1u,									// deUint32	location;
5018 			0u,									// deUint32	binding;
5019 			VK_FORMAT_R32G32B32A32_SFLOAT,		// VkFormat	format;
5020 			sizeof(Vec4),						// deUint32	offsetInBytes;
5021 		}
5022 	};
5023 
5024 	const VkPipelineVertexInputStateCreateInfo	vertexInputStateParams	=
5025 	{
5026 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	//	VkStructureType								sType;
5027 		DE_NULL,													//	const void*									pNext;
5028 		(VkPipelineVertexInputStateCreateFlags)0,
5029 		1u,															//	deUint32									bindingCount;
5030 		&vertexBinding0,											//	const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
5031 		2u,															//	deUint32									attributeCount;
5032 		vertexAttrib0,												//	const VkVertexInputAttributeDescription*	pVertexAttributeDescriptions;
5033 	};
5034 	const VkPipelineColorBlendAttachmentState	attBlendParams			=
5035 	{
5036 		DE_FALSE,													//	deUint32		blendEnable;
5037 		VK_BLEND_FACTOR_ONE,										//	VkBlend			srcBlendColor;
5038 		VK_BLEND_FACTOR_ZERO,										//	VkBlend			destBlendColor;
5039 		VK_BLEND_OP_ADD,											//	VkBlendOp		blendOpColor;
5040 		VK_BLEND_FACTOR_ONE,										//	VkBlend			srcBlendAlpha;
5041 		VK_BLEND_FACTOR_ZERO,										//	VkBlend			destBlendAlpha;
5042 		VK_BLEND_OP_ADD,											//	VkBlendOp		blendOpAlpha;
5043 		(VK_COLOR_COMPONENT_R_BIT|
5044 		 VK_COLOR_COMPONENT_G_BIT|
5045 		 VK_COLOR_COMPONENT_B_BIT|
5046 		 VK_COLOR_COMPONENT_A_BIT),									//	VkChannelFlags	channelWriteMask;
5047 	};
5048 	const VkPipelineColorBlendStateCreateInfo	blendParams				=
5049 	{
5050 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	//	VkStructureType								sType;
5051 		DE_NULL,													//	const void*									pNext;
5052 		(VkPipelineColorBlendStateCreateFlags)0,
5053 		DE_FALSE,													//	VkBool32									logicOpEnable;
5054 		VK_LOGIC_OP_COPY,											//	VkLogicOp									logicOp;
5055 		1u,															//	deUint32									attachmentCount;
5056 		&attBlendParams,											//	const VkPipelineColorBlendAttachmentState*	pAttachments;
5057 		{ 0.0f, 0.0f, 0.0f, 0.0f },									//	float										blendConst[4];
5058 	};
5059 	const VkPipelineTessellationStateCreateInfo	tessellationState	=
5060 	{
5061 		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
5062 		DE_NULL,
5063 		(VkPipelineTessellationStateCreateFlags)0,
5064 		3u
5065 	};
5066 
5067 	const VkPipelineTessellationStateCreateInfo* tessellationInfo	=	hasTessellation ? &tessellationState: DE_NULL;
5068 	const VkGraphicsPipelineCreateInfo		pipelineParams			=
5069 	{
5070 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,		//	VkStructureType									sType;
5071 		DE_NULL,												//	const void*										pNext;
5072 		0u,														//	VkPipelineCreateFlags							flags;
5073 		(deUint32)shaderStageParams.size(),						//	deUint32										stageCount;
5074 		&shaderStageParams[0],									//	const VkPipelineShaderStageCreateInfo*			pStages;
5075 		&vertexInputStateParams,								//	const VkPipelineVertexInputStateCreateInfo*		pVertexInputState;
5076 		&inputAssemblyParams,									//	const VkPipelineInputAssemblyStateCreateInfo*	pInputAssemblyState;
5077 		tessellationInfo,										//	const VkPipelineTessellationStateCreateInfo*	pTessellationState;
5078 		&viewportParams,										//	const VkPipelineViewportStateCreateInfo*		pViewportState;
5079 		&rasterParams,											//	const VkPipelineRasterStateCreateInfo*			pRasterState;
5080 		&multisampleParams,										//	const VkPipelineMultisampleStateCreateInfo*		pMultisampleState;
5081 		&depthStencilParams,									//	const VkPipelineDepthStencilStateCreateInfo*	pDepthStencilState;
5082 		&blendParams,											//	const VkPipelineColorBlendStateCreateInfo*		pColorBlendState;
5083 		(const VkPipelineDynamicStateCreateInfo*)DE_NULL,		//	const VkPipelineDynamicStateCreateInfo*			pDynamicState;
5084 		*pipelineLayout,										//	VkPipelineLayout								layout;
5085 		*renderPass,											//	VkRenderPass									renderPass;
5086 		0u,														//	deUint32										subpass;
5087 		DE_NULL,												//	VkPipeline										basePipelineHandle;
5088 		0u,														//	deInt32											basePipelineIndex;
5089 	};
5090 
5091 	const Unique<VkPipeline>				pipeline				(createGraphicsPipeline(vk, vkDevice, DE_NULL, &pipelineParams));
5092 
5093 	// Framebuffer
5094 	const VkFramebufferCreateInfo			framebufferParams		=
5095 	{
5096 		VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,				//	VkStructureType		sType;
5097 		DE_NULL,												//	const void*			pNext;
5098 		(VkFramebufferCreateFlags)0,
5099 		*renderPass,											//	VkRenderPass		renderPass;
5100 		1u,														//	deUint32			attachmentCount;
5101 		&*colorAttView,											//	const VkImageView*	pAttachments;
5102 		(deUint32)renderSize.x(),								//	deUint32			width;
5103 		(deUint32)renderSize.y(),								//	deUint32			height;
5104 		1u,														//	deUint32			layers;
5105 	};
5106 	const Unique<VkFramebuffer>				framebuffer				(createFramebuffer(vk, vkDevice, &framebufferParams));
5107 
5108 	const VkCommandPoolCreateInfo			cmdPoolParams			=
5109 	{
5110 		VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,					//	VkStructureType			sType;
5111 		DE_NULL,													//	const void*				pNext;
5112 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,				//	VkCmdPoolCreateFlags	flags;
5113 		queueFamilyIndex,											//	deUint32				queueFamilyIndex;
5114 	};
5115 	const Unique<VkCommandPool>				cmdPool					(createCommandPool(vk, vkDevice, &cmdPoolParams));
5116 
5117 	// Command buffer
5118 	const VkCommandBufferAllocateInfo		cmdBufParams			=
5119 	{
5120 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,			//	VkStructureType			sType;
5121 		DE_NULL,												//	const void*				pNext;
5122 		*cmdPool,												//	VkCmdPool				pool;
5123 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,						//	VkCmdBufferLevel		level;
5124 		1u,														//	deUint32				count;
5125 	};
5126 	const Unique<VkCommandBuffer>			cmdBuf					(allocateCommandBuffer(vk, vkDevice, &cmdBufParams));
5127 
5128 	const VkCommandBufferBeginInfo			cmdBufBeginParams		=
5129 	{
5130 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,			//	VkStructureType				sType;
5131 		DE_NULL,												//	const void*					pNext;
5132 		(VkCommandBufferUsageFlags)0,
5133 		(const VkCommandBufferInheritanceInfo*)DE_NULL,
5134 	};
5135 
5136 	// Record commands
5137 	VK_CHECK(vk.beginCommandBuffer(*cmdBuf, &cmdBufBeginParams));
5138 
5139 	{
5140 		const VkMemoryBarrier		vertFlushBarrier	=
5141 		{
5142 			VK_STRUCTURE_TYPE_MEMORY_BARRIER,			//	VkStructureType		sType;
5143 			DE_NULL,									//	const void*			pNext;
5144 			VK_ACCESS_HOST_WRITE_BIT,					//	VkMemoryOutputFlags	outputMask;
5145 			VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT,		//	VkMemoryInputFlags	inputMask;
5146 		};
5147 		const VkImageMemoryBarrier	colorAttBarrier		=
5148 		{
5149 			VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,		//	VkStructureType			sType;
5150 			DE_NULL,									//	const void*				pNext;
5151 			0u,											//	VkMemoryOutputFlags		outputMask;
5152 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//	VkMemoryInputFlags		inputMask;
5153 			VK_IMAGE_LAYOUT_UNDEFINED,					//	VkImageLayout			oldLayout;
5154 			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	//	VkImageLayout			newLayout;
5155 			queueFamilyIndex,							//	deUint32				srcQueueFamilyIndex;
5156 			queueFamilyIndex,							//	deUint32				destQueueFamilyIndex;
5157 			*image,										//	VkImage					image;
5158 			{
5159 				VK_IMAGE_ASPECT_COLOR_BIT,					//	VkImageAspect	aspect;
5160 				0u,											//	deUint32		baseMipLevel;
5161 				1u,											//	deUint32		mipLevels;
5162 				0u,											//	deUint32		baseArraySlice;
5163 				1u,											//	deUint32		arraySize;
5164 			}											//	VkImageSubresourceRange	subresourceRange;
5165 		};
5166 		vk.cmdPipelineBarrier(*cmdBuf, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, (VkDependencyFlags)0, 1, &vertFlushBarrier, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &colorAttBarrier);
5167 	}
5168 
5169 	{
5170 		const VkClearValue			clearValue		= makeClearValueColorF32(0.125f, 0.25f, 0.75f, 1.0f);
5171 		const VkRenderPassBeginInfo	passBeginParams	=
5172 		{
5173 			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,			//	VkStructureType		sType;
5174 			DE_NULL,											//	const void*			pNext;
5175 			*renderPass,										//	VkRenderPass		renderPass;
5176 			*framebuffer,										//	VkFramebuffer		framebuffer;
5177 			{ { 0, 0 }, { renderSize.x(), renderSize.y() } },	//	VkRect2D			renderArea;
5178 			1u,													//	deUint32			clearValueCount;
5179 			&clearValue,										//	const VkClearValue*	pClearValues;
5180 		};
5181 		vk.cmdBeginRenderPass(*cmdBuf, &passBeginParams, VK_SUBPASS_CONTENTS_INLINE);
5182 	}
5183 
5184 	vk.cmdBindPipeline(*cmdBuf, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
5185 	{
5186 		const VkDeviceSize bindingOffset = 0;
5187 		vk.cmdBindVertexBuffers(*cmdBuf, 0u, 1u, &vertexBuffer.get(), &bindingOffset);
5188 	}
5189 	vk.cmdDraw(*cmdBuf, deUint32(vertexCount), 1u /*run pipeline once*/, 0u /*first vertex*/, 0u /*first instanceIndex*/);
5190 	vk.cmdEndRenderPass(*cmdBuf);
5191 
5192 	{
5193 		const VkImageMemoryBarrier	renderFinishBarrier	=
5194 		{
5195 			VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,		//	VkStructureType			sType;
5196 			DE_NULL,									//	const void*				pNext;
5197 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,		//	VkMemoryOutputFlags		outputMask;
5198 			VK_ACCESS_TRANSFER_READ_BIT,				//	VkMemoryInputFlags		inputMask;
5199 			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	//	VkImageLayout			oldLayout;
5200 			VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,		//	VkImageLayout			newLayout;
5201 			queueFamilyIndex,							//	deUint32				srcQueueFamilyIndex;
5202 			queueFamilyIndex,							//	deUint32				destQueueFamilyIndex;
5203 			*image,										//	VkImage					image;
5204 			{
5205 				VK_IMAGE_ASPECT_COLOR_BIT,					//	VkImageAspectFlags	aspectMask;
5206 				0u,											//	deUint32			baseMipLevel;
5207 				1u,											//	deUint32			mipLevels;
5208 				0u,											//	deUint32			baseArraySlice;
5209 				1u,											//	deUint32			arraySize;
5210 			}											//	VkImageSubresourceRange	subresourceRange;
5211 		};
5212 		vk.cmdPipelineBarrier(*cmdBuf, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 0, (const VkBufferMemoryBarrier*)DE_NULL, 1, &renderFinishBarrier);
5213 	}
5214 
5215 	{
5216 		const VkBufferImageCopy	copyParams	=
5217 		{
5218 			(VkDeviceSize)0u,						//	VkDeviceSize			bufferOffset;
5219 			(deUint32)renderSize.x(),				//	deUint32				bufferRowLength;
5220 			(deUint32)renderSize.y(),				//	deUint32				bufferImageHeight;
5221 			{
5222 				VK_IMAGE_ASPECT_COLOR_BIT,				//	VkImageAspect		aspect;
5223 				0u,										//	deUint32			mipLevel;
5224 				0u,										//	deUint32			arrayLayer;
5225 				1u,										//	deUint32			arraySize;
5226 			},										//	VkImageSubresourceCopy	imageSubresource;
5227 			{ 0u, 0u, 0u },							//	VkOffset3D				imageOffset;
5228 			{ renderSize.x(), renderSize.y(), 1u }	//	VkExtent3D				imageExtent;
5229 		};
5230 		vk.cmdCopyImageToBuffer(*cmdBuf, *image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
5231 	}
5232 
5233 	{
5234 		const VkBufferMemoryBarrier	copyFinishBarrier	=
5235 		{
5236 			VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	//	VkStructureType		sType;
5237 			DE_NULL,									//	const void*			pNext;
5238 			VK_ACCESS_TRANSFER_WRITE_BIT,				//	VkMemoryOutputFlags	outputMask;
5239 			VK_ACCESS_HOST_READ_BIT,					//	VkMemoryInputFlags	inputMask;
5240 			queueFamilyIndex,							//	deUint32			srcQueueFamilyIndex;
5241 			queueFamilyIndex,							//	deUint32			destQueueFamilyIndex;
5242 			*readImageBuffer,							//	VkBuffer			buffer;
5243 			0u,											//	VkDeviceSize		offset;
5244 			imageSizeBytes								//	VkDeviceSize		size;
5245 		};
5246 		vk.cmdPipelineBarrier(*cmdBuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, (const VkMemoryBarrier*)DE_NULL, 1, &copyFinishBarrier, 0, (const VkImageMemoryBarrier*)DE_NULL);
5247 	}
5248 
5249 	VK_CHECK(vk.endCommandBuffer(*cmdBuf));
5250 
5251 	// Upload vertex data
5252 	{
5253 		const VkMappedMemoryRange	range			=
5254 		{
5255 			VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,	//	VkStructureType	sType;
5256 			DE_NULL,								//	const void*		pNext;
5257 			vertexBufferMemory->getMemory(),		//	VkDeviceMemory	mem;
5258 			0,										//	VkDeviceSize	offset;
5259 			(VkDeviceSize)sizeof(vertexData),		//	VkDeviceSize	size;
5260 		};
5261 		void*						vertexBufPtr	= vertexBufferMemory->getHostPtr();
5262 
5263 		deMemcpy(vertexBufPtr, &vertexData[0], sizeof(vertexData));
5264 		VK_CHECK(vk.flushMappedMemoryRanges(vkDevice, 1u, &range));
5265 	}
5266 
5267 	// Submit & wait for completion
5268 	{
5269 		const VkFenceCreateInfo	fenceParams	=
5270 		{
5271 			VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,	//	VkStructureType		sType;
5272 			DE_NULL,								//	const void*			pNext;
5273 			0u,										//	VkFenceCreateFlags	flags;
5274 		};
5275 		const Unique<VkFence>	fence		(createFence(vk, vkDevice, &fenceParams));
5276 		const VkSubmitInfo		submitInfo	=
5277 		{
5278 			VK_STRUCTURE_TYPE_SUBMIT_INFO,
5279 			DE_NULL,
5280 			0u,
5281 			(const VkSemaphore*)DE_NULL,
5282 			(const VkPipelineStageFlags*)DE_NULL,
5283 			1u,
5284 			&cmdBuf.get(),
5285 			0u,
5286 			(const VkSemaphore*)DE_NULL,
5287 		};
5288 
5289 		VK_CHECK(vk.queueSubmit(queue, 1u, &submitInfo, *fence));
5290 		VK_CHECK(vk.waitForFences(vkDevice, 1u, &fence.get(), DE_TRUE, ~0ull));
5291 	}
5292 
5293 	const void* imagePtr	= readImageBufferMemory->getHostPtr();
5294 	const tcu::ConstPixelBufferAccess pixelBuffer(tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8),
5295 												  renderSize.x(), renderSize.y(), 1, imagePtr);
5296 	// Log image
5297 	{
5298 		const VkMappedMemoryRange	range		=
5299 		{
5300 			VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,	//	VkStructureType	sType;
5301 			DE_NULL,								//	const void*		pNext;
5302 			readImageBufferMemory->getMemory(),		//	VkDeviceMemory	mem;
5303 			0,										//	VkDeviceSize	offset;
5304 			imageSizeBytes,							//	VkDeviceSize	size;
5305 		};
5306 
5307 		VK_CHECK(vk.invalidateMappedMemoryRanges(vkDevice, 1u, &range));
5308 		context.getTestContext().getLog() << TestLog::Image("Result", "Result", pixelBuffer);
5309 	}
5310 
5311 	const RGBA threshold(1, 1, 1, 1);
5312 	const RGBA upperLeft(pixelBuffer.getPixel(1, 1));
5313 	if (!tcu::compareThreshold(upperLeft, instance.outputColors[0], threshold))
5314 		return TestStatus::fail("Upper left corner mismatch");
5315 
5316 	const RGBA upperRight(pixelBuffer.getPixel(pixelBuffer.getWidth() - 1, 1));
5317 	if (!tcu::compareThreshold(upperRight, instance.outputColors[1], threshold))
5318 		return TestStatus::fail("Upper right corner mismatch");
5319 
5320 	const RGBA lowerLeft(pixelBuffer.getPixel(1, pixelBuffer.getHeight() - 1));
5321 	if (!tcu::compareThreshold(lowerLeft, instance.outputColors[2], threshold))
5322 		return TestStatus::fail("Lower left corner mismatch");
5323 
5324 	const RGBA lowerRight(pixelBuffer.getPixel(pixelBuffer.getWidth() - 1, pixelBuffer.getHeight() - 1));
5325 	if (!tcu::compareThreshold(lowerRight, instance.outputColors[3], threshold))
5326 		return TestStatus::fail("Lower right corner mismatch");
5327 
5328 	return TestStatus::pass("Rendered output matches input");
5329 }
5330 
createTestsForAllStages(const std::string & name,const RGBA (& inputColors)[4],const RGBA (& outputColors)[4],const map<string,string> & testCodeFragments,const vector<deInt32> & specConstants,tcu::TestCaseGroup * tests)5331 void createTestsForAllStages (const std::string& name, const RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments, const vector<deInt32>& specConstants, tcu::TestCaseGroup* tests)
5332 {
5333 	const ShaderElement		vertFragPipelineStages[]		=
5334 	{
5335 		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
5336 		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
5337 	};
5338 
5339 	const ShaderElement		tessPipelineStages[]			=
5340 	{
5341 		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
5342 		ShaderElement("tessc", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
5343 		ShaderElement("tesse", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
5344 		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
5345 	};
5346 
5347 	const ShaderElement		geomPipelineStages[]				=
5348 	{
5349 		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
5350 		ShaderElement("geom", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
5351 		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
5352 	};
5353 
5354 	StageToSpecConstantMap	specConstantMap;
5355 
5356 	specConstantMap[VK_SHADER_STAGE_VERTEX_BIT] = specConstants;
5357 	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_vert", "", addShaderCodeCustomVertex, runAndVerifyDefaultPipeline,
5358 												 createInstanceContext(vertFragPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5359 
5360 	specConstantMap.clear();
5361 	specConstantMap[VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT] = specConstants;
5362 	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_tessc", "", addShaderCodeCustomTessControl, runAndVerifyDefaultPipeline,
5363 												 createInstanceContext(tessPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5364 
5365 	specConstantMap.clear();
5366 	specConstantMap[VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT] = specConstants;
5367 	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_tesse", "", addShaderCodeCustomTessEval, runAndVerifyDefaultPipeline,
5368 												 createInstanceContext(tessPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5369 
5370 	specConstantMap.clear();
5371 	specConstantMap[VK_SHADER_STAGE_GEOMETRY_BIT] = specConstants;
5372 	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_geom", "", addShaderCodeCustomGeometry, runAndVerifyDefaultPipeline,
5373 												 createInstanceContext(geomPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5374 
5375 	specConstantMap.clear();
5376 	specConstantMap[VK_SHADER_STAGE_FRAGMENT_BIT] = specConstants;
5377 	addFunctionCaseWithPrograms<InstanceContext>(tests, name + "_frag", "", addShaderCodeCustomFragment, runAndVerifyDefaultPipeline,
5378 												 createInstanceContext(vertFragPipelineStages, inputColors, outputColors, testCodeFragments, specConstantMap));
5379 }
5380 
createTestsForAllStages(const std::string & name,const RGBA (& inputColors)[4],const RGBA (& outputColors)[4],const map<string,string> & testCodeFragments,tcu::TestCaseGroup * tests)5381 inline void createTestsForAllStages (const std::string& name, const RGBA (&inputColors)[4], const RGBA (&outputColors)[4], const map<string, string>& testCodeFragments, tcu::TestCaseGroup* tests)
5382 {
5383 	vector<deInt32> noSpecConstants;
5384 	createTestsForAllStages(name, inputColors, outputColors, testCodeFragments, noSpecConstants, tests);
5385 }
5386 
5387 } // anonymous
5388 
createOpSourceTests(tcu::TestContext & testCtx)5389 tcu::TestCaseGroup* createOpSourceTests (tcu::TestContext& testCtx)
5390 {
5391 	struct NameCodePair { string name, code; };
5392 	RGBA							defaultColors[4];
5393 	de::MovePtr<tcu::TestCaseGroup> opSourceTests			(new tcu::TestCaseGroup(testCtx, "opsource", "OpSource instruction"));
5394 	const std::string				opsourceGLSLWithFile	= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile ";
5395 	map<string, string>				fragments				= passthruFragments();
5396 	const NameCodePair				tests[]					=
5397 	{
5398 		{"unknown", "OpSource Unknown 321"},
5399 		{"essl", "OpSource ESSL 310"},
5400 		{"glsl", "OpSource GLSL 450"},
5401 		{"opencl_cpp", "OpSource OpenCL_CPP 120"},
5402 		{"opencl_c", "OpSource OpenCL_C 120"},
5403 		{"multiple", "OpSource GLSL 450\nOpSource GLSL 450"},
5404 		{"file", opsourceGLSLWithFile},
5405 		{"source", opsourceGLSLWithFile + "\"void main(){}\""},
5406 		// Longest possible source string: SPIR-V limits instructions to 65535
5407 		// words, of which the first 4 are opsourceGLSLWithFile; the rest will
5408 		// contain 65530 UTF8 characters (one word each) plus one last word
5409 		// containing 3 ASCII characters and \0.
5410 		{"longsource", opsourceGLSLWithFile + '"' + makeLongUTF8String(65530) + "ccc" + '"'}
5411 	};
5412 
5413 	getDefaultColors(defaultColors);
5414 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
5415 	{
5416 		fragments["debug"] = tests[testNdx].code;
5417 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
5418 	}
5419 
5420 	return opSourceTests.release();
5421 }
5422 
createOpSourceContinuedTests(tcu::TestContext & testCtx)5423 tcu::TestCaseGroup* createOpSourceContinuedTests (tcu::TestContext& testCtx)
5424 {
5425 	struct NameCodePair { string name, code; };
5426 	RGBA								defaultColors[4];
5427 	de::MovePtr<tcu::TestCaseGroup>		opSourceTests		(new tcu::TestCaseGroup(testCtx, "opsourcecontinued", "OpSourceContinued instruction"));
5428 	map<string, string>					fragments			= passthruFragments();
5429 	const std::string					opsource			= "%opsrcfile = OpString \"foo.vert\"\nOpSource GLSL 450 %opsrcfile \"void main(){}\"\n";
5430 	const NameCodePair					tests[]				=
5431 	{
5432 		{"empty", opsource + "OpSourceContinued \"\""},
5433 		{"short", opsource + "OpSourceContinued \"abcde\""},
5434 		{"multiple", opsource + "OpSourceContinued \"abcde\"\nOpSourceContinued \"fghij\""},
5435 		// Longest possible source string: SPIR-V limits instructions to 65535
5436 		// words, of which the first one is OpSourceContinued/length; the rest
5437 		// will contain 65533 UTF8 characters (one word each) plus one last word
5438 		// containing 3 ASCII characters and \0.
5439 		{"long", opsource + "OpSourceContinued \"" + makeLongUTF8String(65533) + "ccc\""}
5440 	};
5441 
5442 	getDefaultColors(defaultColors);
5443 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
5444 	{
5445 		fragments["debug"] = tests[testNdx].code;
5446 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opSourceTests.get());
5447 	}
5448 
5449 	return opSourceTests.release();
5450 }
5451 
createOpNoLineTests(tcu::TestContext & testCtx)5452 tcu::TestCaseGroup* createOpNoLineTests(tcu::TestContext& testCtx)
5453 {
5454 	RGBA								 defaultColors[4];
5455 	de::MovePtr<tcu::TestCaseGroup>		 opLineTests		 (new tcu::TestCaseGroup(testCtx, "opnoline", "OpNoLine instruction"));
5456 	map<string, string>					 fragments;
5457 	getDefaultColors(defaultColors);
5458 	fragments["debug"]			=
5459 		"%name = OpString \"name\"\n";
5460 
5461 	fragments["pre_main"]	=
5462 		"OpNoLine\n"
5463 		"OpNoLine\n"
5464 		"OpLine %name 1 1\n"
5465 		"OpNoLine\n"
5466 		"OpLine %name 1 1\n"
5467 		"OpLine %name 1 1\n"
5468 		"%second_function = OpFunction %v4f32 None %v4f32_function\n"
5469 		"OpNoLine\n"
5470 		"OpLine %name 1 1\n"
5471 		"OpNoLine\n"
5472 		"OpLine %name 1 1\n"
5473 		"OpLine %name 1 1\n"
5474 		"%second_param1 = OpFunctionParameter %v4f32\n"
5475 		"OpNoLine\n"
5476 		"OpNoLine\n"
5477 		"%label_secondfunction = OpLabel\n"
5478 		"OpNoLine\n"
5479 		"OpReturnValue %second_param1\n"
5480 		"OpFunctionEnd\n"
5481 		"OpNoLine\n"
5482 		"OpNoLine\n";
5483 
5484 	fragments["testfun"]		=
5485 		// A %test_code function that returns its argument unchanged.
5486 		"OpNoLine\n"
5487 		"OpNoLine\n"
5488 		"OpLine %name 1 1\n"
5489 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5490 		"OpNoLine\n"
5491 		"%param1 = OpFunctionParameter %v4f32\n"
5492 		"OpNoLine\n"
5493 		"OpNoLine\n"
5494 		"%label_testfun = OpLabel\n"
5495 		"OpNoLine\n"
5496 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
5497 		"OpReturnValue %val1\n"
5498 		"OpFunctionEnd\n"
5499 		"OpLine %name 1 1\n"
5500 		"OpNoLine\n";
5501 
5502 	createTestsForAllStages("opnoline", defaultColors, defaultColors, fragments, opLineTests.get());
5503 
5504 	return opLineTests.release();
5505 }
5506 
5507 
createOpLineTests(tcu::TestContext & testCtx)5508 tcu::TestCaseGroup* createOpLineTests(tcu::TestContext& testCtx)
5509 {
5510 	RGBA													defaultColors[4];
5511 	de::MovePtr<tcu::TestCaseGroup>							opLineTests			(new tcu::TestCaseGroup(testCtx, "opline", "OpLine instruction"));
5512 	map<string, string>										fragments;
5513 	std::vector<std::pair<std::string, std::string> >		problemStrings;
5514 
5515 	problemStrings.push_back(std::make_pair<std::string, std::string>("empty_name", ""));
5516 	problemStrings.push_back(std::make_pair<std::string, std::string>("short_name", "short_name"));
5517 	problemStrings.push_back(std::make_pair<std::string, std::string>("long_name", makeLongUTF8String(65530) + "ccc"));
5518 	getDefaultColors(defaultColors);
5519 
5520 	fragments["debug"]			=
5521 		"%other_name = OpString \"other_name\"\n";
5522 
5523 	fragments["pre_main"]	=
5524 		"OpLine %file_name 32 0\n"
5525 		"OpLine %file_name 32 32\n"
5526 		"OpLine %file_name 32 40\n"
5527 		"OpLine %other_name 32 40\n"
5528 		"OpLine %other_name 0 100\n"
5529 		"OpLine %other_name 0 4294967295\n"
5530 		"OpLine %other_name 4294967295 0\n"
5531 		"OpLine %other_name 32 40\n"
5532 		"OpLine %file_name 0 0\n"
5533 		"%second_function = OpFunction %v4f32 None %v4f32_function\n"
5534 		"OpLine %file_name 1 0\n"
5535 		"%second_param1 = OpFunctionParameter %v4f32\n"
5536 		"OpLine %file_name 1 3\n"
5537 		"OpLine %file_name 1 2\n"
5538 		"%label_secondfunction = OpLabel\n"
5539 		"OpLine %file_name 0 2\n"
5540 		"OpReturnValue %second_param1\n"
5541 		"OpFunctionEnd\n"
5542 		"OpLine %file_name 0 2\n"
5543 		"OpLine %file_name 0 2\n";
5544 
5545 	fragments["testfun"]		=
5546 		// A %test_code function that returns its argument unchanged.
5547 		"OpLine %file_name 1 0\n"
5548 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5549 		"OpLine %file_name 16 330\n"
5550 		"%param1 = OpFunctionParameter %v4f32\n"
5551 		"OpLine %file_name 14 442\n"
5552 		"%label_testfun = OpLabel\n"
5553 		"OpLine %file_name 11 1024\n"
5554 		"%val1 = OpFunctionCall %v4f32 %second_function %param1\n"
5555 		"OpLine %file_name 2 97\n"
5556 		"OpReturnValue %val1\n"
5557 		"OpFunctionEnd\n"
5558 		"OpLine %file_name 5 32\n";
5559 
5560 	for (size_t i = 0; i < problemStrings.size(); ++i)
5561 	{
5562 		map<string, string> testFragments = fragments;
5563 		testFragments["debug"] += "%file_name = OpString \"" + problemStrings[i].second + "\"\n";
5564 		createTestsForAllStages(string("opline") + "_" + problemStrings[i].first, defaultColors, defaultColors, testFragments, opLineTests.get());
5565 	}
5566 
5567 	return opLineTests.release();
5568 }
5569 
createOpConstantNullTests(tcu::TestContext & testCtx)5570 tcu::TestCaseGroup* createOpConstantNullTests(tcu::TestContext& testCtx)
5571 {
5572 	de::MovePtr<tcu::TestCaseGroup> opConstantNullTests		(new tcu::TestCaseGroup(testCtx, "opconstantnull", "OpConstantNull instruction"));
5573 	RGBA							colors[4];
5574 
5575 
5576 	const char						functionStart[] =
5577 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5578 		"%param1 = OpFunctionParameter %v4f32\n"
5579 		"%lbl    = OpLabel\n";
5580 
5581 	const char						functionEnd[]	=
5582 		"OpReturnValue %transformed_param\n"
5583 		"OpFunctionEnd\n";
5584 
5585 	struct NameConstantsCode
5586 	{
5587 		string name;
5588 		string constants;
5589 		string code;
5590 	};
5591 
5592 	NameConstantsCode tests[] =
5593 	{
5594 		{
5595 			"vec4",
5596 			"%cnull = OpConstantNull %v4f32\n",
5597 			"%transformed_param = OpFAdd %v4f32 %param1 %cnull\n"
5598 		},
5599 		{
5600 			"float",
5601 			"%cnull = OpConstantNull %f32\n",
5602 			"%vp = OpVariable %fp_v4f32 Function\n"
5603 			"%v  = OpLoad %v4f32 %vp\n"
5604 			"%v0 = OpVectorInsertDynamic %v4f32 %v %cnull %c_i32_0\n"
5605 			"%v1 = OpVectorInsertDynamic %v4f32 %v0 %cnull %c_i32_1\n"
5606 			"%v2 = OpVectorInsertDynamic %v4f32 %v1 %cnull %c_i32_2\n"
5607 			"%v3 = OpVectorInsertDynamic %v4f32 %v2 %cnull %c_i32_3\n"
5608 			"%transformed_param = OpFAdd %v4f32 %param1 %v3\n"
5609 		},
5610 		{
5611 			"bool",
5612 			"%cnull             = OpConstantNull %bool\n",
5613 			"%v                 = OpVariable %fp_v4f32 Function\n"
5614 			"                     OpStore %v %param1\n"
5615 			"                     OpSelectionMerge %false_label None\n"
5616 			"                     OpBranchConditional %cnull %true_label %false_label\n"
5617 			"%true_label        = OpLabel\n"
5618 			"                     OpStore %v %c_v4f32_0_5_0_5_0_5_0_5\n"
5619 			"                     OpBranch %false_label\n"
5620 			"%false_label       = OpLabel\n"
5621 			"%transformed_param = OpLoad %v4f32 %v\n"
5622 		},
5623 		{
5624 			"i32",
5625 			"%cnull             = OpConstantNull %i32\n",
5626 			"%v                 = OpVariable %fp_v4f32 Function %c_v4f32_0_5_0_5_0_5_0_5\n"
5627 			"%b                 = OpIEqual %bool %cnull %c_i32_0\n"
5628 			"                     OpSelectionMerge %false_label None\n"
5629 			"                     OpBranchConditional %b %true_label %false_label\n"
5630 			"%true_label        = OpLabel\n"
5631 			"                     OpStore %v %param1\n"
5632 			"                     OpBranch %false_label\n"
5633 			"%false_label       = OpLabel\n"
5634 			"%transformed_param = OpLoad %v4f32 %v\n"
5635 		},
5636 		{
5637 			"struct",
5638 			"%stype             = OpTypeStruct %f32 %v4f32\n"
5639 			"%fp_stype          = OpTypePointer Function %stype\n"
5640 			"%cnull             = OpConstantNull %stype\n",
5641 			"%v                 = OpVariable %fp_stype Function %cnull\n"
5642 			"%f                 = OpAccessChain %fp_v4f32 %v %c_i32_1\n"
5643 			"%f_val             = OpLoad %v4f32 %f\n"
5644 			"%transformed_param = OpFAdd %v4f32 %param1 %f_val\n"
5645 		},
5646 		{
5647 			"array",
5648 			"%a4_v4f32          = OpTypeArray %v4f32 %c_u32_4\n"
5649 			"%fp_a4_v4f32       = OpTypePointer Function %a4_v4f32\n"
5650 			"%cnull             = OpConstantNull %a4_v4f32\n",
5651 			"%v                 = OpVariable %fp_a4_v4f32 Function %cnull\n"
5652 			"%f                 = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
5653 			"%f1                = OpAccessChain %fp_v4f32 %v %c_u32_1\n"
5654 			"%f2                = OpAccessChain %fp_v4f32 %v %c_u32_2\n"
5655 			"%f3                = OpAccessChain %fp_v4f32 %v %c_u32_3\n"
5656 			"%f_val             = OpLoad %v4f32 %f\n"
5657 			"%f1_val            = OpLoad %v4f32 %f1\n"
5658 			"%f2_val            = OpLoad %v4f32 %f2\n"
5659 			"%f3_val            = OpLoad %v4f32 %f3\n"
5660 			"%t0                = OpFAdd %v4f32 %param1 %f_val\n"
5661 			"%t1                = OpFAdd %v4f32 %t0 %f1_val\n"
5662 			"%t2                = OpFAdd %v4f32 %t1 %f2_val\n"
5663 			"%transformed_param = OpFAdd %v4f32 %t2 %f3_val\n"
5664 		},
5665 		{
5666 			"matrix",
5667 			"%mat4x4_f32        = OpTypeMatrix %v4f32 4\n"
5668 			"%cnull             = OpConstantNull %mat4x4_f32\n",
5669 			// Our null matrix * any vector should result in a zero vector.
5670 			"%v                 = OpVectorTimesMatrix %v4f32 %param1 %cnull\n"
5671 			"%transformed_param = OpFAdd %v4f32 %param1 %v\n"
5672 		}
5673 	};
5674 
5675 	getHalfColorsFullAlpha(colors);
5676 
5677 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
5678 	{
5679 		map<string, string> fragments;
5680 		fragments["pre_main"] = tests[testNdx].constants;
5681 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
5682 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, opConstantNullTests.get());
5683 	}
5684 	return opConstantNullTests.release();
5685 }
createOpConstantCompositeTests(tcu::TestContext & testCtx)5686 tcu::TestCaseGroup* createOpConstantCompositeTests(tcu::TestContext& testCtx)
5687 {
5688 	de::MovePtr<tcu::TestCaseGroup> opConstantCompositeTests		(new tcu::TestCaseGroup(testCtx, "opconstantcomposite", "OpConstantComposite instruction"));
5689 	RGBA							inputColors[4];
5690 	RGBA							outputColors[4];
5691 
5692 
5693 	const char						functionStart[]	 =
5694 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5695 		"%param1 = OpFunctionParameter %v4f32\n"
5696 		"%lbl    = OpLabel\n";
5697 
5698 	const char						functionEnd[]		=
5699 		"OpReturnValue %transformed_param\n"
5700 		"OpFunctionEnd\n";
5701 
5702 	struct NameConstantsCode
5703 	{
5704 		string name;
5705 		string constants;
5706 		string code;
5707 	};
5708 
5709 	NameConstantsCode tests[] =
5710 	{
5711 		{
5712 			"vec4",
5713 
5714 			"%cval              = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_0\n",
5715 			"%transformed_param = OpFAdd %v4f32 %param1 %cval\n"
5716 		},
5717 		{
5718 			"struct",
5719 
5720 			"%stype             = OpTypeStruct %v4f32 %f32\n"
5721 			"%fp_stype          = OpTypePointer Function %stype\n"
5722 			"%f32_n_1           = OpConstant %f32 -1.0\n"
5723 			"%f32_1_5           = OpConstant %f32 !0x3fc00000\n" // +1.5
5724 			"%cvec              = OpConstantComposite %v4f32 %f32_1_5 %f32_1_5 %f32_1_5 %c_f32_1\n"
5725 			"%cval              = OpConstantComposite %stype %cvec %f32_n_1\n",
5726 
5727 			"%v                 = OpVariable %fp_stype Function %cval\n"
5728 			"%vec_ptr           = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
5729 			"%f32_ptr           = OpAccessChain %fp_f32 %v %c_u32_1\n"
5730 			"%vec_val           = OpLoad %v4f32 %vec_ptr\n"
5731 			"%f32_val           = OpLoad %f32 %f32_ptr\n"
5732 			"%tmp1              = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_1 %f32_val\n" // vec4(-1)
5733 			"%tmp2              = OpFAdd %v4f32 %tmp1 %param1\n" // param1 + vec4(-1)
5734 			"%transformed_param = OpFAdd %v4f32 %tmp2 %vec_val\n" // param1 + vec4(-1) + vec4(1.5, 1.5, 1.5, 1.0)
5735 		},
5736 		{
5737 			// [1|0|0|0.5] [x] = x + 0.5
5738 			// [0|1|0|0.5] [y] = y + 0.5
5739 			// [0|0|1|0.5] [z] = z + 0.5
5740 			// [0|0|0|1  ] [1] = 1
5741 			"matrix",
5742 
5743 			"%mat4x4_f32          = OpTypeMatrix %v4f32 4\n"
5744 		    "%v4f32_1_0_0_0       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_0 %c_f32_0 %c_f32_0\n"
5745 		    "%v4f32_0_1_0_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_1 %c_f32_0 %c_f32_0\n"
5746 		    "%v4f32_0_0_1_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_1 %c_f32_0\n"
5747 		    "%v4f32_0_5_0_5_0_5_1 = OpConstantComposite %v4f32 %c_f32_0_5 %c_f32_0_5 %c_f32_0_5 %c_f32_1\n"
5748 			"%cval                = OpConstantComposite %mat4x4_f32 %v4f32_1_0_0_0 %v4f32_0_1_0_0 %v4f32_0_0_1_0 %v4f32_0_5_0_5_0_5_1\n",
5749 
5750 			"%transformed_param   = OpMatrixTimesVector %v4f32 %cval %param1\n"
5751 		},
5752 		{
5753 			"array",
5754 
5755 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
5756 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
5757 			"%f32_n_1             = OpConstant %f32 -1.0\n"
5758 			"%f32_1_5             = OpConstant %f32 !0x3fc00000\n" // +1.5
5759 			"%carr                = OpConstantComposite %a4f32 %c_f32_0 %f32_n_1 %f32_1_5 %c_f32_0\n",
5760 
5761 			"%v                   = OpVariable %fp_a4f32 Function %carr\n"
5762 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_0\n"
5763 			"%f1                  = OpAccessChain %fp_f32 %v %c_u32_1\n"
5764 			"%f2                  = OpAccessChain %fp_f32 %v %c_u32_2\n"
5765 			"%f3                  = OpAccessChain %fp_f32 %v %c_u32_3\n"
5766 			"%f_val               = OpLoad %f32 %f\n"
5767 			"%f1_val              = OpLoad %f32 %f1\n"
5768 			"%f2_val              = OpLoad %f32 %f2\n"
5769 			"%f3_val              = OpLoad %f32 %f3\n"
5770 			"%ftot1               = OpFAdd %f32 %f_val %f1_val\n"
5771 			"%ftot2               = OpFAdd %f32 %ftot1 %f2_val\n"
5772 			"%ftot3               = OpFAdd %f32 %ftot2 %f3_val\n"  // 0 - 1 + 1.5 + 0
5773 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %ftot3\n"
5774 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
5775 		},
5776 		{
5777 			//
5778 			// [
5779 			//   {
5780 			//      0.0,
5781 			//      [ 1.0, 1.0, 1.0, 1.0]
5782 			//   },
5783 			//   {
5784 			//      1.0,
5785 			//      [ 0.0, 0.5, 0.0, 0.0]
5786 			//   }, //     ^^^
5787 			//   {
5788 			//      0.0,
5789 			//      [ 1.0, 1.0, 1.0, 1.0]
5790 			//   }
5791 			// ]
5792 			"array_of_struct_of_array",
5793 
5794 			"%c_v4f32_1_1_1_0     = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_0\n"
5795 			"%fp_a4f32            = OpTypePointer Function %a4f32\n"
5796 			"%stype               = OpTypeStruct %f32 %a4f32\n"
5797 			"%a3stype             = OpTypeArray %stype %c_u32_3\n"
5798 			"%fp_a3stype          = OpTypePointer Function %a3stype\n"
5799 			"%ca4f32_0            = OpConstantComposite %a4f32 %c_f32_0 %c_f32_0_5 %c_f32_0 %c_f32_0\n"
5800 			"%ca4f32_1            = OpConstantComposite %a4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
5801 			"%cstype1             = OpConstantComposite %stype %c_f32_0 %ca4f32_1\n"
5802 			"%cstype2             = OpConstantComposite %stype %c_f32_1 %ca4f32_0\n"
5803 			"%carr                = OpConstantComposite %a3stype %cstype1 %cstype2 %cstype1",
5804 
5805 			"%v                   = OpVariable %fp_a3stype Function %carr\n"
5806 			"%f                   = OpAccessChain %fp_f32 %v %c_u32_1 %c_u32_1 %c_u32_1\n"
5807 			"%f_l                 = OpLoad %f32 %f\n"
5808 			"%add_vec             = OpVectorTimesScalar %v4f32 %c_v4f32_1_1_1_0 %f_l\n"
5809 			"%transformed_param   = OpFAdd %v4f32 %param1 %add_vec\n"
5810 		}
5811 	};
5812 
5813 	getHalfColorsFullAlpha(inputColors);
5814 	outputColors[0] = RGBA(255, 255, 255, 255);
5815 	outputColors[1] = RGBA(255, 127, 127, 255);
5816 	outputColors[2] = RGBA(127, 255, 127, 255);
5817 	outputColors[3] = RGBA(127, 127, 255, 255);
5818 
5819 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameConstantsCode); ++testNdx)
5820 	{
5821 		map<string, string> fragments;
5822 		fragments["pre_main"] = tests[testNdx].constants;
5823 		fragments["testfun"] = string(functionStart) + tests[testNdx].code + functionEnd;
5824 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, opConstantCompositeTests.get());
5825 	}
5826 	return opConstantCompositeTests.release();
5827 }
5828 
createSelectionBlockOrderTests(tcu::TestContext & testCtx)5829 tcu::TestCaseGroup* createSelectionBlockOrderTests(tcu::TestContext& testCtx)
5830 {
5831 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "selection_block_order", "Out-of-order blocks for selection"));
5832 	RGBA							inputColors[4];
5833 	RGBA							outputColors[4];
5834 	map<string, string>				fragments;
5835 
5836 	// vec4 test_code(vec4 param) {
5837 	//   vec4 result = param;
5838 	//   for (int i = 0; i < 4; ++i) {
5839 	//     if (i == 0) result[i] = 0.;
5840 	//     else        result[i] = 1. - result[i];
5841 	//   }
5842 	//   return result;
5843 	// }
5844 	const char						function[]			=
5845 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5846 		"%param1    = OpFunctionParameter %v4f32\n"
5847 		"%lbl       = OpLabel\n"
5848 		"%iptr      = OpVariable %fp_i32 Function\n"
5849 		"%result    = OpVariable %fp_v4f32 Function\n"
5850 		"             OpStore %iptr %c_i32_0\n"
5851 		"             OpStore %result %param1\n"
5852 		"             OpBranch %loop\n"
5853 
5854 		// Loop entry block.
5855 		"%loop      = OpLabel\n"
5856 		"%ival      = OpLoad %i32 %iptr\n"
5857 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
5858 		"             OpLoopMerge %exit %loop None\n"
5859 		"             OpBranchConditional %lt_4 %if_entry %exit\n"
5860 
5861 		// Merge block for loop.
5862 		"%exit      = OpLabel\n"
5863 		"%ret       = OpLoad %v4f32 %result\n"
5864 		"             OpReturnValue %ret\n"
5865 
5866 		// If-statement entry block.
5867 		"%if_entry  = OpLabel\n"
5868 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
5869 		"%eq_0      = OpIEqual %bool %ival %c_i32_0\n"
5870 		"             OpSelectionMerge %if_exit None\n"
5871 		"             OpBranchConditional %eq_0 %if_true %if_false\n"
5872 
5873 		// False branch for if-statement.
5874 		"%if_false  = OpLabel\n"
5875 		"%val       = OpLoad %f32 %loc\n"
5876 		"%sub       = OpFSub %f32 %c_f32_1 %val\n"
5877 		"             OpStore %loc %sub\n"
5878 		"             OpBranch %if_exit\n"
5879 
5880 		// Merge block for if-statement.
5881 		"%if_exit   = OpLabel\n"
5882 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
5883 		"             OpStore %iptr %ival_next\n"
5884 		"             OpBranch %loop\n"
5885 
5886 		// True branch for if-statement.
5887 		"%if_true   = OpLabel\n"
5888 		"             OpStore %loc %c_f32_0\n"
5889 		"             OpBranch %if_exit\n"
5890 
5891 		"             OpFunctionEnd\n";
5892 
5893 	fragments["testfun"]	= function;
5894 
5895 	inputColors[0]			= RGBA(127, 127, 127, 0);
5896 	inputColors[1]			= RGBA(127, 0,   0,   0);
5897 	inputColors[2]			= RGBA(0,   127, 0,   0);
5898 	inputColors[3]			= RGBA(0,   0,   127, 0);
5899 
5900 	outputColors[0]			= RGBA(0, 128, 128, 255);
5901 	outputColors[1]			= RGBA(0, 255, 255, 255);
5902 	outputColors[2]			= RGBA(0, 128, 255, 255);
5903 	outputColors[3]			= RGBA(0, 255, 128, 255);
5904 
5905 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
5906 
5907 	return group.release();
5908 }
5909 
createSwitchBlockOrderTests(tcu::TestContext & testCtx)5910 tcu::TestCaseGroup* createSwitchBlockOrderTests(tcu::TestContext& testCtx)
5911 {
5912 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "switch_block_order", "Out-of-order blocks for switch"));
5913 	RGBA							inputColors[4];
5914 	RGBA							outputColors[4];
5915 	map<string, string>				fragments;
5916 
5917 	const char						typesAndConstants[]	=
5918 		"%c_f32_p2  = OpConstant %f32 0.2\n"
5919 		"%c_f32_p4  = OpConstant %f32 0.4\n"
5920 		"%c_f32_p6  = OpConstant %f32 0.6\n"
5921 		"%c_f32_p8  = OpConstant %f32 0.8\n";
5922 
5923 	// vec4 test_code(vec4 param) {
5924 	//   vec4 result = param;
5925 	//   for (int i = 0; i < 4; ++i) {
5926 	//     switch (i) {
5927 	//       case 0: result[i] += .2; break;
5928 	//       case 1: result[i] += .6; break;
5929 	//       case 2: result[i] += .4; break;
5930 	//       case 3: result[i] += .8; break;
5931 	//       default: break; // unreachable
5932 	//     }
5933 	//   }
5934 	//   return result;
5935 	// }
5936 	const char						function[]			=
5937 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
5938 		"%param1    = OpFunctionParameter %v4f32\n"
5939 		"%lbl       = OpLabel\n"
5940 		"%iptr      = OpVariable %fp_i32 Function\n"
5941 		"%result    = OpVariable %fp_v4f32 Function\n"
5942 		"             OpStore %iptr %c_i32_0\n"
5943 		"             OpStore %result %param1\n"
5944 		"             OpBranch %loop\n"
5945 
5946 		// Loop entry block.
5947 		"%loop      = OpLabel\n"
5948 		"%ival      = OpLoad %i32 %iptr\n"
5949 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
5950 		"             OpLoopMerge %exit %loop None\n"
5951 		"             OpBranchConditional %lt_4 %switch_entry %exit\n"
5952 
5953 		// Merge block for loop.
5954 		"%exit      = OpLabel\n"
5955 		"%ret       = OpLoad %v4f32 %result\n"
5956 		"             OpReturnValue %ret\n"
5957 
5958 		// Switch-statement entry block.
5959 		"%switch_entry   = OpLabel\n"
5960 		"%loc            = OpAccessChain %fp_f32 %result %ival\n"
5961 		"%val            = OpLoad %f32 %loc\n"
5962 		"                  OpSelectionMerge %switch_exit None\n"
5963 		"                  OpSwitch %ival %switch_default 0 %case0 1 %case1 2 %case2 3 %case3\n"
5964 
5965 		"%case2          = OpLabel\n"
5966 		"%addp4          = OpFAdd %f32 %val %c_f32_p4\n"
5967 		"                  OpStore %loc %addp4\n"
5968 		"                  OpBranch %switch_exit\n"
5969 
5970 		"%switch_default = OpLabel\n"
5971 		"                  OpUnreachable\n"
5972 
5973 		"%case3          = OpLabel\n"
5974 		"%addp8          = OpFAdd %f32 %val %c_f32_p8\n"
5975 		"                  OpStore %loc %addp8\n"
5976 		"                  OpBranch %switch_exit\n"
5977 
5978 		"%case0          = OpLabel\n"
5979 		"%addp2          = OpFAdd %f32 %val %c_f32_p2\n"
5980 		"                  OpStore %loc %addp2\n"
5981 		"                  OpBranch %switch_exit\n"
5982 
5983 		// Merge block for switch-statement.
5984 		"%switch_exit    = OpLabel\n"
5985 		"%ival_next      = OpIAdd %i32 %ival %c_i32_1\n"
5986 		"                  OpStore %iptr %ival_next\n"
5987 		"                  OpBranch %loop\n"
5988 
5989 		"%case1          = OpLabel\n"
5990 		"%addp6          = OpFAdd %f32 %val %c_f32_p6\n"
5991 		"                  OpStore %loc %addp6\n"
5992 		"                  OpBranch %switch_exit\n"
5993 
5994 		"                  OpFunctionEnd\n";
5995 
5996 	fragments["pre_main"]	= typesAndConstants;
5997 	fragments["testfun"]	= function;
5998 
5999 	inputColors[0]			= RGBA(127, 27,  127, 51);
6000 	inputColors[1]			= RGBA(127, 0,   0,   51);
6001 	inputColors[2]			= RGBA(0,   27,  0,   51);
6002 	inputColors[3]			= RGBA(0,   0,   127, 51);
6003 
6004 	outputColors[0]			= RGBA(178, 180, 229, 255);
6005 	outputColors[1]			= RGBA(178, 153, 102, 255);
6006 	outputColors[2]			= RGBA(51,  180, 102, 255);
6007 	outputColors[3]			= RGBA(51,  153, 229, 255);
6008 
6009 	createTestsForAllStages("out_of_order", inputColors, outputColors, fragments, group.get());
6010 
6011 	return group.release();
6012 }
6013 
createDecorationGroupTests(tcu::TestContext & testCtx)6014 tcu::TestCaseGroup* createDecorationGroupTests(tcu::TestContext& testCtx)
6015 {
6016 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "decoration_group", "Decoration group tests"));
6017 	RGBA							inputColors[4];
6018 	RGBA							outputColors[4];
6019 	map<string, string>				fragments;
6020 
6021 	const char						decorations[]		=
6022 		"OpDecorate %array_group         ArrayStride 4\n"
6023 		"OpDecorate %struct_member_group Offset 0\n"
6024 		"%array_group         = OpDecorationGroup\n"
6025 		"%struct_member_group = OpDecorationGroup\n"
6026 
6027 		"OpDecorate %group1 RelaxedPrecision\n"
6028 		"OpDecorate %group3 RelaxedPrecision\n"
6029 		"OpDecorate %group3 Invariant\n"
6030 		"OpDecorate %group3 Restrict\n"
6031 		"%group0 = OpDecorationGroup\n"
6032 		"%group1 = OpDecorationGroup\n"
6033 		"%group3 = OpDecorationGroup\n";
6034 
6035 	const char						typesAndConstants[]	=
6036 		"%a3f32     = OpTypeArray %f32 %c_u32_3\n"
6037 		"%struct1   = OpTypeStruct %a3f32\n"
6038 		"%struct2   = OpTypeStruct %a3f32\n"
6039 		"%fp_struct1 = OpTypePointer Function %struct1\n"
6040 		"%fp_struct2 = OpTypePointer Function %struct2\n"
6041 		"%c_f32_2    = OpConstant %f32 2.\n"
6042 		"%c_f32_n2   = OpConstant %f32 -2.\n"
6043 
6044 		"%c_a3f32_1 = OpConstantComposite %a3f32 %c_f32_1 %c_f32_2 %c_f32_1\n"
6045 		"%c_a3f32_2 = OpConstantComposite %a3f32 %c_f32_n1 %c_f32_n2 %c_f32_n1\n"
6046 		"%c_struct1 = OpConstantComposite %struct1 %c_a3f32_1\n"
6047 		"%c_struct2 = OpConstantComposite %struct2 %c_a3f32_2\n";
6048 
6049 	const char						function[]			=
6050 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6051 		"%param     = OpFunctionParameter %v4f32\n"
6052 		"%entry     = OpLabel\n"
6053 		"%result    = OpVariable %fp_v4f32 Function\n"
6054 		"%v_struct1 = OpVariable %fp_struct1 Function\n"
6055 		"%v_struct2 = OpVariable %fp_struct2 Function\n"
6056 		"             OpStore %result %param\n"
6057 		"             OpStore %v_struct1 %c_struct1\n"
6058 		"             OpStore %v_struct2 %c_struct2\n"
6059 		"%ptr1      = OpAccessChain %fp_f32 %v_struct1 %c_i32_0 %c_i32_2\n"
6060 		"%val1      = OpLoad %f32 %ptr1\n"
6061 		"%ptr2      = OpAccessChain %fp_f32 %v_struct2 %c_i32_0 %c_i32_2\n"
6062 		"%val2      = OpLoad %f32 %ptr2\n"
6063 		"%addvalues = OpFAdd %f32 %val1 %val2\n"
6064 		"%ptr       = OpAccessChain %fp_f32 %result %c_i32_1\n"
6065 		"%val       = OpLoad %f32 %ptr\n"
6066 		"%addresult = OpFAdd %f32 %addvalues %val\n"
6067 		"             OpStore %ptr %addresult\n"
6068 		"%ret       = OpLoad %v4f32 %result\n"
6069 		"             OpReturnValue %ret\n"
6070 		"             OpFunctionEnd\n";
6071 
6072 	struct CaseNameDecoration
6073 	{
6074 		string name;
6075 		string decoration;
6076 	};
6077 
6078 	CaseNameDecoration tests[] =
6079 	{
6080 		{
6081 			"same_decoration_group_on_multiple_types",
6082 			"OpGroupMemberDecorate %struct_member_group %struct1 0 %struct2 0\n"
6083 		},
6084 		{
6085 			"empty_decoration_group",
6086 			"OpGroupDecorate %group0      %a3f32\n"
6087 			"OpGroupDecorate %group0      %result\n"
6088 		},
6089 		{
6090 			"one_element_decoration_group",
6091 			"OpGroupDecorate %array_group %a3f32\n"
6092 		},
6093 		{
6094 			"multiple_elements_decoration_group",
6095 			"OpGroupDecorate %group3      %v_struct1\n"
6096 		},
6097 		{
6098 			"multiple_decoration_groups_on_same_variable",
6099 			"OpGroupDecorate %group0      %v_struct2\n"
6100 			"OpGroupDecorate %group1      %v_struct2\n"
6101 			"OpGroupDecorate %group3      %v_struct2\n"
6102 		},
6103 		{
6104 			"same_decoration_group_multiple_times",
6105 			"OpGroupDecorate %group1      %addvalues\n"
6106 			"OpGroupDecorate %group1      %addvalues\n"
6107 			"OpGroupDecorate %group1      %addvalues\n"
6108 		},
6109 
6110 	};
6111 
6112 	getHalfColorsFullAlpha(inputColors);
6113 	getHalfColorsFullAlpha(outputColors);
6114 
6115 	for (size_t idx = 0; idx < (sizeof(tests) / sizeof(tests[0])); ++idx)
6116 	{
6117 		fragments["decoration"]	= decorations + tests[idx].decoration;
6118 		fragments["pre_main"]	= typesAndConstants;
6119 		fragments["testfun"]	= function;
6120 
6121 		createTestsForAllStages(tests[idx].name, inputColors, outputColors, fragments, group.get());
6122 	}
6123 
6124 	return group.release();
6125 }
6126 
6127 struct SpecConstantTwoIntGraphicsCase
6128 {
6129 	const char*		caseName;
6130 	const char*		scDefinition0;
6131 	const char*		scDefinition1;
6132 	const char*		scResultType;
6133 	const char*		scOperation;
6134 	deInt32			scActualValue0;
6135 	deInt32			scActualValue1;
6136 	const char*		resultOperation;
6137 	RGBA			expectedColors[4];
6138 
SpecConstantTwoIntGraphicsCasevkt::SpirVAssembly::SpecConstantTwoIntGraphicsCase6139 					SpecConstantTwoIntGraphicsCase (const char* name,
6140 											const char* definition0,
6141 											const char* definition1,
6142 											const char* resultType,
6143 											const char* operation,
6144 											deInt32		value0,
6145 											deInt32		value1,
6146 											const char* resultOp,
6147 											const RGBA	(&output)[4])
6148 						: caseName			(name)
6149 						, scDefinition0		(definition0)
6150 						, scDefinition1		(definition1)
6151 						, scResultType		(resultType)
6152 						, scOperation		(operation)
6153 						, scActualValue0	(value0)
6154 						, scActualValue1	(value1)
6155 						, resultOperation	(resultOp)
6156 	{
6157 		expectedColors[0] = output[0];
6158 		expectedColors[1] = output[1];
6159 		expectedColors[2] = output[2];
6160 		expectedColors[3] = output[3];
6161 	}
6162 };
6163 
createSpecConstantTests(tcu::TestContext & testCtx)6164 tcu::TestCaseGroup* createSpecConstantTests (tcu::TestContext& testCtx)
6165 {
6166 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opspecconstantop", "Test the OpSpecConstantOp instruction"));
6167 	vector<SpecConstantTwoIntGraphicsCase>	cases;
6168 	RGBA							inputColors[4];
6169 	RGBA							outputColors0[4];
6170 	RGBA							outputColors1[4];
6171 	RGBA							outputColors2[4];
6172 
6173 	const char	decorations1[]			=
6174 		"OpDecorate %sc_0  SpecId 0\n"
6175 		"OpDecorate %sc_1  SpecId 1\n";
6176 
6177 	const char	typesAndConstants1[]	=
6178 		"%sc_0      = OpSpecConstant${SC_DEF0}\n"
6179 		"%sc_1      = OpSpecConstant${SC_DEF1}\n"
6180 		"%sc_op     = OpSpecConstantOp ${SC_RESULT_TYPE} ${SC_OP}\n";
6181 
6182 	const char	function1[]				=
6183 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6184 		"%param     = OpFunctionParameter %v4f32\n"
6185 		"%label     = OpLabel\n"
6186 		"%result    = OpVariable %fp_v4f32 Function\n"
6187 		"             OpStore %result %param\n"
6188 		"%gen       = ${GEN_RESULT}\n"
6189 		"%index     = OpIAdd %i32 %gen %c_i32_1\n"
6190 		"%loc       = OpAccessChain %fp_f32 %result %index\n"
6191 		"%val       = OpLoad %f32 %loc\n"
6192 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
6193 		"             OpStore %loc %add\n"
6194 		"%ret       = OpLoad %v4f32 %result\n"
6195 		"             OpReturnValue %ret\n"
6196 		"             OpFunctionEnd\n";
6197 
6198 	inputColors[0] = RGBA(127, 127, 127, 255);
6199 	inputColors[1] = RGBA(127, 0,   0,   255);
6200 	inputColors[2] = RGBA(0,   127, 0,   255);
6201 	inputColors[3] = RGBA(0,   0,   127, 255);
6202 
6203 	// Derived from inputColors[x] by adding 128 to inputColors[x][0].
6204 	outputColors0[0] = RGBA(255, 127, 127, 255);
6205 	outputColors0[1] = RGBA(255, 0,   0,   255);
6206 	outputColors0[2] = RGBA(128, 127, 0,   255);
6207 	outputColors0[3] = RGBA(128, 0,   127, 255);
6208 
6209 	// Derived from inputColors[x] by adding 128 to inputColors[x][1].
6210 	outputColors1[0] = RGBA(127, 255, 127, 255);
6211 	outputColors1[1] = RGBA(127, 128, 0,   255);
6212 	outputColors1[2] = RGBA(0,   255, 0,   255);
6213 	outputColors1[3] = RGBA(0,   128, 127, 255);
6214 
6215 	// Derived from inputColors[x] by adding 128 to inputColors[x][2].
6216 	outputColors2[0] = RGBA(127, 127, 255, 255);
6217 	outputColors2[1] = RGBA(127, 0,   128, 255);
6218 	outputColors2[2] = RGBA(0,   127, 128, 255);
6219 	outputColors2[3] = RGBA(0,   0,   255, 255);
6220 
6221 	const char addZeroToSc[]		= "OpIAdd %i32 %c_i32_0 %sc_op";
6222 	const char selectTrueUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_1 %c_i32_0";
6223 	const char selectFalseUsingSc[]	= "OpSelect %i32 %sc_op %c_i32_0 %c_i32_1";
6224 
6225 	cases.push_back(SpecConstantTwoIntGraphicsCase("iadd",					" %i32 0",		" %i32 0",		"%i32",		"IAdd                 %sc_0 %sc_1",				19,		-20,	addZeroToSc,		outputColors0));
6226 	cases.push_back(SpecConstantTwoIntGraphicsCase("isub",					" %i32 0",		" %i32 0",		"%i32",		"ISub                 %sc_0 %sc_1",				19,		20,		addZeroToSc,		outputColors0));
6227 	cases.push_back(SpecConstantTwoIntGraphicsCase("imul",					" %i32 0",		" %i32 0",		"%i32",		"IMul                 %sc_0 %sc_1",				-1,		-1,		addZeroToSc,		outputColors2));
6228 	cases.push_back(SpecConstantTwoIntGraphicsCase("sdiv",					" %i32 0",		" %i32 0",		"%i32",		"SDiv                 %sc_0 %sc_1",				-126,	126,	addZeroToSc,		outputColors0));
6229 	cases.push_back(SpecConstantTwoIntGraphicsCase("udiv",					" %i32 0",		" %i32 0",		"%i32",		"UDiv                 %sc_0 %sc_1",				126,	126,	addZeroToSc,		outputColors2));
6230 	cases.push_back(SpecConstantTwoIntGraphicsCase("srem",					" %i32 0",		" %i32 0",		"%i32",		"SRem                 %sc_0 %sc_1",				3,		2,		addZeroToSc,		outputColors2));
6231 	cases.push_back(SpecConstantTwoIntGraphicsCase("smod",					" %i32 0",		" %i32 0",		"%i32",		"SMod                 %sc_0 %sc_1",				3,		2,		addZeroToSc,		outputColors2));
6232 	cases.push_back(SpecConstantTwoIntGraphicsCase("umod",					" %i32 0",		" %i32 0",		"%i32",		"UMod                 %sc_0 %sc_1",				1001,	500,	addZeroToSc,		outputColors2));
6233 	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseand",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseAnd           %sc_0 %sc_1",				0x33,	0x0d,	addZeroToSc,		outputColors2));
6234 	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwiseor",				" %i32 0",		" %i32 0",		"%i32",		"BitwiseOr            %sc_0 %sc_1",				0,		1,		addZeroToSc,		outputColors2));
6235 	cases.push_back(SpecConstantTwoIntGraphicsCase("bitwisexor",			" %i32 0",		" %i32 0",		"%i32",		"BitwiseXor           %sc_0 %sc_1",				0x2e,	0x2f,	addZeroToSc,		outputColors2));
6236 	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftRightLogical    %sc_0 %sc_1",				2,		1,		addZeroToSc,		outputColors2));
6237 	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftrightarithmetic",	" %i32 0",		" %i32 0",		"%i32",		"ShiftRightArithmetic %sc_0 %sc_1",				-4,		2,		addZeroToSc,		outputColors0));
6238 	cases.push_back(SpecConstantTwoIntGraphicsCase("shiftleftlogical",		" %i32 0",		" %i32 0",		"%i32",		"ShiftLeftLogical     %sc_0 %sc_1",				1,		0,		addZeroToSc,		outputColors2));
6239 	cases.push_back(SpecConstantTwoIntGraphicsCase("slessthan",				" %i32 0",		" %i32 0",		"%bool",	"SLessThan            %sc_0 %sc_1",				-20,	-10,	selectTrueUsingSc,	outputColors2));
6240 	cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthan",				" %i32 0",		" %i32 0",		"%bool",	"ULessThan            %sc_0 %sc_1",				10,		20,		selectTrueUsingSc,	outputColors2));
6241 	cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"SGreaterThan         %sc_0 %sc_1",				-1000,	50,		selectFalseUsingSc,	outputColors2));
6242 	cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthan",			" %i32 0",		" %i32 0",		"%bool",	"UGreaterThan         %sc_0 %sc_1",				10,		5,		selectTrueUsingSc,	outputColors2));
6243 	cases.push_back(SpecConstantTwoIntGraphicsCase("slessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SLessThanEqual       %sc_0 %sc_1",				-10,	-10,	selectTrueUsingSc,	outputColors2));
6244 	cases.push_back(SpecConstantTwoIntGraphicsCase("ulessthanequal",		" %i32 0",		" %i32 0",		"%bool",	"ULessThanEqual       %sc_0 %sc_1",				50,		100,	selectTrueUsingSc,	outputColors2));
6245 	cases.push_back(SpecConstantTwoIntGraphicsCase("sgreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"SGreaterThanEqual    %sc_0 %sc_1",				-1000,	50,		selectFalseUsingSc,	outputColors2));
6246 	cases.push_back(SpecConstantTwoIntGraphicsCase("ugreaterthanequal",		" %i32 0",		" %i32 0",		"%bool",	"UGreaterThanEqual    %sc_0 %sc_1",				10,		10,		selectTrueUsingSc,	outputColors2));
6247 	cases.push_back(SpecConstantTwoIntGraphicsCase("iequal",				" %i32 0",		" %i32 0",		"%bool",	"IEqual               %sc_0 %sc_1",				42,		24,		selectFalseUsingSc,	outputColors2));
6248 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicaland",			"True %bool",	"True %bool",	"%bool",	"LogicalAnd           %sc_0 %sc_1",				0,		1,		selectFalseUsingSc,	outputColors2));
6249 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalor",				"False %bool",	"False %bool",	"%bool",	"LogicalOr            %sc_0 %sc_1",				1,		0,		selectTrueUsingSc,	outputColors2));
6250 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalequal",			"True %bool",	"True %bool",	"%bool",	"LogicalEqual         %sc_0 %sc_1",				0,		1,		selectFalseUsingSc,	outputColors2));
6251 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnotequal",		"False %bool",	"False %bool",	"%bool",	"LogicalNotEqual      %sc_0 %sc_1",				1,		0,		selectTrueUsingSc,	outputColors2));
6252 	cases.push_back(SpecConstantTwoIntGraphicsCase("snegate",				" %i32 0",		" %i32 0",		"%i32",		"SNegate              %sc_0",					-1,		0,		addZeroToSc,		outputColors2));
6253 	cases.push_back(SpecConstantTwoIntGraphicsCase("not",					" %i32 0",		" %i32 0",		"%i32",		"Not                  %sc_0",					-2,		0,		addZeroToSc,		outputColors2));
6254 	cases.push_back(SpecConstantTwoIntGraphicsCase("logicalnot",			"False %bool",	"False %bool",	"%bool",	"LogicalNot           %sc_0",					1,		0,		selectFalseUsingSc,	outputColors2));
6255 	cases.push_back(SpecConstantTwoIntGraphicsCase("select",				"False %bool",	" %i32 0",		"%i32",		"Select               %sc_0 %sc_1 %c_i32_0",	1,		1,		addZeroToSc,		outputColors2));
6256 	// OpSConvert, OpFConvert: these two instructions involve ints/floats of different bitwidths.
6257 	// \todo[2015-12-1 antiagainst] OpQuantizeToF16
6258 
6259 	for (size_t caseNdx = 0; caseNdx < cases.size(); ++caseNdx)
6260 	{
6261 		map<string, string>	specializations;
6262 		map<string, string>	fragments;
6263 		vector<deInt32>		specConstants;
6264 
6265 		specializations["SC_DEF0"]			= cases[caseNdx].scDefinition0;
6266 		specializations["SC_DEF1"]			= cases[caseNdx].scDefinition1;
6267 		specializations["SC_RESULT_TYPE"]	= cases[caseNdx].scResultType;
6268 		specializations["SC_OP"]			= cases[caseNdx].scOperation;
6269 		specializations["GEN_RESULT"]		= cases[caseNdx].resultOperation;
6270 
6271 		fragments["decoration"]				= tcu::StringTemplate(decorations1).specialize(specializations);
6272 		fragments["pre_main"]				= tcu::StringTemplate(typesAndConstants1).specialize(specializations);
6273 		fragments["testfun"]				= tcu::StringTemplate(function1).specialize(specializations);
6274 
6275 		specConstants.push_back(cases[caseNdx].scActualValue0);
6276 		specConstants.push_back(cases[caseNdx].scActualValue1);
6277 
6278 		createTestsForAllStages(cases[caseNdx].caseName, inputColors, cases[caseNdx].expectedColors, fragments, specConstants, group.get());
6279 	}
6280 
6281 	const char	decorations2[]			=
6282 		"OpDecorate %sc_0  SpecId 0\n"
6283 		"OpDecorate %sc_1  SpecId 1\n"
6284 		"OpDecorate %sc_2  SpecId 2\n";
6285 
6286 	const char	typesAndConstants2[]	=
6287 		"%v3i32     = OpTypeVector %i32 3\n"
6288 
6289 		"%sc_0      = OpSpecConstant %i32 0\n"
6290 		"%sc_1      = OpSpecConstant %i32 0\n"
6291 		"%sc_2      = OpSpecConstant %i32 0\n"
6292 
6293 		"%vec3_0      = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
6294 		"%sc_vec3_0   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_0        %vec3_0    0\n"     // (sc_0, 0, 0)
6295 		"%sc_vec3_1   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_1        %vec3_0    1\n"     // (0, sc_1, 0)
6296 		"%sc_vec3_2   = OpSpecConstantOp %v3i32 CompositeInsert  %sc_2        %vec3_0    2\n"     // (0, 0, sc_2)
6297 		"%sc_vec3_01  = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_0   %sc_vec3_1 1 0 4\n" // (0,    sc_0, sc_1)
6298 		"%sc_vec3_012 = OpSpecConstantOp %v3i32 VectorShuffle    %sc_vec3_01  %sc_vec3_2 5 1 2\n" // (sc_2, sc_0, sc_1)
6299 		"%sc_ext_0    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            0\n"     // sc_2
6300 		"%sc_ext_1    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            1\n"     // sc_0
6301 		"%sc_ext_2    = OpSpecConstantOp %i32   CompositeExtract %sc_vec3_012            2\n"     // sc_1
6302 		"%sc_sub      = OpSpecConstantOp %i32   ISub             %sc_ext_0    %sc_ext_1\n"        // (sc_2 - sc_0)
6303 		"%sc_final    = OpSpecConstantOp %i32   IMul             %sc_sub      %sc_ext_2\n";       // (sc_2 - sc_0) * sc_1
6304 
6305 	const char	function2[]				=
6306 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6307 		"%param     = OpFunctionParameter %v4f32\n"
6308 		"%label     = OpLabel\n"
6309 		"%result    = OpVariable %fp_v4f32 Function\n"
6310 		"             OpStore %result %param\n"
6311 		"%loc       = OpAccessChain %fp_f32 %result %sc_final\n"
6312 		"%val       = OpLoad %f32 %loc\n"
6313 		"%add       = OpFAdd %f32 %val %c_f32_0_5\n"
6314 		"             OpStore %loc %add\n"
6315 		"%ret       = OpLoad %v4f32 %result\n"
6316 		"             OpReturnValue %ret\n"
6317 		"             OpFunctionEnd\n";
6318 
6319 	map<string, string>	fragments;
6320 	vector<deInt32>		specConstants;
6321 
6322 	fragments["decoration"]	= decorations2;
6323 	fragments["pre_main"]	= typesAndConstants2;
6324 	fragments["testfun"]	= function2;
6325 
6326 	specConstants.push_back(56789);
6327 	specConstants.push_back(-2);
6328 	specConstants.push_back(56788);
6329 
6330 	createTestsForAllStages("vector_related", inputColors, outputColors2, fragments, specConstants, group.get());
6331 
6332 	return group.release();
6333 }
6334 
createOpPhiTests(tcu::TestContext & testCtx)6335 tcu::TestCaseGroup* createOpPhiTests(tcu::TestContext& testCtx)
6336 {
6337 	de::MovePtr<tcu::TestCaseGroup> group				(new tcu::TestCaseGroup(testCtx, "opphi", "Test the OpPhi instruction"));
6338 	RGBA							inputColors[4];
6339 	RGBA							outputColors1[4];
6340 	RGBA							outputColors2[4];
6341 	RGBA							outputColors3[4];
6342 	map<string, string>				fragments1;
6343 	map<string, string>				fragments2;
6344 	map<string, string>				fragments3;
6345 
6346 	const char	typesAndConstants1[]	=
6347 		"%c_f32_p2  = OpConstant %f32 0.2\n"
6348 		"%c_f32_p4  = OpConstant %f32 0.4\n"
6349 		"%c_f32_p5  = OpConstant %f32 0.5\n"
6350 		"%c_f32_p8  = OpConstant %f32 0.8\n";
6351 
6352 	// vec4 test_code(vec4 param) {
6353 	//   vec4 result = param;
6354 	//   for (int i = 0; i < 4; ++i) {
6355 	//     float operand;
6356 	//     switch (i) {
6357 	//       case 0: operand = .2; break;
6358 	//       case 1: operand = .5; break;
6359 	//       case 2: operand = .4; break;
6360 	//       case 3: operand = .0; break;
6361 	//       default: break; // unreachable
6362 	//     }
6363 	//     result[i] += operand;
6364 	//   }
6365 	//   return result;
6366 	// }
6367 	const char	function1[]				=
6368 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6369 		"%param1    = OpFunctionParameter %v4f32\n"
6370 		"%lbl       = OpLabel\n"
6371 		"%iptr      = OpVariable %fp_i32 Function\n"
6372 		"%result    = OpVariable %fp_v4f32 Function\n"
6373 		"             OpStore %iptr %c_i32_0\n"
6374 		"             OpStore %result %param1\n"
6375 		"             OpBranch %loop\n"
6376 
6377 		"%loop      = OpLabel\n"
6378 		"%ival      = OpLoad %i32 %iptr\n"
6379 		"%lt_4      = OpSLessThan %bool %ival %c_i32_4\n"
6380 		"             OpLoopMerge %exit %loop None\n"
6381 		"             OpBranchConditional %lt_4 %entry %exit\n"
6382 
6383 		"%entry     = OpLabel\n"
6384 		"%loc       = OpAccessChain %fp_f32 %result %ival\n"
6385 		"%val       = OpLoad %f32 %loc\n"
6386 		"             OpSelectionMerge %phi None\n"
6387 		"             OpSwitch %ival %default 0 %case0 1 %case1 2 %case2 3 %case3\n"
6388 
6389 		"%case0     = OpLabel\n"
6390 		"             OpBranch %phi\n"
6391 		"%case1     = OpLabel\n"
6392 		"             OpBranch %phi\n"
6393 		"%case2     = OpLabel\n"
6394 		"             OpBranch %phi\n"
6395 		"%case3     = OpLabel\n"
6396 		"             OpBranch %phi\n"
6397 
6398 		"%default   = OpLabel\n"
6399 		"             OpUnreachable\n"
6400 
6401 		"%phi       = OpLabel\n"
6402 		"%operand   = OpPhi %f32 %c_f32_p4 %case2 %c_f32_p5 %case1 %c_f32_p2 %case0 %c_f32_0 %case3\n" // not in the order of blocks
6403 		"%add       = OpFAdd %f32 %val %operand\n"
6404 		"             OpStore %loc %add\n"
6405 		"%ival_next = OpIAdd %i32 %ival %c_i32_1\n"
6406 		"             OpStore %iptr %ival_next\n"
6407 		"             OpBranch %loop\n"
6408 
6409 		"%exit      = OpLabel\n"
6410 		"%ret       = OpLoad %v4f32 %result\n"
6411 		"             OpReturnValue %ret\n"
6412 
6413 		"             OpFunctionEnd\n";
6414 
6415 	fragments1["pre_main"]	= typesAndConstants1;
6416 	fragments1["testfun"]	= function1;
6417 
6418 	getHalfColorsFullAlpha(inputColors);
6419 
6420 	outputColors1[0]		= RGBA(178, 255, 229, 255);
6421 	outputColors1[1]		= RGBA(178, 127, 102, 255);
6422 	outputColors1[2]		= RGBA(51,  255, 102, 255);
6423 	outputColors1[3]		= RGBA(51,  127, 229, 255);
6424 
6425 	createTestsForAllStages("out_of_order", inputColors, outputColors1, fragments1, group.get());
6426 
6427 	const char	typesAndConstants2[]	=
6428 		"%c_f32_p2  = OpConstant %f32 0.2\n";
6429 
6430 	// Add .4 to the second element of the given parameter.
6431 	const char	function2[]				=
6432 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6433 		"%param     = OpFunctionParameter %v4f32\n"
6434 		"%entry     = OpLabel\n"
6435 		"%result    = OpVariable %fp_v4f32 Function\n"
6436 		"             OpStore %result %param\n"
6437 		"%loc       = OpAccessChain %fp_f32 %result %c_i32_1\n"
6438 		"%val       = OpLoad %f32 %loc\n"
6439 		"             OpBranch %phi\n"
6440 
6441 		"%phi        = OpLabel\n"
6442 		"%step       = OpPhi %i32 %c_i32_0  %entry %step_next  %phi\n"
6443 		"%accum      = OpPhi %f32 %val      %entry %accum_next %phi\n"
6444 		"%step_next  = OpIAdd %i32 %step  %c_i32_1\n"
6445 		"%accum_next = OpFAdd %f32 %accum %c_f32_p2\n"
6446 		"%still_loop = OpSLessThan %bool %step %c_i32_2\n"
6447 		"              OpLoopMerge %exit %phi None\n"
6448 		"              OpBranchConditional %still_loop %phi %exit\n"
6449 
6450 		"%exit       = OpLabel\n"
6451 		"              OpStore %loc %accum\n"
6452 		"%ret        = OpLoad %v4f32 %result\n"
6453 		"              OpReturnValue %ret\n"
6454 
6455 		"              OpFunctionEnd\n";
6456 
6457 	fragments2["pre_main"]	= typesAndConstants2;
6458 	fragments2["testfun"]	= function2;
6459 
6460 	outputColors2[0]			= RGBA(127, 229, 127, 255);
6461 	outputColors2[1]			= RGBA(127, 102, 0,   255);
6462 	outputColors2[2]			= RGBA(0,   229, 0,   255);
6463 	outputColors2[3]			= RGBA(0,   102, 127, 255);
6464 
6465 	createTestsForAllStages("induction", inputColors, outputColors2, fragments2, group.get());
6466 
6467 	const char	typesAndConstants3[]	=
6468 		"%true      = OpConstantTrue %bool\n"
6469 		"%false     = OpConstantFalse %bool\n"
6470 		"%c_f32_p2  = OpConstant %f32 0.2\n";
6471 
6472 	// Swap the second and the third element of the given parameter.
6473 	const char	function3[]				=
6474 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6475 		"%param     = OpFunctionParameter %v4f32\n"
6476 		"%entry     = OpLabel\n"
6477 		"%result    = OpVariable %fp_v4f32 Function\n"
6478 		"             OpStore %result %param\n"
6479 		"%a_loc     = OpAccessChain %fp_f32 %result %c_i32_1\n"
6480 		"%a_init    = OpLoad %f32 %a_loc\n"
6481 		"%b_loc     = OpAccessChain %fp_f32 %result %c_i32_2\n"
6482 		"%b_init    = OpLoad %f32 %b_loc\n"
6483 		"             OpBranch %phi\n"
6484 
6485 		"%phi        = OpLabel\n"
6486 		"%still_loop = OpPhi %bool %true   %entry %false  %phi\n"
6487 		"%a_next     = OpPhi %f32  %a_init %entry %b_next %phi\n"
6488 		"%b_next     = OpPhi %f32  %b_init %entry %a_next %phi\n"
6489 		"              OpLoopMerge %exit %phi None\n"
6490 		"              OpBranchConditional %still_loop %phi %exit\n"
6491 
6492 		"%exit       = OpLabel\n"
6493 		"              OpStore %a_loc %a_next\n"
6494 		"              OpStore %b_loc %b_next\n"
6495 		"%ret        = OpLoad %v4f32 %result\n"
6496 		"              OpReturnValue %ret\n"
6497 
6498 		"              OpFunctionEnd\n";
6499 
6500 	fragments3["pre_main"]	= typesAndConstants3;
6501 	fragments3["testfun"]	= function3;
6502 
6503 	outputColors3[0]			= RGBA(127, 127, 127, 255);
6504 	outputColors3[1]			= RGBA(127, 0,   0,   255);
6505 	outputColors3[2]			= RGBA(0,   0,   127, 255);
6506 	outputColors3[3]			= RGBA(0,   127, 0,   255);
6507 
6508 	createTestsForAllStages("swap", inputColors, outputColors3, fragments3, group.get());
6509 
6510 	return group.release();
6511 }
6512 
createNoContractionTests(tcu::TestContext & testCtx)6513 tcu::TestCaseGroup* createNoContractionTests(tcu::TestContext& testCtx)
6514 {
6515 	de::MovePtr<tcu::TestCaseGroup> group			(new tcu::TestCaseGroup(testCtx, "nocontraction", "Test the NoContraction decoration"));
6516 	RGBA							inputColors[4];
6517 	RGBA							outputColors[4];
6518 
6519 	// With NoContraction, (1 + 2^-23) * (1 - 2^-23) - 1 should be conducted as a multiplication and an addition separately.
6520 	// For the multiplication, the result is 1 - 2^-46, which is out of the precision range for 32-bit float. (32-bit float
6521 	// only have 23-bit fraction.) So it will be rounded to 1. Or 0x1.fffffc. Then the final result is 0 or -0x1p-24.
6522 	// On the contrary, the result will be 2^-46, which is a normalized number perfectly representable as 32-bit float.
6523 	const char						constantsAndTypes[]	 =
6524 		"%c_vec4_0       = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_1\n"
6525 		"%c_vec4_1       = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
6526 		"%c_f32_1pl2_23  = OpConstant %f32 0x1.000002p+0\n" // 1 + 2^-23
6527 		"%c_f32_1mi2_23  = OpConstant %f32 0x1.fffffcp-1\n" // 1 - 2^-23
6528 		"%c_f32_n1pn24   = OpConstant %f32 -0x1p-24\n"
6529 		;
6530 
6531 	const char						function[]	 =
6532 		"%test_code      = OpFunction %v4f32 None %v4f32_function\n"
6533 		"%param          = OpFunctionParameter %v4f32\n"
6534 		"%label          = OpLabel\n"
6535 		"%var1           = OpVariable %fp_f32 Function %c_f32_1pl2_23\n"
6536 		"%var2           = OpVariable %fp_f32 Function\n"
6537 		"%red            = OpCompositeExtract %f32 %param 0\n"
6538 		"%plus_red       = OpFAdd %f32 %c_f32_1mi2_23 %red\n"
6539 		"                  OpStore %var2 %plus_red\n"
6540 		"%val1           = OpLoad %f32 %var1\n"
6541 		"%val2           = OpLoad %f32 %var2\n"
6542 		"%mul            = OpFMul %f32 %val1 %val2\n"
6543 		"%add            = OpFAdd %f32 %mul %c_f32_n1\n"
6544 		"%is0            = OpFOrdEqual %bool %add %c_f32_0\n"
6545 		"%isn1n24         = OpFOrdEqual %bool %add %c_f32_n1pn24\n"
6546 		"%success        = OpLogicalOr %bool %is0 %isn1n24\n"
6547 		"%v4success      = OpCompositeConstruct %v4bool %success %success %success %success\n"
6548 		"%ret            = OpSelect %v4f32 %v4success %c_vec4_0 %c_vec4_1\n"
6549 		"                  OpReturnValue %ret\n"
6550 		"                  OpFunctionEnd\n";
6551 
6552 	struct CaseNameDecoration
6553 	{
6554 		string name;
6555 		string decoration;
6556 	};
6557 
6558 
6559 	CaseNameDecoration tests[] = {
6560 		{"multiplication",	"OpDecorate %mul NoContraction"},
6561 		{"addition",		"OpDecorate %add NoContraction"},
6562 		{"both",			"OpDecorate %mul NoContraction\nOpDecorate %add NoContraction"},
6563 	};
6564 
6565 	getHalfColorsFullAlpha(inputColors);
6566 
6567 	for (deUint8 idx = 0; idx < 4; ++idx)
6568 	{
6569 		inputColors[idx].setRed(0);
6570 		outputColors[idx] = RGBA(0, 0, 0, 255);
6571 	}
6572 
6573 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(CaseNameDecoration); ++testNdx)
6574 	{
6575 		map<string, string> fragments;
6576 
6577 		fragments["decoration"] = tests[testNdx].decoration;
6578 		fragments["pre_main"] = constantsAndTypes;
6579 		fragments["testfun"] = function;
6580 
6581 		createTestsForAllStages(tests[testNdx].name, inputColors, outputColors, fragments, group.get());
6582 	}
6583 
6584 	return group.release();
6585 }
6586 
createMemoryAccessTests(tcu::TestContext & testCtx)6587 tcu::TestCaseGroup* createMemoryAccessTests(tcu::TestContext& testCtx)
6588 {
6589 	de::MovePtr<tcu::TestCaseGroup> memoryAccessTests (new tcu::TestCaseGroup(testCtx, "opmemoryaccess", "Memory Semantics"));
6590 	RGBA							colors[4];
6591 
6592 	const char						constantsAndTypes[]	 =
6593 		"%c_a2f32_1         = OpConstantComposite %a2f32 %c_f32_1 %c_f32_1\n"
6594 		"%fp_a2f32          = OpTypePointer Function %a2f32\n"
6595 		"%stype             = OpTypeStruct  %v4f32 %a2f32 %f32\n"
6596 		"%fp_stype          = OpTypePointer Function %stype\n";
6597 
6598 	const char						function[]	 =
6599 		"%test_code         = OpFunction %v4f32 None %v4f32_function\n"
6600 		"%param1            = OpFunctionParameter %v4f32\n"
6601 		"%lbl               = OpLabel\n"
6602 		"%v1                = OpVariable %fp_v4f32 Function\n"
6603 		"%v2                = OpVariable %fp_a2f32 Function\n"
6604 		"%v3                = OpVariable %fp_f32 Function\n"
6605 		"%v                 = OpVariable %fp_stype Function\n"
6606 		"%vv                = OpVariable %fp_stype Function\n"
6607 		"%vvv               = OpVariable %fp_f32 Function\n"
6608 
6609 		"                     OpStore %v1 %c_v4f32_1_1_1_1\n"
6610 		"                     OpStore %v2 %c_a2f32_1\n"
6611 		"                     OpStore %v3 %c_f32_1\n"
6612 
6613 		"%p_v4f32          = OpAccessChain %fp_v4f32 %v %c_u32_0\n"
6614 		"%p_a2f32          = OpAccessChain %fp_a2f32 %v %c_u32_1\n"
6615 		"%p_f32            = OpAccessChain %fp_f32 %v %c_u32_2\n"
6616 		"%v1_v             = OpLoad %v4f32 %v1 ${access_type}\n"
6617 		"%v2_v             = OpLoad %a2f32 %v2 ${access_type}\n"
6618 		"%v3_v             = OpLoad %f32 %v3 ${access_type}\n"
6619 
6620 		"                    OpStore %p_v4f32 %v1_v ${access_type}\n"
6621 		"                    OpStore %p_a2f32 %v2_v ${access_type}\n"
6622 		"                    OpStore %p_f32 %v3_v ${access_type}\n"
6623 
6624 		"                    OpCopyMemory %vv %v ${access_type}\n"
6625 		"                    OpCopyMemory %vvv %p_f32 ${access_type}\n"
6626 
6627 		"%p_f32_2          = OpAccessChain %fp_f32 %vv %c_u32_2\n"
6628 		"%v_f32_2          = OpLoad %f32 %p_f32_2\n"
6629 		"%v_f32_3          = OpLoad %f32 %vvv\n"
6630 
6631 		"%ret1             = OpVectorTimesScalar %v4f32 %param1 %v_f32_2\n"
6632 		"%ret2             = OpVectorTimesScalar %v4f32 %ret1 %v_f32_3\n"
6633 		"                    OpReturnValue %ret2\n"
6634 		"                    OpFunctionEnd\n";
6635 
6636 	struct NameMemoryAccess
6637 	{
6638 		string name;
6639 		string accessType;
6640 	};
6641 
6642 
6643 	NameMemoryAccess tests[] =
6644 	{
6645 		{ "none", "" },
6646 		{ "volatile", "Volatile" },
6647 		{ "aligned",  "Aligned 1" },
6648 		{ "volatile_aligned",  "Volatile|Aligned 1" },
6649 		{ "nontemporal_aligned",  "Nontemporal|Aligned 1" },
6650 		{ "volatile_nontemporal",  "Volatile|Nontemporal" },
6651 		{ "volatile_nontermporal_aligned",  "Volatile|Nontemporal|Aligned 1" },
6652 	};
6653 
6654 	getHalfColorsFullAlpha(colors);
6655 
6656 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameMemoryAccess); ++testNdx)
6657 	{
6658 		map<string, string> fragments;
6659 		map<string, string> memoryAccess;
6660 		memoryAccess["access_type"] = tests[testNdx].accessType;
6661 
6662 		fragments["pre_main"] = constantsAndTypes;
6663 		fragments["testfun"] = tcu::StringTemplate(function).specialize(memoryAccess);
6664 		createTestsForAllStages(tests[testNdx].name, colors, colors, fragments, memoryAccessTests.get());
6665 	}
6666 	return memoryAccessTests.release();
6667 }
createOpUndefTests(tcu::TestContext & testCtx)6668 tcu::TestCaseGroup* createOpUndefTests(tcu::TestContext& testCtx)
6669 {
6670 	de::MovePtr<tcu::TestCaseGroup>		opUndefTests		 (new tcu::TestCaseGroup(testCtx, "opundef", "Test OpUndef"));
6671 	RGBA								defaultColors[4];
6672 	map<string, string>					fragments;
6673 	getDefaultColors(defaultColors);
6674 
6675 	// First, simple cases that don't do anything with the OpUndef result.
6676 	fragments["testfun"] =
6677 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6678 		"%param1 = OpFunctionParameter %v4f32\n"
6679 		"%label_testfun = OpLabel\n"
6680 		"%undef = OpUndef %type\n"
6681 		"OpReturnValue %param1\n"
6682 		"OpFunctionEnd\n"
6683 		;
6684 	struct NameCodePair { string name, code; };
6685 	const NameCodePair tests[] =
6686 	{
6687 		{"bool", "%type = OpTypeBool"},
6688 		{"vec2uint32", "%type = OpTypeVector %u32 2"},
6689 		{"image", "%type = OpTypeImage %f32 2D 0 0 0 1 Unknown"},
6690 		{"sampler", "%type = OpTypeSampler"},
6691 		{"sampledimage", "%img = OpTypeImage %f32 2D 0 0 0 1 Unknown\n" "%type = OpTypeSampledImage %img"},
6692 		{"pointer", "%type = OpTypePointer Function %i32"},
6693 		{"runtimearray", "%type = OpTypeRuntimeArray %f32"},
6694 		{"array", "%c_u32_100 = OpConstant %u32 100\n" "%type = OpTypeArray %i32 %c_u32_100"},
6695 		{"struct", "%type = OpTypeStruct %f32 %i32 %u32"}};
6696 	for (size_t testNdx = 0; testNdx < sizeof(tests) / sizeof(NameCodePair); ++testNdx)
6697 	{
6698 		fragments["pre_main"] = tests[testNdx].code;
6699 		createTestsForAllStages(tests[testNdx].name, defaultColors, defaultColors, fragments, opUndefTests.get());
6700 	}
6701 	fragments.clear();
6702 
6703 	fragments["testfun"] =
6704 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6705 		"%param1 = OpFunctionParameter %v4f32\n"
6706 		"%label_testfun = OpLabel\n"
6707 		"%undef = OpUndef %f32\n"
6708 		"%zero = OpFMul %f32 %undef %c_f32_0\n"
6709 		"%is_nan = OpIsNan %bool %zero\n" //OpUndef may result in NaN which may turn %zero into Nan.
6710 		"%actually_zero = OpSelect %f32 %is_nan %c_f32_0 %zero\n"
6711 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6712 		"%b = OpFAdd %f32 %a %actually_zero\n"
6713 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %b %c_i32_0\n"
6714 		"OpReturnValue %ret\n"
6715 		"OpFunctionEnd\n"
6716 		;
6717 	createTestsForAllStages("float32", defaultColors, defaultColors, fragments, opUndefTests.get());
6718 
6719 	fragments["testfun"] =
6720 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6721 		"%param1 = OpFunctionParameter %v4f32\n"
6722 		"%label_testfun = OpLabel\n"
6723 		"%undef = OpUndef %i32\n"
6724 		"%zero = OpIMul %i32 %undef %c_i32_0\n"
6725 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
6726 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
6727 		"OpReturnValue %ret\n"
6728 		"OpFunctionEnd\n"
6729 		;
6730 	createTestsForAllStages("sint32", defaultColors, defaultColors, fragments, opUndefTests.get());
6731 
6732 	fragments["testfun"] =
6733 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6734 		"%param1 = OpFunctionParameter %v4f32\n"
6735 		"%label_testfun = OpLabel\n"
6736 		"%undef = OpUndef %u32\n"
6737 		"%zero = OpIMul %u32 %undef %c_i32_0\n"
6738 		"%a = OpVectorExtractDynamic %f32 %param1 %zero\n"
6739 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %a %c_i32_0\n"
6740 		"OpReturnValue %ret\n"
6741 		"OpFunctionEnd\n"
6742 		;
6743 	createTestsForAllStages("uint32", defaultColors, defaultColors, fragments, opUndefTests.get());
6744 
6745 	fragments["testfun"] =
6746 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6747 		"%param1 = OpFunctionParameter %v4f32\n"
6748 		"%label_testfun = OpLabel\n"
6749 		"%undef = OpUndef %v4f32\n"
6750 		"%vzero = OpVectorTimesScalar %v4f32 %undef %c_f32_0\n"
6751 		"%zero_0 = OpVectorExtractDynamic %f32 %vzero %c_i32_0\n"
6752 		"%zero_1 = OpVectorExtractDynamic %f32 %vzero %c_i32_1\n"
6753 		"%zero_2 = OpVectorExtractDynamic %f32 %vzero %c_i32_2\n"
6754 		"%zero_3 = OpVectorExtractDynamic %f32 %vzero %c_i32_3\n"
6755 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
6756 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
6757 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
6758 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
6759 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
6760 		"%actually_zero_1 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_1\n"
6761 		"%actually_zero_2 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_2\n"
6762 		"%actually_zero_3 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_3\n"
6763 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6764 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
6765 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
6766 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
6767 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
6768 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
6769 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
6770 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
6771 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
6772 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
6773 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
6774 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
6775 		"OpReturnValue %ret\n"
6776 		"OpFunctionEnd\n"
6777 		;
6778 	createTestsForAllStages("vec4float32", defaultColors, defaultColors, fragments, opUndefTests.get());
6779 
6780 	fragments["pre_main"] =
6781 		"%v2f32 = OpTypeVector %f32 2\n"
6782 		"%m2x2f32 = OpTypeMatrix %v2f32 2\n";
6783 	fragments["testfun"] =
6784 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
6785 		"%param1 = OpFunctionParameter %v4f32\n"
6786 		"%label_testfun = OpLabel\n"
6787 		"%undef = OpUndef %m2x2f32\n"
6788 		"%mzero = OpMatrixTimesScalar %m2x2f32 %undef %c_f32_0\n"
6789 		"%zero_0 = OpCompositeExtract %f32 %mzero 0 0\n"
6790 		"%zero_1 = OpCompositeExtract %f32 %mzero 0 1\n"
6791 		"%zero_2 = OpCompositeExtract %f32 %mzero 1 0\n"
6792 		"%zero_3 = OpCompositeExtract %f32 %mzero 1 1\n"
6793 		"%is_nan_0 = OpIsNan %bool %zero_0\n"
6794 		"%is_nan_1 = OpIsNan %bool %zero_1\n"
6795 		"%is_nan_2 = OpIsNan %bool %zero_2\n"
6796 		"%is_nan_3 = OpIsNan %bool %zero_3\n"
6797 		"%actually_zero_0 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_0\n"
6798 		"%actually_zero_1 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_1\n"
6799 		"%actually_zero_2 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_2\n"
6800 		"%actually_zero_3 = OpSelect %f32 %is_nan_0 %c_f32_0 %zero_3\n"
6801 		"%param1_0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6802 		"%param1_1 = OpVectorExtractDynamic %f32 %param1 %c_i32_1\n"
6803 		"%param1_2 = OpVectorExtractDynamic %f32 %param1 %c_i32_2\n"
6804 		"%param1_3 = OpVectorExtractDynamic %f32 %param1 %c_i32_3\n"
6805 		"%sum_0 = OpFAdd %f32 %param1_0 %actually_zero_0\n"
6806 		"%sum_1 = OpFAdd %f32 %param1_1 %actually_zero_1\n"
6807 		"%sum_2 = OpFAdd %f32 %param1_2 %actually_zero_2\n"
6808 		"%sum_3 = OpFAdd %f32 %param1_3 %actually_zero_3\n"
6809 		"%ret3 = OpVectorInsertDynamic %v4f32 %param1 %sum_3 %c_i32_3\n"
6810 		"%ret2 = OpVectorInsertDynamic %v4f32 %ret3 %sum_2 %c_i32_2\n"
6811 		"%ret1 = OpVectorInsertDynamic %v4f32 %ret2 %sum_1 %c_i32_1\n"
6812 		"%ret = OpVectorInsertDynamic %v4f32 %ret1 %sum_0 %c_i32_0\n"
6813 		"OpReturnValue %ret\n"
6814 		"OpFunctionEnd\n"
6815 		;
6816 	createTestsForAllStages("matrix", defaultColors, defaultColors, fragments, opUndefTests.get());
6817 
6818 	return opUndefTests.release();
6819 }
6820 
createOpQuantizeSingleOptionTests(tcu::TestCaseGroup * testCtx)6821 void createOpQuantizeSingleOptionTests(tcu::TestCaseGroup* testCtx)
6822 {
6823 	const RGBA		inputColors[4]		=
6824 	{
6825 		RGBA(0,		0,		0,		255),
6826 		RGBA(0,		0,		255,	255),
6827 		RGBA(0,		255,	0,		255),
6828 		RGBA(0,		255,	255,	255)
6829 	};
6830 
6831 	const RGBA		expectedColors[4]	=
6832 	{
6833 		RGBA(255,	 0,		 0,		 255),
6834 		RGBA(255,	 0,		 0,		 255),
6835 		RGBA(255,	 0,		 0,		 255),
6836 		RGBA(255,	 0,		 0,		 255)
6837 	};
6838 
6839 	const struct SingleFP16Possibility
6840 	{
6841 		const char* name;
6842 		const char* constant;  // Value to assign to %test_constant.
6843 		float		valueAsFloat;
6844 		const char* condition; // Must assign to %cond an expression that evaluates to true after %c = OpQuantizeToF16(%test_constant + 0).
6845 	}				tests[]				=
6846 	{
6847 		{
6848 			"negative",
6849 			"-0x1.3p1\n",
6850 			-constructNormalizedFloat(1, 0x300000),
6851 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
6852 		}, // -19
6853 		{
6854 			"positive",
6855 			"0x1.0p7\n",
6856 			constructNormalizedFloat(7, 0x000000),
6857 			"%cond = OpFOrdEqual %bool %c %test_constant\n"
6858 		},  // +128
6859 		// SPIR-V requires that OpQuantizeToF16 flushes
6860 		// any numbers that would end up denormalized in F16 to zero.
6861 		{
6862 			"denorm",
6863 			"0x0.0006p-126\n",
6864 			std::ldexp(1.5f, -140),
6865 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6866 		},  // denorm
6867 		{
6868 			"negative_denorm",
6869 			"-0x0.0006p-126\n",
6870 			-std::ldexp(1.5f, -140),
6871 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6872 		}, // -denorm
6873 		{
6874 			"too_small",
6875 			"0x1.0p-16\n",
6876 			std::ldexp(1.0f, -16),
6877 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6878 		},     // too small positive
6879 		{
6880 			"negative_too_small",
6881 			"-0x1.0p-32\n",
6882 			-std::ldexp(1.0f, -32),
6883 			"%cond = OpFOrdEqual %bool %c %c_f32_0\n"
6884 		},      // too small negative
6885 		{
6886 			"negative_inf",
6887 			"-0x1.0p128\n",
6888 			-std::ldexp(1.0f, 128),
6889 
6890 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
6891 			"%inf = OpIsInf %bool %c\n"
6892 			"%cond = OpLogicalAnd %bool %gz %inf\n"
6893 		},     // -inf to -inf
6894 		{
6895 			"inf",
6896 			"0x1.0p128\n",
6897 			std::ldexp(1.0f, 128),
6898 
6899 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
6900 			"%inf = OpIsInf %bool %c\n"
6901 			"%cond = OpLogicalAnd %bool %gz %inf\n"
6902 		},     // +inf to +inf
6903 		{
6904 			"round_to_negative_inf",
6905 			"-0x1.0p32\n",
6906 			-std::ldexp(1.0f, 32),
6907 
6908 			"%gz = OpFOrdLessThan %bool %c %c_f32_0\n"
6909 			"%inf = OpIsInf %bool %c\n"
6910 			"%cond = OpLogicalAnd %bool %gz %inf\n"
6911 		},     // round to -inf
6912 		{
6913 			"round_to_inf",
6914 			"0x1.0p16\n",
6915 			std::ldexp(1.0f, 16),
6916 
6917 			"%gz = OpFOrdGreaterThan %bool %c %c_f32_0\n"
6918 			"%inf = OpIsInf %bool %c\n"
6919 			"%cond = OpLogicalAnd %bool %gz %inf\n"
6920 		},     // round to +inf
6921 		{
6922 			"nan",
6923 			"0x1.1p128\n",
6924 			std::numeric_limits<float>::quiet_NaN(),
6925 
6926 			// Test for any NaN value, as NaNs are not preserved
6927 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
6928 			"%cond = OpIsNan %bool %direct_quant\n"
6929 		}, // nan
6930 		{
6931 			"negative_nan",
6932 			"-0x1.0001p128\n",
6933 			std::numeric_limits<float>::quiet_NaN(),
6934 
6935 			// Test for any NaN value, as NaNs are not preserved
6936 			"%direct_quant = OpQuantizeToF16 %f32 %test_constant\n"
6937 			"%cond = OpIsNan %bool %direct_quant\n"
6938 		} // -nan
6939 	};
6940 	const char*		constants			=
6941 		"%test_constant = OpConstant %f32 ";  // The value will be test.constant.
6942 
6943 	StringTemplate	function			(
6944 		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
6945 		"%param1        = OpFunctionParameter %v4f32\n"
6946 		"%label_testfun = OpLabel\n"
6947 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
6948 		"%b             = OpFAdd %f32 %test_constant %a\n"
6949 		"%c             = OpQuantizeToF16 %f32 %b\n"
6950 		"${condition}\n"
6951 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
6952 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
6953 		"                 OpReturnValue %retval\n"
6954 		"OpFunctionEnd\n"
6955 	);
6956 
6957 	const char*		specDecorations		= "OpDecorate %test_constant SpecId 0\n";
6958 	const char*		specConstants		=
6959 			"%test_constant = OpSpecConstant %f32 0.\n"
6960 			"%c             = OpSpecConstantOp %f32 QuantizeToF16 %test_constant\n";
6961 
6962 	StringTemplate	specConstantFunction(
6963 		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
6964 		"%param1        = OpFunctionParameter %v4f32\n"
6965 		"%label_testfun = OpLabel\n"
6966 		"${condition}\n"
6967 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
6968 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1\n"
6969 		"                 OpReturnValue %retval\n"
6970 		"OpFunctionEnd\n"
6971 	);
6972 
6973 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
6974 	{
6975 		map<string, string>								codeSpecialization;
6976 		map<string, string>								fragments;
6977 		codeSpecialization["condition"]					= tests[idx].condition;
6978 		fragments["testfun"]							= function.specialize(codeSpecialization);
6979 		fragments["pre_main"]							= string(constants) + tests[idx].constant + "\n";
6980 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
6981 	}
6982 
6983 	for (size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx)
6984 	{
6985 		map<string, string>								codeSpecialization;
6986 		map<string, string>								fragments;
6987 		vector<deInt32>									passConstants;
6988 		deInt32											specConstant;
6989 
6990 		codeSpecialization["condition"]					= tests[idx].condition;
6991 		fragments["testfun"]							= specConstantFunction.specialize(codeSpecialization);
6992 		fragments["decoration"]							= specDecorations;
6993 		fragments["pre_main"]							= specConstants;
6994 
6995 		memcpy(&specConstant, &tests[idx].valueAsFloat, sizeof(float));
6996 		passConstants.push_back(specConstant);
6997 
6998 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
6999 	}
7000 }
7001 
createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup * testCtx)7002 void createOpQuantizeTwoPossibilityTests(tcu::TestCaseGroup* testCtx)
7003 {
7004 	RGBA inputColors[4] =  {
7005 		RGBA(0,		0,		0,		255),
7006 		RGBA(0,		0,		255,	255),
7007 		RGBA(0,		255,	0,		255),
7008 		RGBA(0,		255,	255,	255)
7009 	};
7010 
7011 	RGBA expectedColors[4] =
7012 	{
7013 		RGBA(255,	 0,		 0,		 255),
7014 		RGBA(255,	 0,		 0,		 255),
7015 		RGBA(255,	 0,		 0,		 255),
7016 		RGBA(255,	 0,		 0,		 255)
7017 	};
7018 
7019 	struct DualFP16Possibility
7020 	{
7021 		const char* name;
7022 		const char* input;
7023 		float		inputAsFloat;
7024 		const char* possibleOutput1;
7025 		const char* possibleOutput2;
7026 	} tests[] = {
7027 		{
7028 			"positive_round_up_or_round_down",
7029 			"0x1.3003p8",
7030 			constructNormalizedFloat(8, 0x300300),
7031 			"0x1.304p8",
7032 			"0x1.3p8"
7033 		},
7034 		{
7035 			"negative_round_up_or_round_down",
7036 			"-0x1.6008p-7",
7037 			-constructNormalizedFloat(-7, 0x600800),
7038 			"-0x1.6p-7",
7039 			"-0x1.604p-7"
7040 		},
7041 		{
7042 			"carry_bit",
7043 			"0x1.01ep2",
7044 			constructNormalizedFloat(2, 0x01e000),
7045 			"0x1.01cp2",
7046 			"0x1.02p2"
7047 		},
7048 		{
7049 			"carry_to_exponent",
7050 			"0x1.ffep1",
7051 			constructNormalizedFloat(1, 0xffe000),
7052 			"0x1.ffcp1",
7053 			"0x1.0p2"
7054 		},
7055 	};
7056 	StringTemplate constants (
7057 		"%input_const = OpConstant %f32 ${input}\n"
7058 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
7059 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
7060 		);
7061 
7062 	StringTemplate specConstants (
7063 		"%input_const = OpSpecConstant %f32 0.\n"
7064 		"%possible_solution1 = OpConstant %f32 ${output1}\n"
7065 		"%possible_solution2 = OpConstant %f32 ${output2}\n"
7066 	);
7067 
7068 	const char* specDecorations = "OpDecorate %input_const  SpecId 0\n";
7069 
7070 	const char* function  =
7071 		"%test_code     = OpFunction %v4f32 None %v4f32_function\n"
7072 		"%param1        = OpFunctionParameter %v4f32\n"
7073 		"%label_testfun = OpLabel\n"
7074 		"%a             = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7075 		// For the purposes of this test we assume that 0.f will always get
7076 		// faithfully passed through the pipeline stages.
7077 		"%b             = OpFAdd %f32 %input_const %a\n"
7078 		"%c             = OpQuantizeToF16 %f32 %b\n"
7079 		"%eq_1          = OpFOrdEqual %bool %c %possible_solution1\n"
7080 		"%eq_2          = OpFOrdEqual %bool %c %possible_solution2\n"
7081 		"%cond          = OpLogicalOr %bool %eq_1 %eq_2\n"
7082 		"%v4cond        = OpCompositeConstruct %v4bool %cond %cond %cond %cond\n"
7083 		"%retval        = OpSelect %v4f32 %v4cond %c_v4f32_1_0_0_1 %param1"
7084 		"                 OpReturnValue %retval\n"
7085 		"OpFunctionEnd\n";
7086 
7087 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
7088 		map<string, string>									fragments;
7089 		map<string, string>									constantSpecialization;
7090 
7091 		constantSpecialization["input"]						= tests[idx].input;
7092 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
7093 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
7094 		fragments["testfun"]								= function;
7095 		fragments["pre_main"]								= constants.specialize(constantSpecialization);
7096 		createTestsForAllStages(tests[idx].name, inputColors, expectedColors, fragments, testCtx);
7097 	}
7098 
7099 	for(size_t idx = 0; idx < (sizeof(tests)/sizeof(tests[0])); ++idx) {
7100 		map<string, string>									fragments;
7101 		map<string, string>									constantSpecialization;
7102 		vector<deInt32>										passConstants;
7103 		deInt32												specConstant;
7104 
7105 		constantSpecialization["output1"]					= tests[idx].possibleOutput1;
7106 		constantSpecialization["output2"]					= tests[idx].possibleOutput2;
7107 		fragments["testfun"]								= function;
7108 		fragments["decoration"]								= specDecorations;
7109 		fragments["pre_main"]								= specConstants.specialize(constantSpecialization);
7110 
7111 		memcpy(&specConstant, &tests[idx].inputAsFloat, sizeof(float));
7112 		passConstants.push_back(specConstant);
7113 
7114 		createTestsForAllStages(string("spec_const_") + tests[idx].name, inputColors, expectedColors, fragments, passConstants, testCtx);
7115 	}
7116 }
7117 
createOpQuantizeTests(tcu::TestContext & testCtx)7118 tcu::TestCaseGroup* createOpQuantizeTests(tcu::TestContext& testCtx)
7119 {
7120 	de::MovePtr<tcu::TestCaseGroup> opQuantizeTests (new tcu::TestCaseGroup(testCtx, "opquantize", "Test OpQuantizeToF16"));
7121 	createOpQuantizeSingleOptionTests(opQuantizeTests.get());
7122 	createOpQuantizeTwoPossibilityTests(opQuantizeTests.get());
7123 	return opQuantizeTests.release();
7124 }
7125 
7126 struct ShaderPermutation
7127 {
7128 	deUint8 vertexPermutation;
7129 	deUint8 geometryPermutation;
7130 	deUint8 tesscPermutation;
7131 	deUint8 tessePermutation;
7132 	deUint8 fragmentPermutation;
7133 };
7134 
getShaderPermutation(deUint8 inputValue)7135 ShaderPermutation getShaderPermutation(deUint8 inputValue)
7136 {
7137 	ShaderPermutation	permutation =
7138 	{
7139 		static_cast<deUint8>(inputValue & 0x10? 1u: 0u),
7140 		static_cast<deUint8>(inputValue & 0x08? 1u: 0u),
7141 		static_cast<deUint8>(inputValue & 0x04? 1u: 0u),
7142 		static_cast<deUint8>(inputValue & 0x02? 1u: 0u),
7143 		static_cast<deUint8>(inputValue & 0x01? 1u: 0u)
7144 	};
7145 	return permutation;
7146 }
7147 
createModuleTests(tcu::TestContext & testCtx)7148 tcu::TestCaseGroup* createModuleTests(tcu::TestContext& testCtx)
7149 {
7150 	RGBA								defaultColors[4];
7151 	RGBA								invertedColors[4];
7152 	de::MovePtr<tcu::TestCaseGroup>		moduleTests			(new tcu::TestCaseGroup(testCtx, "module", "Multiple entry points into shaders"));
7153 
7154 	const ShaderElement					combinedPipeline[]	=
7155 	{
7156 		ShaderElement("module", "main", VK_SHADER_STAGE_VERTEX_BIT),
7157 		ShaderElement("module", "main", VK_SHADER_STAGE_GEOMETRY_BIT),
7158 		ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
7159 		ShaderElement("module", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
7160 		ShaderElement("module", "main", VK_SHADER_STAGE_FRAGMENT_BIT)
7161 	};
7162 
7163 	getDefaultColors(defaultColors);
7164 	getInvertedDefaultColors(invertedColors);
7165 	addFunctionCaseWithPrograms<InstanceContext>(moduleTests.get(), "same_module", "", createCombinedModule, runAndVerifyDefaultPipeline, createInstanceContext(combinedPipeline, map<string, string>()));
7166 
7167 	const char* numbers[] =
7168 	{
7169 		"1", "2"
7170 	};
7171 
7172 	for (deInt8 idx = 0; idx < 32; ++idx)
7173 	{
7174 		ShaderPermutation			permutation		= getShaderPermutation(idx);
7175 		string						name			= string("vert") + numbers[permutation.vertexPermutation] + "_geom" + numbers[permutation.geometryPermutation] + "_tessc" + numbers[permutation.tesscPermutation] + "_tesse" + numbers[permutation.tessePermutation] + "_frag" + numbers[permutation.fragmentPermutation];
7176 		const ShaderElement			pipeline[]		=
7177 		{
7178 			ShaderElement("vert",	string("vert") +	numbers[permutation.vertexPermutation],		VK_SHADER_STAGE_VERTEX_BIT),
7179 			ShaderElement("geom",	string("geom") +	numbers[permutation.geometryPermutation],	VK_SHADER_STAGE_GEOMETRY_BIT),
7180 			ShaderElement("tessc",	string("tessc") +	numbers[permutation.tesscPermutation],		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
7181 			ShaderElement("tesse",	string("tesse") +	numbers[permutation.tessePermutation],		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
7182 			ShaderElement("frag",	string("frag") +	numbers[permutation.fragmentPermutation],	VK_SHADER_STAGE_FRAGMENT_BIT)
7183 		};
7184 
7185 		// If there are an even number of swaps, then it should be no-op.
7186 		// If there are an odd number, the color should be flipped.
7187 		if ((permutation.vertexPermutation + permutation.geometryPermutation + permutation.tesscPermutation + permutation.tessePermutation + permutation.fragmentPermutation) % 2 == 0)
7188 		{
7189 			addFunctionCaseWithPrograms<InstanceContext>(moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline, createInstanceContext(pipeline, defaultColors, defaultColors, map<string, string>()));
7190 		}
7191 		else
7192 		{
7193 			addFunctionCaseWithPrograms<InstanceContext>(moduleTests.get(), name, "", createMultipleEntries, runAndVerifyDefaultPipeline, createInstanceContext(pipeline, defaultColors, invertedColors, map<string, string>()));
7194 		}
7195 	}
7196 	return moduleTests.release();
7197 }
7198 
createLoopTests(tcu::TestContext & testCtx)7199 tcu::TestCaseGroup* createLoopTests(tcu::TestContext& testCtx)
7200 {
7201 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "loop", "Looping control flow"));
7202 	RGBA defaultColors[4];
7203 	getDefaultColors(defaultColors);
7204 	map<string, string> fragments;
7205 	fragments["pre_main"] =
7206 		"%c_f32_5 = OpConstant %f32 5.\n";
7207 
7208 	// A loop with a single block. The Continue Target is the loop block
7209 	// itself. In SPIR-V terms, the "loop construct" contains no blocks at all
7210 	// -- the "continue construct" forms the entire loop.
7211 	fragments["testfun"] =
7212 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7213 		"%param1 = OpFunctionParameter %v4f32\n"
7214 
7215 		"%entry = OpLabel\n"
7216 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7217 		"OpBranch %loop\n"
7218 
7219 		";adds and subtracts 1.0 to %val in alternate iterations\n"
7220 		"%loop = OpLabel\n"
7221 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
7222 		"%delta = OpPhi %f32 %c_f32_1 %entry %minus_delta %loop\n"
7223 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
7224 		"%val = OpFAdd %f32 %val1 %delta\n"
7225 		"%minus_delta = OpFSub %f32 %c_f32_0 %delta\n"
7226 		"%count__ = OpISub %i32 %count %c_i32_1\n"
7227 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7228 		"OpLoopMerge %exit %loop None\n"
7229 		"OpBranchConditional %again %loop %exit\n"
7230 
7231 		"%exit = OpLabel\n"
7232 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
7233 		"OpReturnValue %result\n"
7234 
7235 		"OpFunctionEnd\n"
7236 		;
7237 	createTestsForAllStages("single_block", defaultColors, defaultColors, fragments, testGroup.get());
7238 
7239 	// Body comprised of multiple basic blocks.
7240 	const StringTemplate multiBlock(
7241 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7242 		"%param1 = OpFunctionParameter %v4f32\n"
7243 
7244 		"%entry = OpLabel\n"
7245 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7246 		"OpBranch %loop\n"
7247 
7248 		";adds and subtracts 1.0 to %val in alternate iterations\n"
7249 		"%loop = OpLabel\n"
7250 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %gather\n"
7251 		"%delta = OpPhi %f32 %c_f32_1 %entry %delta_next %gather\n"
7252 		"%val1 = OpPhi %f32 %val0 %entry %val %gather\n"
7253 		// There are several possibilities for the Continue Target below.  Each
7254 		// will be specialized into a separate test case.
7255 		"OpLoopMerge %exit ${continue_target} None\n"
7256 		"OpBranch %if\n"
7257 
7258 		"%if = OpLabel\n"
7259 		";delta_next = (delta > 0) ? -1 : 1;\n"
7260 		"%gt0 = OpFOrdGreaterThan %bool %delta %c_f32_0\n"
7261 		"OpSelectionMerge %gather DontFlatten\n"
7262 		"OpBranchConditional %gt0 %even %odd ;tells us if %count is even or odd\n"
7263 
7264 		"%odd = OpLabel\n"
7265 		"OpBranch %gather\n"
7266 
7267 		"%even = OpLabel\n"
7268 		"OpBranch %gather\n"
7269 
7270 		"%gather = OpLabel\n"
7271 		"%delta_next = OpPhi %f32 %c_f32_n1 %even %c_f32_1 %odd\n"
7272 		"%val = OpFAdd %f32 %val1 %delta\n"
7273 		"%count__ = OpISub %i32 %count %c_i32_1\n"
7274 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7275 		"OpBranchConditional %again %loop %exit\n"
7276 
7277 		"%exit = OpLabel\n"
7278 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val %c_i32_0\n"
7279 		"OpReturnValue %result\n"
7280 
7281 		"OpFunctionEnd\n");
7282 
7283 	map<string, string> continue_target;
7284 
7285 	// The Continue Target is the loop block itself.
7286 	continue_target["continue_target"] = "%loop";
7287 	fragments["testfun"] = multiBlock.specialize(continue_target);
7288 	createTestsForAllStages("multi_block_continue_construct", defaultColors, defaultColors, fragments, testGroup.get());
7289 
7290 	// The Continue Target is at the end of the loop.
7291 	continue_target["continue_target"] = "%gather";
7292 	fragments["testfun"] = multiBlock.specialize(continue_target);
7293 	createTestsForAllStages("multi_block_loop_construct", defaultColors, defaultColors, fragments, testGroup.get());
7294 
7295 	// A loop with continue statement.
7296 	fragments["testfun"] =
7297 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7298 		"%param1 = OpFunctionParameter %v4f32\n"
7299 
7300 		"%entry = OpLabel\n"
7301 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7302 		"OpBranch %loop\n"
7303 
7304 		";adds 4, 3, and 1 to %val0 (skips 2)\n"
7305 		"%loop = OpLabel\n"
7306 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
7307 		"%val1 = OpPhi %f32 %val0 %entry %val %continue\n"
7308 		"OpLoopMerge %exit %continue None\n"
7309 		"OpBranch %if\n"
7310 
7311 		"%if = OpLabel\n"
7312 		";skip if %count==2\n"
7313 		"%eq2 = OpIEqual %bool %count %c_i32_2\n"
7314 		"OpSelectionMerge %continue DontFlatten\n"
7315 		"OpBranchConditional %eq2 %continue %body\n"
7316 
7317 		"%body = OpLabel\n"
7318 		"%fcount = OpConvertSToF %f32 %count\n"
7319 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
7320 		"OpBranch %continue\n"
7321 
7322 		"%continue = OpLabel\n"
7323 		"%val = OpPhi %f32 %val2 %body %val1 %if\n"
7324 		"%count__ = OpISub %i32 %count %c_i32_1\n"
7325 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7326 		"OpBranchConditional %again %loop %exit\n"
7327 
7328 		"%exit = OpLabel\n"
7329 		"%same = OpFSub %f32 %val %c_f32_8\n"
7330 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
7331 		"OpReturnValue %result\n"
7332 		"OpFunctionEnd\n";
7333 	createTestsForAllStages("continue", defaultColors, defaultColors, fragments, testGroup.get());
7334 
7335 	// A loop with break.
7336 	fragments["testfun"] =
7337 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7338 		"%param1 = OpFunctionParameter %v4f32\n"
7339 
7340 		"%entry = OpLabel\n"
7341 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
7342 		"%dot = OpDot %f32 %param1 %param1\n"
7343 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
7344 		"%zero = OpConvertFToU %u32 %div\n"
7345 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
7346 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7347 		"OpBranch %loop\n"
7348 
7349 		";adds 4 and 3 to %val0 (exits early)\n"
7350 		"%loop = OpLabel\n"
7351 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
7352 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
7353 		"OpLoopMerge %exit %continue None\n"
7354 		"OpBranch %if\n"
7355 
7356 		"%if = OpLabel\n"
7357 		";end loop if %count==%two\n"
7358 		"%above2 = OpSGreaterThan %bool %count %two\n"
7359 		"OpSelectionMerge %continue DontFlatten\n"
7360 		"OpBranchConditional %above2 %body %exit\n"
7361 
7362 		"%body = OpLabel\n"
7363 		"%fcount = OpConvertSToF %f32 %count\n"
7364 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
7365 		"OpBranch %continue\n"
7366 
7367 		"%continue = OpLabel\n"
7368 		"%count__ = OpISub %i32 %count %c_i32_1\n"
7369 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7370 		"OpBranchConditional %again %loop %exit\n"
7371 
7372 		"%exit = OpLabel\n"
7373 		"%val_post = OpPhi %f32 %val2 %continue %val1 %if\n"
7374 		"%same = OpFSub %f32 %val_post %c_f32_7\n"
7375 		"%result = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
7376 		"OpReturnValue %result\n"
7377 		"OpFunctionEnd\n";
7378 	createTestsForAllStages("break", defaultColors, defaultColors, fragments, testGroup.get());
7379 
7380 	// A loop with return.
7381 	fragments["testfun"] =
7382 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7383 		"%param1 = OpFunctionParameter %v4f32\n"
7384 
7385 		"%entry = OpLabel\n"
7386 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
7387 		"%dot = OpDot %f32 %param1 %param1\n"
7388 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
7389 		"%zero = OpConvertFToU %u32 %div\n"
7390 		"%two = OpIAdd %i32 %zero %c_i32_2\n"
7391 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7392 		"OpBranch %loop\n"
7393 
7394 		";returns early without modifying %param1\n"
7395 		"%loop = OpLabel\n"
7396 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %continue\n"
7397 		"%val1 = OpPhi %f32 %val0 %entry %val2 %continue\n"
7398 		"OpLoopMerge %exit %continue None\n"
7399 		"OpBranch %if\n"
7400 
7401 		"%if = OpLabel\n"
7402 		";return if %count==%two\n"
7403 		"%above2 = OpSGreaterThan %bool %count %two\n"
7404 		"OpSelectionMerge %continue DontFlatten\n"
7405 		"OpBranchConditional %above2 %body %early_exit\n"
7406 
7407 		"%early_exit = OpLabel\n"
7408 		"OpReturnValue %param1\n"
7409 
7410 		"%body = OpLabel\n"
7411 		"%fcount = OpConvertSToF %f32 %count\n"
7412 		"%val2 = OpFAdd %f32 %val1 %fcount\n"
7413 		"OpBranch %continue\n"
7414 
7415 		"%continue = OpLabel\n"
7416 		"%count__ = OpISub %i32 %count %c_i32_1\n"
7417 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7418 		"OpBranchConditional %again %loop %exit\n"
7419 
7420 		"%exit = OpLabel\n"
7421 		";should never get here, so return an incorrect result\n"
7422 		"%result = OpVectorInsertDynamic %v4f32 %param1 %val2 %c_i32_0\n"
7423 		"OpReturnValue %result\n"
7424 		"OpFunctionEnd\n";
7425 	createTestsForAllStages("return", defaultColors, defaultColors, fragments, testGroup.get());
7426 
7427 	return testGroup.release();
7428 }
7429 
7430 // Adds a new test to group using custom fragments for the tessellation-control
7431 // stage and passthrough fragments for all other stages.  Uses default colors
7432 // for input and expected output.
addTessCtrlTest(tcu::TestCaseGroup * group,const char * name,const map<string,string> & fragments)7433 void addTessCtrlTest(tcu::TestCaseGroup* group, const char* name, const map<string, string>& fragments)
7434 {
7435 	RGBA defaultColors[4];
7436 	getDefaultColors(defaultColors);
7437 	const ShaderElement pipelineStages[] =
7438 	{
7439 		ShaderElement("vert", "main", VK_SHADER_STAGE_VERTEX_BIT),
7440 		ShaderElement("tessc", "main", VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT),
7441 		ShaderElement("tesse", "main", VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT),
7442 		ShaderElement("frag", "main", VK_SHADER_STAGE_FRAGMENT_BIT),
7443 	};
7444 
7445 	addFunctionCaseWithPrograms<InstanceContext>(group, name, "", addShaderCodeCustomTessControl,
7446 												 runAndVerifyDefaultPipeline, createInstanceContext(
7447 													 pipelineStages, defaultColors, defaultColors, fragments, StageToSpecConstantMap()));
7448 }
7449 
7450 // A collection of tests putting OpControlBarrier in places GLSL forbids but SPIR-V allows.
createBarrierTests(tcu::TestContext & testCtx)7451 tcu::TestCaseGroup* createBarrierTests(tcu::TestContext& testCtx)
7452 {
7453 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "barrier", "OpControlBarrier"));
7454 	map<string, string> fragments;
7455 
7456 	// A barrier inside a function body.
7457 	fragments["pre_main"] =
7458 		"%Workgroup = OpConstant %i32 2\n"
7459 		"%SequentiallyConsistent = OpConstant %i32 0x10\n";
7460 	fragments["testfun"] =
7461 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7462 		"%param1 = OpFunctionParameter %v4f32\n"
7463 		"%label_testfun = OpLabel\n"
7464 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7465 		"OpReturnValue %param1\n"
7466 		"OpFunctionEnd\n";
7467 	addTessCtrlTest(testGroup.get(), "in_function", fragments);
7468 
7469 	// Common setup code for the following tests.
7470 	fragments["pre_main"] =
7471 		"%Workgroup = OpConstant %i32 2\n"
7472 		"%SequentiallyConsistent = OpConstant %i32 0x10\n"
7473 		"%c_f32_5 = OpConstant %f32 5.\n";
7474 	const string setupPercentZero =	 // Begins %test_code function with code that sets %zero to 0u but cannot be optimized away.
7475 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7476 		"%param1 = OpFunctionParameter %v4f32\n"
7477 		"%entry = OpLabel\n"
7478 		";param1 components are between 0 and 1, so dot product is 4 or less\n"
7479 		"%dot = OpDot %f32 %param1 %param1\n"
7480 		"%div = OpFDiv %f32 %dot %c_f32_5\n"
7481 		"%zero = OpConvertFToU %u32 %div\n";
7482 
7483 	// Barriers inside OpSwitch branches.
7484 	fragments["testfun"] =
7485 		setupPercentZero +
7486 		"OpSelectionMerge %switch_exit None\n"
7487 		"OpSwitch %zero %switch_default 0 %case0 1 %case1 ;should always go to %case0\n"
7488 
7489 		"%case1 = OpLabel\n"
7490 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
7491 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7492 		"%wrong_branch_alert1 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
7493 		"OpBranch %switch_exit\n"
7494 
7495 		"%switch_default = OpLabel\n"
7496 		"%wrong_branch_alert2 = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
7497 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
7498 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7499 		"OpBranch %switch_exit\n"
7500 
7501 		"%case0 = OpLabel\n"
7502 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7503 		"OpBranch %switch_exit\n"
7504 
7505 		"%switch_exit = OpLabel\n"
7506 		"%ret = OpPhi %v4f32 %param1 %case0 %wrong_branch_alert1 %case1 %wrong_branch_alert2 %switch_default\n"
7507 		"OpReturnValue %ret\n"
7508 		"OpFunctionEnd\n";
7509 	addTessCtrlTest(testGroup.get(), "in_switch", fragments);
7510 
7511 	// Barriers inside if-then-else.
7512 	fragments["testfun"] =
7513 		setupPercentZero +
7514 		"%eq0 = OpIEqual %bool %zero %c_u32_0\n"
7515 		"OpSelectionMerge %exit DontFlatten\n"
7516 		"OpBranchConditional %eq0 %then %else\n"
7517 
7518 		"%else = OpLabel\n"
7519 		";This barrier should never be executed, but its presence makes test failure more likely when there's a bug.\n"
7520 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7521 		"%wrong_branch_alert = OpVectorInsertDynamic %v4f32 %param1 %c_f32_0_5 %c_i32_0\n"
7522 		"OpBranch %exit\n"
7523 
7524 		"%then = OpLabel\n"
7525 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7526 		"OpBranch %exit\n"
7527 
7528 		"%exit = OpLabel\n"
7529 		"%ret = OpPhi %v4f32 %param1 %then %wrong_branch_alert %else\n"
7530 		"OpReturnValue %ret\n"
7531 		"OpFunctionEnd\n";
7532 	addTessCtrlTest(testGroup.get(), "in_if", fragments);
7533 
7534 	// A barrier after control-flow reconvergence, tempting the compiler to attempt something like this:
7535 	// http://lists.llvm.org/pipermail/llvm-dev/2009-October/026317.html.
7536 	fragments["testfun"] =
7537 		setupPercentZero +
7538 		"%thread_id = OpLoad %i32 %BP_gl_InvocationID\n"
7539 		"%thread0 = OpIEqual %bool %thread_id %c_i32_0\n"
7540 		"OpSelectionMerge %exit DontFlatten\n"
7541 		"OpBranchConditional %thread0 %then %else\n"
7542 
7543 		"%else = OpLabel\n"
7544 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7545 		"OpBranch %exit\n"
7546 
7547 		"%then = OpLabel\n"
7548 		"%val1 = OpVectorExtractDynamic %f32 %param1 %zero\n"
7549 		"OpBranch %exit\n"
7550 
7551 		"%exit = OpLabel\n"
7552 		"%val = OpPhi %f32 %val0 %else %val1 %then\n"
7553 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7554 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %val %zero\n"
7555 		"OpReturnValue %ret\n"
7556 		"OpFunctionEnd\n";
7557 	addTessCtrlTest(testGroup.get(), "after_divergent_if", fragments);
7558 
7559 	// A barrier inside a loop.
7560 	fragments["pre_main"] =
7561 		"%Workgroup = OpConstant %i32 2\n"
7562 		"%SequentiallyConsistent = OpConstant %i32 0x10\n"
7563 		"%c_f32_10 = OpConstant %f32 10.\n";
7564 	fragments["testfun"] =
7565 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7566 		"%param1 = OpFunctionParameter %v4f32\n"
7567 		"%entry = OpLabel\n"
7568 		"%val0 = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7569 		"OpBranch %loop\n"
7570 
7571 		";adds 4, 3, 2, and 1 to %val0\n"
7572 		"%loop = OpLabel\n"
7573 		"%count = OpPhi %i32 %c_i32_4 %entry %count__ %loop\n"
7574 		"%val1 = OpPhi %f32 %val0 %entry %val %loop\n"
7575 		"OpControlBarrier %Workgroup %Workgroup %SequentiallyConsistent\n"
7576 		"%fcount = OpConvertSToF %f32 %count\n"
7577 		"%val = OpFAdd %f32 %val1 %fcount\n"
7578 		"%count__ = OpISub %i32 %count %c_i32_1\n"
7579 		"%again = OpSGreaterThan %bool %count__ %c_i32_0\n"
7580 		"OpLoopMerge %exit %loop None\n"
7581 		"OpBranchConditional %again %loop %exit\n"
7582 
7583 		"%exit = OpLabel\n"
7584 		"%same = OpFSub %f32 %val %c_f32_10\n"
7585 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %same %c_i32_0\n"
7586 		"OpReturnValue %ret\n"
7587 		"OpFunctionEnd\n";
7588 	addTessCtrlTest(testGroup.get(), "in_loop", fragments);
7589 
7590 	return testGroup.release();
7591 }
7592 
7593 // Test for the OpFRem instruction.
createFRemTests(tcu::TestContext & testCtx)7594 tcu::TestCaseGroup* createFRemTests(tcu::TestContext& testCtx)
7595 {
7596 	de::MovePtr<tcu::TestCaseGroup>		testGroup(new tcu::TestCaseGroup(testCtx, "frem", "OpFRem"));
7597 	map<string, string>					fragments;
7598 	RGBA								inputColors[4];
7599 	RGBA								outputColors[4];
7600 
7601 	fragments["pre_main"]				 =
7602 		"%c_f32_3 = OpConstant %f32 3.0\n"
7603 		"%c_f32_n3 = OpConstant %f32 -3.0\n"
7604 		"%c_f32_4 = OpConstant %f32 4.0\n"
7605 		"%c_f32_p75 = OpConstant %f32 0.75\n"
7606 		"%c_v4f32_p75_p75_p75_p75 = OpConstantComposite %v4f32 %c_f32_p75 %c_f32_p75 %c_f32_p75 %c_f32_p75 \n"
7607 		"%c_v4f32_4_4_4_4 = OpConstantComposite %v4f32 %c_f32_4 %c_f32_4 %c_f32_4 %c_f32_4\n"
7608 		"%c_v4f32_3_n3_3_n3 = OpConstantComposite %v4f32 %c_f32_3 %c_f32_n3 %c_f32_3 %c_f32_n3\n";
7609 
7610 	// The test does the following.
7611 	// vec4 result = (param1 * 8.0) - 4.0;
7612 	// return (frem(result.x,3) + 0.75, frem(result.y, -3) + 0.75, 0, 1)
7613 	fragments["testfun"]				 =
7614 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7615 		"%param1 = OpFunctionParameter %v4f32\n"
7616 		"%label_testfun = OpLabel\n"
7617 		"%v_times_8 = OpVectorTimesScalar %v4f32 %param1 %c_f32_8\n"
7618 		"%minus_4 = OpFSub %v4f32 %v_times_8 %c_v4f32_4_4_4_4\n"
7619 		"%frem = OpFRem %v4f32 %minus_4 %c_v4f32_3_n3_3_n3\n"
7620 		"%added = OpFAdd %v4f32 %frem %c_v4f32_p75_p75_p75_p75\n"
7621 		"%xyz_1 = OpVectorInsertDynamic %v4f32 %added %c_f32_1 %c_i32_3\n"
7622 		"%xy_0_1 = OpVectorInsertDynamic %v4f32 %xyz_1 %c_f32_0 %c_i32_2\n"
7623 		"OpReturnValue %xy_0_1\n"
7624 		"OpFunctionEnd\n";
7625 
7626 
7627 	inputColors[0]		= RGBA(16,	16,		0, 255);
7628 	inputColors[1]		= RGBA(232, 232,	0, 255);
7629 	inputColors[2]		= RGBA(232, 16,		0, 255);
7630 	inputColors[3]		= RGBA(16,	232,	0, 255);
7631 
7632 	outputColors[0]		= RGBA(64,	64,		0, 255);
7633 	outputColors[1]		= RGBA(255, 255,	0, 255);
7634 	outputColors[2]		= RGBA(255, 64,		0, 255);
7635 	outputColors[3]		= RGBA(64,	255,	0, 255);
7636 
7637 	createTestsForAllStages("frem", inputColors, outputColors, fragments, testGroup.get());
7638 	return testGroup.release();
7639 }
7640 
createInstructionTests(tcu::TestContext & testCtx)7641 tcu::TestCaseGroup* createInstructionTests (tcu::TestContext& testCtx)
7642 {
7643 	de::MovePtr<tcu::TestCaseGroup> instructionTests	(new tcu::TestCaseGroup(testCtx, "instruction", "Instructions with special opcodes/operands"));
7644 	de::MovePtr<tcu::TestCaseGroup> computeTests		(new tcu::TestCaseGroup(testCtx, "compute", "Compute Instructions with special opcodes/operands"));
7645 	de::MovePtr<tcu::TestCaseGroup> graphicsTests		(new tcu::TestCaseGroup(testCtx, "graphics", "Graphics Instructions with special opcodes/operands"));
7646 
7647 	computeTests->addChild(createOpNopGroup(testCtx));
7648 	computeTests->addChild(createOpLineGroup(testCtx));
7649 	computeTests->addChild(createOpNoLineGroup(testCtx));
7650 	computeTests->addChild(createOpConstantNullGroup(testCtx));
7651 	computeTests->addChild(createOpConstantCompositeGroup(testCtx));
7652 	computeTests->addChild(createOpConstantUsageGroup(testCtx));
7653 	computeTests->addChild(createSpecConstantGroup(testCtx));
7654 	computeTests->addChild(createOpSourceGroup(testCtx));
7655 	computeTests->addChild(createOpSourceExtensionGroup(testCtx));
7656 	computeTests->addChild(createDecorationGroupGroup(testCtx));
7657 	computeTests->addChild(createOpPhiGroup(testCtx));
7658 	computeTests->addChild(createLoopControlGroup(testCtx));
7659 	computeTests->addChild(createFunctionControlGroup(testCtx));
7660 	computeTests->addChild(createSelectionControlGroup(testCtx));
7661 	computeTests->addChild(createBlockOrderGroup(testCtx));
7662 	computeTests->addChild(createMultipleShaderGroup(testCtx));
7663 	computeTests->addChild(createMemoryAccessGroup(testCtx));
7664 	computeTests->addChild(createOpCopyMemoryGroup(testCtx));
7665 	computeTests->addChild(createOpCopyObjectGroup(testCtx));
7666 	computeTests->addChild(createNoContractionGroup(testCtx));
7667 	computeTests->addChild(createOpUndefGroup(testCtx));
7668 	computeTests->addChild(createOpUnreachableGroup(testCtx));
7669 	computeTests ->addChild(createOpQuantizeToF16Group(testCtx));
7670 	computeTests ->addChild(createOpFRemGroup(testCtx));
7671 
7672 	RGBA defaultColors[4];
7673 	getDefaultColors(defaultColors);
7674 
7675 	de::MovePtr<tcu::TestCaseGroup> opnopTests (new tcu::TestCaseGroup(testCtx, "opnop", "Test OpNop"));
7676 	map<string, string> opNopFragments;
7677 	opNopFragments["testfun"] =
7678 		"%test_code = OpFunction %v4f32 None %v4f32_function\n"
7679 		"%param1 = OpFunctionParameter %v4f32\n"
7680 		"%label_testfun = OpLabel\n"
7681 		"OpNop\n"
7682 		"OpNop\n"
7683 		"OpNop\n"
7684 		"OpNop\n"
7685 		"OpNop\n"
7686 		"OpNop\n"
7687 		"OpNop\n"
7688 		"OpNop\n"
7689 		"%a = OpVectorExtractDynamic %f32 %param1 %c_i32_0\n"
7690 		"%b = OpFAdd %f32 %a %a\n"
7691 		"OpNop\n"
7692 		"%c = OpFSub %f32 %b %a\n"
7693 		"%ret = OpVectorInsertDynamic %v4f32 %param1 %c %c_i32_0\n"
7694 		"OpNop\n"
7695 		"OpNop\n"
7696 		"OpReturnValue %ret\n"
7697 		"OpFunctionEnd\n"
7698 		;
7699 	createTestsForAllStages("opnop", defaultColors, defaultColors, opNopFragments, opnopTests.get());
7700 
7701 
7702 	graphicsTests->addChild(opnopTests.release());
7703 	graphicsTests->addChild(createOpSourceTests(testCtx));
7704 	graphicsTests->addChild(createOpSourceContinuedTests(testCtx));
7705 	graphicsTests->addChild(createOpLineTests(testCtx));
7706 	graphicsTests->addChild(createOpNoLineTests(testCtx));
7707 	graphicsTests->addChild(createOpConstantNullTests(testCtx));
7708 	graphicsTests->addChild(createOpConstantCompositeTests(testCtx));
7709 	graphicsTests->addChild(createMemoryAccessTests(testCtx));
7710 	graphicsTests->addChild(createOpUndefTests(testCtx));
7711 	graphicsTests->addChild(createSelectionBlockOrderTests(testCtx));
7712 	graphicsTests->addChild(createModuleTests(testCtx));
7713 	graphicsTests->addChild(createSwitchBlockOrderTests(testCtx));
7714 	graphicsTests->addChild(createOpPhiTests(testCtx));
7715 	graphicsTests->addChild(createNoContractionTests(testCtx));
7716 	graphicsTests->addChild(createOpQuantizeTests(testCtx));
7717 	graphicsTests->addChild(createLoopTests(testCtx));
7718 	graphicsTests->addChild(createSpecConstantTests(testCtx));
7719 	graphicsTests->addChild(createSpecConstantOpQuantizeToF16Group(testCtx));
7720 	graphicsTests->addChild(createBarrierTests(testCtx));
7721 	graphicsTests->addChild(createDecorationGroupTests(testCtx));
7722 	graphicsTests->addChild(createFRemTests(testCtx));
7723 
7724 	instructionTests->addChild(computeTests.release());
7725 	instructionTests->addChild(graphicsTests.release());
7726 
7727 	return instructionTests.release();
7728 }
7729 
7730 } // SpirVAssembly
7731 } // vkt
7732