1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ShaderExecutor
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktShaderExecutor.hpp"
27 
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 #include "gluShaderUtil.hpp"
39 
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
43 
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
47 
48 #include <map>
49 #include <sstream>
50 #include <iostream>
51 
52 using std::vector;
53 using namespace vk;
54 
55 namespace vkt
56 {
57 namespace shaderexecutor
58 {
59 namespace
60 {
61 
// Default render dimensions. Presumably in pixels and used as the framebuffer
// size by the executors -- the uses are outside this part of the file, TODO confirm.
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100,
};
67 
68 // Common typedefs
69 
// Reference-counted (de::SharedPtr) handles to Vulkan object wrappers and memory
// allocations, so that they can be stored in standard containers.
typedef de::SharedPtr<Unique<VkImage> >		VkImageSp;
typedef de::SharedPtr<Unique<VkImageView> >	VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer> >	VkBufferSp;
typedef de::SharedPtr<Allocation>			AllocationSp;

// Forward declaration: maps a GLSL data type to the VkFormat used for a vertex
// attribute of that type. The definition appears later in this file.
static VkFormat getAttributeFormat(const glu::DataType dataType);
76 
77 // Shader utilities
78 
getDefaultClearColor(void)79 static VkClearValue	getDefaultClearColor (void)
80 {
81 	return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
82 }
83 
//! Returns GLSL 4.50 source for a fragment shader whose only action is to write
//! vec4(0.0) to its single color output, o_color (location 0).
static std::string generateEmptyFragmentSource (void)
{
	const std::string source =
		"#version 450\n"
		"layout(location=0) out highp vec4 o_color;\n"
		"void main (void)\n{\n"
		"\to_color = vec4(0.0);\n"
		"}\n";

	return source;
}
97 
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)98 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
99 {
100 	for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
101 	{
102 		if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
103 		{
104 			if(glu::isDataTypeVector(symIter->varType.getBasicType()))
105 			{
106 				for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
107 				{
108 					src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
109 				}
110 			}
111 			else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
112 			{
113 				int maxRow = 0;
114 				int maxCol = 0;
115 				switch (symIter->varType.getBasicType())
116 				{
117 				case glu::TYPE_FLOAT_MAT2:
118 					maxRow = maxCol = 2;
119 					break;
120 				case glu::TYPE_FLOAT_MAT2X3:
121 					maxRow = 2;
122 					maxCol = 3;
123 					break;
124 				case glu::TYPE_FLOAT_MAT2X4:
125 					maxRow = 2;
126 					maxCol = 4;
127 					break;
128 				case glu::TYPE_FLOAT_MAT3X2:
129 					maxRow = 3;
130 					maxCol = 2;
131 					break;
132 				case glu::TYPE_FLOAT_MAT3:
133 					maxRow = maxCol = 3;
134 					break;
135 				case glu::TYPE_FLOAT_MAT3X4:
136 					maxRow = 3;
137 					maxCol = 4;
138 					break;
139 				case glu::TYPE_FLOAT_MAT4X2:
140 					maxRow = 4;
141 					maxCol = 2;
142 					break;
143 				case glu::TYPE_FLOAT_MAT4X3:
144 					maxRow = 4;
145 					maxCol = 3;
146 					break;
147 				case glu::TYPE_FLOAT_MAT4:
148 					maxRow = maxCol = 4;
149 					break;
150 				default:
151 					DE_ASSERT(false);
152 					break;
153 				}
154 
155 				for(int i = 0; i < maxRow; i++)
156 				for(int j = 0; j < maxCol; j++)
157 				{
158 					src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
159 				}
160 			}
161 			else
162 			{
163 					src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
164 			}
165 		}
166 	}
167 }
168 
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)169 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
170 {
171 	std::ostringstream	src;
172 	int					location	= 0;
173 
174 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
175 
176 	if (!shaderSpec.globalDeclarations.empty())
177 		src << shaderSpec.globalDeclarations << "\n";
178 
179 	src << "layout(location = " << location << ") in highp vec4 a_position;\n";
180 
181 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
182 	{
183 		location++;
184 		src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
185 			<< "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
186 	}
187 
188 	src << "\nvoid main (void)\n{\n"
189 		<< "	gl_Position = a_position;\n"
190 		<< "	gl_PointSize = 1.0;\n";
191 
192 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
193 		src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
194 
195 	src << "}\n";
196 
197 	return src.str();
198 }
199 
generateVertexShader(const ShaderSpec & shaderSpec,const std::string & inputPrefix,const std::string & outputPrefix)200 static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
201 {
202 	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
203 
204 	std::ostringstream	src;
205 
206 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
207 
208 	if (!shaderSpec.globalDeclarations.empty())
209 		src << shaderSpec.globalDeclarations << "\n";
210 
211 	src << "layout(location = 0) in highp vec4 a_position;\n";
212 
213 	int			locationNumber	= 1;
214 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
215 	{
216 		src <<  "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
217 	}
218 
219 	locationNumber = 0;
220 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
221 	{
222 		DE_ASSERT(output->varType.isBasicType());
223 
224 		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
225 		{
226 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
227 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
228 			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
229 
230 			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
231 		}
232 		else
233 			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
234 	}
235 
236 	src << "\n"
237 		<< "void main (void)\n"
238 		<< "{\n"
239 		<< "	gl_Position = a_position;\n"
240 		<< "	gl_PointSize = 1.0;\n";
241 
242 	// Declare & fetch local input variables
243 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
244 	{
245 		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
246 		{
247 			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
248 			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
249 		}
250 		else
251 			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
252 	}
253 
254 	// Declare local output variables
255 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
256 	{
257 		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
258 		{
259 			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
260 			src << "\t" << tname << " " << output->name << ";\n";
261 			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
262 			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
263 		}
264 		else
265 			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
266 	}
267 
268 	// Operation - indented to correct level.
269 	{
270 		std::istringstream	opSrc	(shaderSpec.source);
271 		std::string			line;
272 
273 		while (std::getline(opSrc, line))
274 			src << "\t" << line << "\n";
275 	}
276 
277 	if (shaderSpec.packFloat16Bit)
278 		packFloat16Bit(src, shaderSpec.outputs);
279 
280 	// Assignments to outputs.
281 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
282 	{
283 		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
284 		{
285 			src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
286 		}
287 		else
288 		{
289 			if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
290 			{
291 				const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
292 				const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
293 
294 				src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
295 			}
296 			else
297 				src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
298 		}
299 	}
300 
301 	src << "}\n";
302 
303 	return src.str();
304 }
305 
//! Assignment of fragment shader output symbols to output locations.
//! locationSymbols holds non-owning pointers; the pointed-to symbols must stay
//! alive for as long as the layout is used.
struct FragmentOutputLayout
{
	std::vector<const Symbol*>		locationSymbols;		//! Symbols by location
	std::map<std::string, int>		locationMap;			//! Map from symbol name to start location
};
311 
generateFragShaderOutputDecl(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & outputPrefix)312 static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
313 {
314 	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
315 	{
316 		const Symbol&				output		= shaderSpec.outputs[outNdx];
317 		const int					location	= de::lookup(outLocationMap, output.name);
318 		const std::string			outVarName	= outputPrefix + output.name;
319 		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
320 
321 		TCU_CHECK_INTERNAL(output.varType.isBasicType());
322 
323 		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
324 		{
325 			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
326 			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
327 			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);
328 
329 			decl.varType = uintType;
330 			src << decl << ";\n";
331 		}
332 		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
333 		{
334 			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
335 			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
336 			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);
337 
338 			decl.varType = intType;
339 			src << decl << ";\n";
340 		}
341 		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
342 		{
343 			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
344 			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
345 			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
346 			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);
347 
348 			decl.varType = uintType;
349 			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
350 			{
351 				decl.name				= outVarName + "_" + de::toString(vecNdx);
352 				decl.layout.location	= location + vecNdx;
353 				src << decl << ";\n";
354 			}
355 		}
356 		else
357 			src << decl << ";\n";
358 	}
359 }
360 
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)361 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
362 {
363 	if (isInput16Bit)
364 		packFloat16Bit(src, shaderSpec.outputs);
365 
366 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
367 	{
368 		const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
369 
370 		if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
371 			src << "	o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
372 		else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
373 		{
374 			const int	numVecs		= glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
375 
376 			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
377 				if (useIntOutputs)
378 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
379 				else
380 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
381 		}
382 		else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
383 		{
384 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
385 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
386 
387 			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
388 		}
389 		else
390 			src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
391 	}
392 }
393 
generatePassthroughFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)394 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
395 {
396 	std::ostringstream	src;
397 
398 	src <<"#version 450\n";
399 
400 	if (!shaderSpec.globalDeclarations.empty())
401 		src << shaderSpec.globalDeclarations << "\n";
402 
403 	int locationNumber = 0;
404 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
405 	{
406 		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
407 		{
408 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
409 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
410 			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
411 
412 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
413 		}
414 		else
415 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
416 	}
417 
418 	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
419 
420 	src << "\nvoid main (void)\n{\n";
421 
422 	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
423 
424 	src << "}\n";
425 
426 	return src.str();
427 }
428 
generateGeometryShader(const ShaderSpec & shaderSpec,const std::string & inputPrefix,const std::string & outputPrefix,const bool pointSizeSupported)429 static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
430 {
431 	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
432 
433 	std::ostringstream	src;
434 
435 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
436 
437 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
438 		src << "#extension GL_EXT_geometry_shader : require\n";
439 
440 	if (!shaderSpec.globalDeclarations.empty())
441 		src << shaderSpec.globalDeclarations << "\n";
442 
443 	src << "layout(points) in;\n"
444 		<< "layout(points, max_vertices = 1) out;\n";
445 
446 	int locationNumber = 0;
447 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
448 		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
449 
450 	locationNumber = 0;
451 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
452 	{
453 		DE_ASSERT(output->varType.isBasicType());
454 
455 		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
456 		{
457 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
458 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
459 			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
460 
461 			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
462 		}
463 		else
464 			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
465 	}
466 
467 	src << "\n"
468 		<< "void main (void)\n"
469 		<< "{\n"
470 		<< "	gl_Position = gl_in[0].gl_Position;\n"
471 		<< (pointSizeSupported ? "	gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");
472 
473 	// Fetch input variables
474 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
475 		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
476 
477 	// Declare local output variables.
478 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
479 		src << "\t" << glu::declare(output->varType, output->name) << ";\n";
480 
481 	src << "\n";
482 
483 	// Operation - indented to correct level.
484 	{
485 		std::istringstream	opSrc	(shaderSpec.source);
486 		std::string			line;
487 
488 		while (std::getline(opSrc, line))
489 			src << "\t" << line << "\n";
490 	}
491 
492 	// Assignments to outputs.
493 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
494 	{
495 		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
496 		{
497 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
498 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
499 
500 			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
501 		}
502 		else
503 			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
504 	}
505 
506 	src << "	EmitVertex();\n"
507 		<< "	EndPrimitive();\n"
508 		<< "}\n";
509 
510 	return src.str();
511 }
512 
generateFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)513 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
514 {
515 	std::ostringstream src;
516 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
517 	if (!shaderSpec.globalDeclarations.empty())
518 		src << shaderSpec.globalDeclarations << "\n";
519 
520 	int			locationNumber	= 0;
521 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
522 	{
523 		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
524 	}
525 
526 	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
527 
528 	src << "\nvoid main (void)\n{\n";
529 
530 	// Declare & fetch local input variables
531 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
532 	{
533 		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
534 		{
535 			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
536 			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
537 		}
538 		else
539 			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
540 	}
541 
542 	// Declare output variables
543 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
544 	{
545 		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
546 		{
547 			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
548 			src << "\t" << tname << " " << output->name << ";\n";
549 			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
550 			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
551 		}
552 		else
553 			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
554 	}
555 
556 	// Operation - indented to correct level.
557 	{
558 		std::istringstream	opSrc	(shaderSpec.source);
559 		std::string			line;
560 
561 		while (std::getline(opSrc, line))
562 			src << "\t" << line << "\n";
563 	}
564 
565 	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);
566 
567 	src << "}\n";
568 
569 	return src.str();
570 }
571 
572 // FragmentOutExecutor
573 
//! ShaderExecutor that feeds input values as vertex attributes and exposes a
//! fragment output layout computed from the shader spec outputs.
//! NOTE(review): the implementation of execute() is outside this part of the
//! file; presumably results are read back from color attachments -- confirm.
class FragmentOutExecutor : public ShaderExecutor
{
public:
														FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual												~FragmentOutExecutor	(void);

	//! Runs the shader for numValues values; inputs/outputs are arrays of
	//! per-symbol data pointers.
	virtual void										execute					(int					numValues,
																				 const void* const*		inputs,
																				 void* const*			outputs,
																				 VkDescriptorSet		extraResources);

protected:
	const glu::ShaderType								m_shaderType;		//!< Shader type passed to the constructor
	const FragmentOutputLayout							m_outputLayout;		//!< Location assignment of the shader spec outputs

private:
	void												bindAttributes			(int					numValues,
																				 const void* const*		inputs);

	//! Registers one vertex binding + attribute description and uploads
	//! sizePerElement * count bytes from dataPtr into a new vertex buffer.
	void												addAttribute			(deUint32				bindingLocation,
																				 VkFormat				format,
																				 deUint32				sizePerElement,
																				 deUint32				count,
																				 const void*			dataPtr);
	// reinit render data members
	virtual void										clearRenderData			(void);

	const VkDescriptorSetLayout							m_extraResourcesLayout;

	// Vertex input state accumulated by addAttribute(); one binding per attribute.
	std::vector<VkVertexInputBindingDescription>		m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>		m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>								m_vertexBuffers;
	std::vector<AllocationSp>							m_vertexBufferAllocs;
};
608 
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)609 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
610 {
611 	FragmentOutputLayout	ret;
612 	int						location	= 0;
613 
614 	for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
615 	{
616 		const int	numLocations	= glu::getDataTypeNumLocations(it->varType.getBasicType());
617 
618 		TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
619 		de::insert(ret.locationMap, it->name, location);
620 		location += numLocations;
621 
622 		for (int ndx = 0; ndx < numLocations; ++ndx)
623 			ret.locationSymbols.push_back(&*it);
624 	}
625 
626 	return ret;
627 }
628 
FragmentOutExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)629 FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
630 	: ShaderExecutor			(context, shaderSpec)
631 	, m_shaderType				(shaderType)
632 	, m_outputLayout			(computeFragmentOutputLayout(m_shaderSpec.outputs))
633 	, m_extraResourcesLayout	(extraResourcesLayout)
634 {
635 	const VkPhysicalDevice		physicalDevice = m_context.getPhysicalDevice();
636 	const InstanceInterface&	vki = m_context.getInstanceInterface();
637 
638 	// Input attributes
639 	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
640 	{
641 		const Symbol&				symbol = m_shaderSpec.inputs[inputNdx];
642 		const glu::DataType			basicType = symbol.varType.getBasicType();
643 		const VkFormat				format = getAttributeFormat(basicType);
644 		const VkFormatProperties	formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
645 		if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
646 			TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
647 	}
648 }
649 
//! Nothing to release explicitly here; the destructor body is empty.
FragmentOutExecutor::~FragmentOutExecutor (void)
{
}
653 
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)654 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
655 {
656 	std::vector<tcu::Vec2> positions(numValues);
657 	for (int valNdx = 0; valNdx < numValues; valNdx++)
658 	{
659 		const int		ix		= valNdx % renderSize.x();
660 		const int		iy		= valNdx / renderSize.x();
661 		const float		fx		= -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
662 		const float		fy		= -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
663 
664 		positions[valNdx] = tcu::Vec2(fx, fy);
665 	}
666 
667 	return positions;
668 }
669 
getRenderbufferFormatForOutput(const glu::VarType & outputType,bool useIntOutputs)670 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
671 {
672 	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
673 	{
674 		tcu::TextureFormat::R,
675 		tcu::TextureFormat::RG,
676 		tcu::TextureFormat::RGBA,	// No RGB variants available.
677 		tcu::TextureFormat::RGBA
678 	};
679 
680 	const glu::DataType					basicType		= outputType.getBasicType();
681 	const int							numComps		= glu::getDataTypeNumComponents(basicType);
682 	tcu::TextureFormat::ChannelType		channelType;
683 
684 	switch (glu::getDataTypeScalarType(basicType))
685 	{
686 		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;														break;
687 		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;															break;
688 		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;															break;
689 		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;			break;
690 		case glu::TYPE_FLOAT16:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;		break;
691 		default:
692 			throw tcu::InternalError("Invalid output type");
693 	}
694 
695 	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
696 
697 	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
698 }
699 
getAttributeFormat(const glu::DataType dataType)700 static VkFormat getAttributeFormat (const glu::DataType dataType)
701 {
702 	switch (dataType)
703 	{
704 		case glu::TYPE_FLOAT16:			return VK_FORMAT_R16_SFLOAT;
705 		case glu::TYPE_FLOAT16_VEC2:	return VK_FORMAT_R16G16_SFLOAT;
706 		case glu::TYPE_FLOAT16_VEC3:	return VK_FORMAT_R16G16B16_SFLOAT;
707 		case glu::TYPE_FLOAT16_VEC4:	return VK_FORMAT_R16G16B16A16_SFLOAT;
708 
709 		case glu::TYPE_FLOAT:			return VK_FORMAT_R32_SFLOAT;
710 		case glu::TYPE_FLOAT_VEC2:		return VK_FORMAT_R32G32_SFLOAT;
711 		case glu::TYPE_FLOAT_VEC3:		return VK_FORMAT_R32G32B32_SFLOAT;
712 		case glu::TYPE_FLOAT_VEC4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
713 
714 		case glu::TYPE_INT:				return VK_FORMAT_R32_SINT;
715 		case glu::TYPE_INT_VEC2:		return VK_FORMAT_R32G32_SINT;
716 		case glu::TYPE_INT_VEC3:		return VK_FORMAT_R32G32B32_SINT;
717 		case glu::TYPE_INT_VEC4:		return VK_FORMAT_R32G32B32A32_SINT;
718 
719 		case glu::TYPE_UINT:			return VK_FORMAT_R32_UINT;
720 		case glu::TYPE_UINT_VEC2:		return VK_FORMAT_R32G32_UINT;
721 		case glu::TYPE_UINT_VEC3:		return VK_FORMAT_R32G32B32_UINT;
722 		case glu::TYPE_UINT_VEC4:		return VK_FORMAT_R32G32B32A32_UINT;
723 
724 		case glu::TYPE_FLOAT_MAT2:		return VK_FORMAT_R32G32_SFLOAT;
725 		case glu::TYPE_FLOAT_MAT2X3:	return VK_FORMAT_R32G32B32_SFLOAT;
726 		case glu::TYPE_FLOAT_MAT2X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
727 		case glu::TYPE_FLOAT_MAT3X2:	return VK_FORMAT_R32G32_SFLOAT;
728 		case glu::TYPE_FLOAT_MAT3:		return VK_FORMAT_R32G32B32_SFLOAT;
729 		case glu::TYPE_FLOAT_MAT3X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
730 		case glu::TYPE_FLOAT_MAT4X2:	return VK_FORMAT_R32G32_SFLOAT;
731 		case glu::TYPE_FLOAT_MAT4X3:	return VK_FORMAT_R32G32B32_SFLOAT;
732 		case glu::TYPE_FLOAT_MAT4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
733 		default:
734 			DE_ASSERT(false);
735 			return VK_FORMAT_UNDEFINED;
736 	}
737 }
738 
addAttribute(deUint32 bindingLocation,VkFormat format,deUint32 sizePerElement,deUint32 count,const void * dataPtr)739 void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
740 {
741 	// Add binding specification
742 	const deUint32							binding = (deUint32)m_vertexBindingDescriptions.size();
743 	const VkVertexInputBindingDescription	bindingDescription =
744 	{
745 		binding,
746 		sizePerElement,
747 		VK_VERTEX_INPUT_RATE_VERTEX
748 	};
749 
750 	m_vertexBindingDescriptions.push_back(bindingDescription);
751 
752 	// Add location and format specification
753 	const VkVertexInputAttributeDescription attributeDescription =
754 	{
755 		bindingLocation,			// deUint32	location;
756 		binding,					// deUint32	binding;
757 		format,						// VkFormat	format;
758 		0u,							// deUint32	offsetInBytes;
759 	};
760 
761 	m_vertexAttributeDescriptions.push_back(attributeDescription);
762 
763 	// Upload data to buffer
764 	const VkDevice				vkDevice			= m_context.getDevice();
765 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
766 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
767 
768 	const VkDeviceSize			inputSize			= sizePerElement * count;
769 	const VkBufferCreateInfo	vertexBufferParams	=
770 	{
771 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
772 		DE_NULL,									// const void*			pNext;
773 		0u,											// VkBufferCreateFlags	flags;
774 		inputSize,									// VkDeviceSize			size;
775 		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,			// VkBufferUsageFlags	usage;
776 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
777 		1u,											// deUint32				queueFamilyCount;
778 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
779 	};
780 
781 	Move<VkBuffer>			buffer	= createBuffer(vk, vkDevice, &vertexBufferParams);
782 	de::MovePtr<Allocation>	alloc	= m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
783 
784 	VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
785 
786 	deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
787 	flushAlloc(vk, vkDevice, *alloc);
788 
789 	m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
790 	m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
791 }
792 
bindAttributes(int numValues,const void * const * inputs)793 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
794 {
795 	// Input attributes
796 	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
797 	{
798 		const Symbol&		symbol			= m_shaderSpec.inputs[inputNdx];
799 		const void*			ptr				= inputs[inputNdx];
800 		const glu::DataType	basicType		= symbol.varType.getBasicType();
801 		const int			vecSize			= glu::getDataTypeScalarSize(basicType);
802 		const VkFormat		format			= getAttributeFormat(basicType);
803 		int					elementSize		= 0;
804 		int					numAttrsToAdd	= 1;
805 
806 		if (glu::isDataTypeFloatOrVec(basicType))
807 			elementSize = sizeof(float);
808 		else if (glu::isDataTypeFloat16OrVec(basicType))
809 			elementSize = sizeof(deUint16);
810 		else if (glu::isDataTypeIntOrIVec(basicType))
811 			elementSize = sizeof(int);
812 		else if (glu::isDataTypeUintOrUVec(basicType))
813 			elementSize = sizeof(deUint32);
814 		else if (glu::isDataTypeMatrix(basicType))
815 		{
816 			int		numRows	= glu::getDataTypeMatrixNumRows(basicType);
817 			int		numCols	= glu::getDataTypeMatrixNumColumns(basicType);
818 
819 			elementSize = numRows * numCols * (int)sizeof(float);
820 			numAttrsToAdd = numCols;
821 		}
822 		else
823 			DE_ASSERT(false);
824 
825 		// add attributes, in case of matrix every column is binded as an attribute
826 		for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
827 		{
828 			addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
829 		}
830 	}
831 }
832 
clearRenderData(void)833 void FragmentOutExecutor::clearRenderData (void)
834 {
835 	m_vertexBindingDescriptions.clear();
836 	m_vertexAttributeDescriptions.clear();
837 	m_vertexBuffers.clear();
838 	m_vertexBufferAllocs.clear();
839 }
840 
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)841 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
842 {
843 	const VkDescriptorSetLayoutCreateInfo	createInfo	=
844 	{
845 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
846 		DE_NULL,
847 		(VkDescriptorSetLayoutCreateFlags)0,
848 		0u,
849 		DE_NULL,
850 	};
851 	return createDescriptorSetLayout(vkd, device, &createInfo);
852 }
853 
createDummyDescriptorPool(const DeviceInterface & vkd,VkDevice device)854 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
855 {
856 	const VkDescriptorPoolSize			dummySize	=
857 	{
858 		VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
859 		1u,
860 	};
861 	const VkDescriptorPoolCreateInfo	createInfo	=
862 	{
863 		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
864 		DE_NULL,
865 		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
866 		1u,
867 		1u,
868 		&dummySize
869 	};
870 	return createDescriptorPool(vkd, device, &createInfo);
871 }
872 
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)873 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
874 {
875 	const VkDescriptorSetAllocateInfo	allocInfo	=
876 	{
877 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
878 		DE_NULL,
879 		pool,
880 		1u,
881 		&layout,
882 	};
883 	return allocateDescriptorSet(vkd, device, &allocInfo);
884 }
885 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)886 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
887 {
888 	const VkDevice										vkDevice				= m_context.getDevice();
889 	const DeviceInterface&								vk						= m_context.getDeviceInterface();
890 	const VkQueue										queue					= m_context.getUniversalQueue();
891 	const deUint32										queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();
892 	Allocator&											memAlloc				= m_context.getDefaultAllocator();
893 
894 	const deUint32										renderSizeX				= de::min(static_cast<deUint32>(DEFAULT_RENDER_WIDTH), (deUint32)numValues);
895 	const deUint32										renderSizeY				= ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
896 	const tcu::UVec2									renderSize				(renderSizeX, renderSizeY);
897 	std::vector<tcu::Vec2>								positions;
898 
899 	const bool											useGeometryShader		= m_shaderType == glu::SHADERTYPE_GEOMETRY;
900 
901 	std::vector<VkImageSp>								colorImages;
902 	std::vector<VkImageMemoryBarrier>					colorImagePreRenderBarriers;
903 	std::vector<VkImageMemoryBarrier>					colorImagePostRenderBarriers;
904 	std::vector<AllocationSp>							colorImageAllocs;
905 	std::vector<VkAttachmentDescription>				attachments;
906 	std::vector<VkClearValue>							attachmentClearValues;
907 	std::vector<VkImageViewSp>							colorImageViews;
908 
909 	std::vector<VkPipelineColorBlendAttachmentState>	colorBlendAttachmentStates;
910 	std::vector<VkAttachmentReference>					colorAttachmentReferences;
911 
912 	Move<VkRenderPass>									renderPass;
913 	Move<VkFramebuffer>									framebuffer;
914 	Move<VkPipelineLayout>								pipelineLayout;
915 	Move<VkPipeline>									graphicsPipeline;
916 
917 	Move<VkShaderModule>								vertexShaderModule;
918 	Move<VkShaderModule>								geometryShaderModule;
919 	Move<VkShaderModule>								fragmentShaderModule;
920 
921 	Move<VkCommandPool>									cmdPool;
922 	Move<VkCommandBuffer>								cmdBuffer;
923 
924 	Unique<VkDescriptorSetLayout>						emptyDescriptorSetLayout	(createEmptyDescriptorSetLayout(vk, vkDevice));
925 	Unique<VkDescriptorPool>							dummyDescriptorPool			(createDummyDescriptorPool(vk, vkDevice));
926 	Unique<VkDescriptorSet>								emptyDescriptorSet			(allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));
927 
928 	clearRenderData();
929 
930 	// Compute positions - 1px points are used to drive fragment shading.
931 	positions = computeVertexPositions(numValues, renderSize.cast<int>());
932 
933 	// Bind attributes
934 	addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
935 	bindAttributes(numValues, inputs);
936 
937 	// Create color images
938 	{
939 		const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
940 		{
941 			VK_FALSE,																	// VkBool32						blendEnable;
942 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcColorBlendFactor;
943 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				dstColorBlendFactor;
944 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpColor;
945 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcAlphaBlendFactor;
946 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				destAlphaBlendFactor;
947 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpAlpha;
948 			(VK_COLOR_COMPONENT_R_BIT |
949 			 VK_COLOR_COMPONENT_G_BIT |
950 			 VK_COLOR_COMPONENT_B_BIT |
951 			 VK_COLOR_COMPONENT_A_BIT)													// VkColorComponentFlags		colorWriteMask;
952 		};
953 
954 		for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
955 		{
956 			const bool		isFloat		= isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
957 			const bool		isFloat16b	= glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
958 			const bool		isSigned	= isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
959 			const bool		isBool		= isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
960 			const VkFormat	colorFormat = isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT));
961 
962 			{
963 				const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
964 				if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
965 					TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
966 			}
967 
968 			const VkImageCreateInfo	 colorImageParams =
969 			{
970 				VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType				sType;
971 				DE_NULL,																	// const void*					pNext;
972 				0u,																			// VkImageCreateFlags			flags;
973 				VK_IMAGE_TYPE_2D,															// VkImageType					imageType;
974 				colorFormat,																// VkFormat						format;
975 				{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D					extent;
976 				1u,																			// deUint32						mipLevels;
977 				1u,																			// deUint32						arraySize;
978 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
979 				VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling				tiling;
980 				VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags			usage;
981 				VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode				sharingMode;
982 				1u,																			// deUint32						queueFamilyCount;
983 				&queueFamilyIndex,															// const deUint32*				pQueueFamilyIndices;
984 				VK_IMAGE_LAYOUT_UNDEFINED,													// VkImageLayout				initialLayout;
985 			};
986 
987 			const VkAttachmentDescription colorAttachmentDescription =
988 			{
989 				0u,																			// VkAttachmentDescriptorFlags	flags;
990 				colorFormat,																// VkFormat						format;
991 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
992 				VK_ATTACHMENT_LOAD_OP_CLEAR,												// VkAttachmentLoadOp			loadOp;
993 				VK_ATTACHMENT_STORE_OP_STORE,												// VkAttachmentStoreOp			storeOp;
994 				VK_ATTACHMENT_LOAD_OP_DONT_CARE,											// VkAttachmentLoadOp			stencilLoadOp;
995 				VK_ATTACHMENT_STORE_OP_DONT_CARE,											// VkAttachmentStoreOp			stencilStoreOp;
996 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				initialLayout;
997 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				finalLayout;
998 			};
999 
1000 			Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1001 			colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1002 			attachmentClearValues.push_back(getDefaultClearColor());
1003 
1004 			// Allocate and bind color image memory
1005 			{
1006 				de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1007 				VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1008 				colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1009 
1010 				attachments.push_back(colorAttachmentDescription);
1011 				colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1012 
1013 				const VkAttachmentReference colorAttachmentReference =
1014 				{
1015 					(deUint32) (colorImages.size() - 1),			//	deUint32		attachment;
1016 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL		//	VkImageLayout	layout;
1017 				};
1018 
1019 				colorAttachmentReferences.push_back(colorAttachmentReference);
1020 			}
1021 
1022 			// Create color attachment view
1023 			{
1024 				const VkImageViewCreateInfo colorImageViewParams =
1025 				{
1026 					VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
1027 					DE_NULL,											// const void*				pNext;
1028 					0u,													// VkImageViewCreateFlags	flags;
1029 					colorImages.back().get()->get(),					// VkImage					image;
1030 					VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
1031 					colorFormat,										// VkFormat					format;
1032 					{
1033 						VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
1034 						VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
1035 						VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
1036 						VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
1037 					},													// VkComponentMapping		components;
1038 					{
1039 						VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
1040 						0u,												// deUint32					baseMipLevel;
1041 						1u,												// deUint32					mipLevels;
1042 						0u,												// deUint32					baseArraySlice;
1043 						1u												// deUint32					arraySize;
1044 					}													// VkImageSubresourceRange	subresourceRange;
1045 				};
1046 
1047 				Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1048 				colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1049 
1050 				const VkImageMemoryBarrier	colorImagePreRenderBarrier =
1051 				{
1052 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1053 					DE_NULL,												// pNext
1054 					0u,														// srcAccessMask
1055 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1056 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// dstAccessMask
1057 					VK_IMAGE_LAYOUT_UNDEFINED,								// oldLayout
1058 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// newLayout
1059 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1060 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1061 					colorImages.back().get()->get(),						// image
1062 					{
1063 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1064 						0u,														// baseMipLevel
1065 						1u,														// levelCount
1066 						0u,														// baseArrayLayer
1067 						1u,														// layerCount
1068 					}														// subresourceRange
1069 				};
1070 				colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1071 
1072 				const VkImageMemoryBarrier	colorImagePostRenderBarrier =
1073 				{
1074 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1075 					DE_NULL,												// pNext
1076 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1077 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// srcAccessMask
1078 					VK_ACCESS_TRANSFER_READ_BIT,							// dstAccessMask
1079 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// oldLayout
1080 					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,					// newLayout
1081 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1082 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1083 					colorImages.back().get()->get(),						// image
1084 					{
1085 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1086 						0u,														// baseMipLevel
1087 						1u,														// levelCount
1088 						0u,														// baseArrayLayer
1089 						1u,														// layerCount
1090 					}														// subresourceRange
1091 				};
1092 				colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1093 			}
1094 		}
1095 	}
1096 
1097 	// Create render pass
1098 	{
1099 		const VkSubpassDescription subpassDescription =
1100 		{
1101 			0u,													// VkSubpassDescriptionFlags	flags;
1102 			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
1103 			0u,													// deUint32						inputCount;
1104 			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
1105 			(deUint32)colorImages.size(),						// deUint32						colorCount;
1106 			&colorAttachmentReferences[0],						// const VkAttachmentReference*	colorAttachments;
1107 			DE_NULL,											// const VkAttachmentReference*	resolveAttachments;
1108 			DE_NULL,											// VkAttachmentReference		depthStencilAttachment;
1109 			0u,													// deUint32						preserveCount;
1110 			DE_NULL												// const VkAttachmentReference*	pPreserveAttachments;
1111 		};
1112 
1113 		const VkRenderPassCreateInfo renderPassParams =
1114 		{
1115 			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
1116 			DE_NULL,											// const void*						pNext;
1117 			(VkRenderPassCreateFlags)0,							// VkRenderPassCreateFlags			flags;
1118 			(deUint32)attachments.size(),						// deUint32							attachmentCount;
1119 			&attachments[0],									// const VkAttachmentDescription*	pAttachments;
1120 			1u,													// deUint32							subpassCount;
1121 			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
1122 			0u,													// deUint32							dependencyCount;
1123 			DE_NULL												// const VkSubpassDependency*		pDependencies;
1124 		};
1125 
1126 		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1127 	}
1128 
1129 	// Create framebuffer
1130 	{
1131 		std::vector<VkImageView> views(colorImageViews.size());
1132 		for (size_t i = 0; i < colorImageViews.size(); i++)
1133 		{
1134 			views[i] = colorImageViews[i].get()->get();
1135 		}
1136 
1137 		const VkFramebufferCreateInfo framebufferParams =
1138 		{
1139 			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
1140 			DE_NULL,											// const void*					pNext;
1141 			0u,													// VkFramebufferCreateFlags		flags;
1142 			*renderPass,										// VkRenderPass					renderPass;
1143 			(deUint32)views.size(),								// deUint32						attachmentCount;
1144 			&views[0],											// const VkImageView*			pAttachments;
1145 			(deUint32)renderSize.x(),							// deUint32						width;
1146 			(deUint32)renderSize.y(),							// deUint32						height;
1147 			1u													// deUint32						layers;
1148 		};
1149 
1150 		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1151 	}
1152 
1153 	// Create pipeline layout
1154 	{
1155 		const VkDescriptorSetLayout			setLayouts[]			=
1156 		{
1157 			*emptyDescriptorSetLayout,
1158 			m_extraResourcesLayout
1159 		};
1160 		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
1161 		{
1162 			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
1163 			DE_NULL,											// const void*					pNext;
1164 			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
1165 			(m_extraResourcesLayout != 0 ? 2u : 0u),			// deUint32						descriptorSetCount;
1166 			setLayouts,											// const VkDescriptorSetLayout*	pSetLayouts;
1167 			0u,													// deUint32						pushConstantRangeCount;
1168 			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
1169 		};
1170 
1171 		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1172 	}
1173 
1174 	// Create shaders
1175 	{
1176 		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1177 		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1178 
1179 		if (useGeometryShader)
1180 		{
1181 			if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1182 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1183 			else
1184 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1185 		}
1186 	}
1187 
1188 	// Create pipeline
1189 	{
1190 		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1191 		{
1192 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
1193 			DE_NULL,													// const void*									pNext;
1194 			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags		flags;
1195 			(deUint32)m_vertexBindingDescriptions.size(),				// deUint32										bindingCount;
1196 			&m_vertexBindingDescriptions[0],							// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
1197 			(deUint32)m_vertexAttributeDescriptions.size(),				// deUint32										attributeCount;
1198 			&m_vertexAttributeDescriptions[0],							// const VkVertexInputAttributeDescription*		pvertexAttributeDescriptions;
1199 		};
1200 
1201 		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
1202 		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));
1203 
1204 		const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1205 		{
1206 			VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,		// VkStructureType								sType;
1207 			DE_NULL,														// const void*									pNext;
1208 			(VkPipelineColorBlendStateCreateFlags)0,						// VkPipelineColorBlendStateCreateFlags			flags;
1209 			VK_FALSE,														// VkBool32										logicOpEnable;
1210 			VK_LOGIC_OP_COPY,												// VkLogicOp									logicOp;
1211 			(deUint32)colorBlendAttachmentStates.size(),					// deUint32										attachmentCount;
1212 			&colorBlendAttachmentStates[0],									// const VkPipelineColorBlendAttachmentState*	pAttachments;
1213 			{ 0.0f, 0.0f, 0.0f, 0.0f }										// float										blendConst[4];
1214 		};
1215 
1216 		graphicsPipeline = makeGraphicsPipeline(vk,														// const DeviceInterface&                        vk
1217 												vkDevice,												// const VkDevice                                device
1218 												*pipelineLayout,										// const VkPipelineLayout                        pipelineLayout
1219 												*vertexShaderModule,									// const VkShaderModule                          vertexShaderModule
1220 												DE_NULL,												// const VkShaderModule                          tessellationControlShaderModule
1221 												DE_NULL,												// const VkShaderModule                          tessellationEvalShaderModule
1222 												useGeometryShader ? *geometryShaderModule : DE_NULL,	// const VkShaderModule                          geometryShaderModule
1223 												*fragmentShaderModule,									// const VkShaderModule                          fragmentShaderModule
1224 												*renderPass,											// const VkRenderPass                            renderPass
1225 												viewports,												// const std::vector<VkViewport>&                viewports
1226 												scissors,												// const std::vector<VkRect2D>&                  scissors
1227 												VK_PRIMITIVE_TOPOLOGY_POINT_LIST,						// const VkPrimitiveTopology                     topology
1228 												0u,														// const deUint32                                subpass
1229 												0u,														// const deUint32                                patchControlPoints
1230 												&vertexInputStateParams,								// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
1231 												DE_NULL,												// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1232 												DE_NULL,												// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
1233 												DE_NULL,												// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
1234 												&colorBlendStateParams);								// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
1235 	}
1236 
1237 	// Create command pool
1238 	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1239 
1240 	// Create command buffer
1241 	{
1242 		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1243 
1244 		beginCommandBuffer(vk, *cmdBuffer);
1245 
1246 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1247 							  0, (const VkMemoryBarrier*)DE_NULL,
1248 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1249 							  (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1250 		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1251 
1252 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1253 
1254 		if (m_extraResourcesLayout != 0)
1255 		{
1256 			DE_ASSERT(extraResources != 0);
1257 			const VkDescriptorSet	descriptorSets[]	= { *emptyDescriptorSet, extraResources };
1258 			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1259 		}
1260 		else
1261 			DE_ASSERT(extraResources == 0);
1262 
1263 		const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1264 
1265 		std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1266 
1267 		std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1268 		for (size_t i = 0; i < numberOfVertexAttributes; i++)
1269 		{
1270 			buffers[i] = m_vertexBuffers[i].get()->get();
1271 		}
1272 
1273 		vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1274 		vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1275 
1276 		endRenderPass(vk, *cmdBuffer);
1277 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1278 							  0, (const VkMemoryBarrier*)DE_NULL,
1279 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1280 							  (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1281 
1282 		endCommandBuffer(vk, *cmdBuffer);
1283 	}
1284 
1285 	// Execute Draw
1286 	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1287 
1288 	// Read back result and output
1289 	{
1290 		const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1291 		const VkBufferCreateInfo readImageBufferParams =
1292 		{
1293 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1294 			DE_NULL,									// const void*			pNext;
1295 			0u,											// VkBufferCreateFlags	flags;
1296 			imageSizeBytes,								// VkDeviceSize			size;
1297 			VK_BUFFER_USAGE_TRANSFER_DST_BIT,			// VkBufferUsageFlags	usage;
1298 			VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1299 			1u,											// deUint32				queueFamilyCount;
1300 			&queueFamilyIndex,							// const deUint32*		pQueueFamilyIndices;
1301 		};
1302 
1303 		// constants for image copy
1304 		Move<VkCommandPool>	copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1305 
1306 		const VkBufferImageCopy copyParams =
1307 		{
1308 			0u,											// VkDeviceSize			bufferOffset;
1309 			(deUint32)renderSize.x(),					// deUint32				bufferRowLength;
1310 			(deUint32)renderSize.y(),					// deUint32				bufferImageHeight;
1311 			{
1312 				VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspect		aspect;
1313 				0u,										// deUint32				mipLevel;
1314 				0u,										// deUint32				arraySlice;
1315 				1u,										// deUint32				arraySize;
1316 			},											// VkImageSubresource	imageSubresource;
1317 			{ 0u, 0u, 0u },								// VkOffset3D			imageOffset;
1318 			{ renderSize.x(), renderSize.y(), 1u }		// VkExtent3D			imageExtent;
1319 		};
1320 
1321 		// Read back pixels.
1322 		for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1323 		{
1324 			const Symbol&				output			= m_shaderSpec.outputs[outNdx];
1325 			const int					outSize			= output.varType.getScalarSize();
1326 			const int					outVecSize		= glu::getDataTypeNumComponents(output.varType.getBasicType());
1327 			const int					outNumLocs		= glu::getDataTypeNumLocations(output.varType.getBasicType());
1328 			const int					outLocation		= de::lookup(m_outputLayout.locationMap, output.name);
1329 
1330 			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1331 			{
1332 				tcu::TextureLevel			tmpBuf;
1333 				const tcu::TextureFormat	format = getRenderbufferFormatForOutput(output.varType, false);
1334 				const tcu::TextureFormat	readFormat (tcu::TextureFormat::RGBA, format.type);
1335 				const Unique<VkBuffer>		readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1336 				const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1337 
1338 				VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1339 
1340 				// Copy image to buffer
1341 				{
1342 
1343 					Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1344 
1345 					beginCommandBuffer(vk, *copyCmdBuffer);
1346 					vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1347 					endCommandBuffer(vk, *copyCmdBuffer);
1348 
1349 					submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1350 				}
1351 
1352 				invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1353 
1354 				tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1355 
1356 				const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1357 				const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1358 
1359 				tcu::copy(tmpBuf.getAccess(), resultAccess);
1360 
1361 				if (isOutput16Bit(static_cast<size_t>(outNdx)))
1362 				{
1363 					deUint16*	dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1364 					if (outSize == 4 && outNumLocs == 1)
1365 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1366 					else
1367 					{
1368 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1369 						{
1370 							const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1371 							deUint16*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1372 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1373 						}
1374 					}
1375 				}
1376 				else
1377 				{
1378 					deUint32*	dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1379 					if (outSize == 4 && outNumLocs == 1)
1380 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1381 					else
1382 					{
1383 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1384 						{
1385 							const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1386 							deUint32*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1387 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1388 						}
1389 					}
1390 				}
1391 			}
1392 		}
1393 	}
1394 }
1395 
1396 // VertexShaderExecutor
1397 
1398 class VertexShaderExecutor : public FragmentOutExecutor
1399 {
1400 public:
1401 								VertexShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1402 	virtual						~VertexShaderExecutor	(void);
1403 
1404 	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& dst);
1405 };
1406 
VertexShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1407 VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1408 	: FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1409 {
1410 }
1411 
~VertexShaderExecutor(void)1412 VertexShaderExecutor::~VertexShaderExecutor (void)
1413 {
1414 }
1415 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1416 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1417 {
1418 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1419 
1420 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1421 	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1422 	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1423 }
1424 
1425 // GeometryShaderExecutor
1426 
1427 class GeometryShaderExecutor : public FragmentOutExecutor
1428 {
1429 public:
1430 								GeometryShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1431 	virtual						~GeometryShaderExecutor	(void);
1432 
1433 	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1434 
1435 };
1436 
GeometryShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1437 GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1438 	: FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1439 {
1440 	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1441 
1442 	if (!features.geometryShader)
1443 		TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1444 }
1445 
~GeometryShaderExecutor(void)1446 GeometryShaderExecutor::~GeometryShaderExecutor (void)
1447 {
1448 }
1449 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1450 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1451 {
1452 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1453 
1454 	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1455 
1456 	programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1457 	programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1458 
1459 	/* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1460 	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1461 
1462 }
1463 
1464 // FragmentShaderExecutor
1465 
1466 class FragmentShaderExecutor : public FragmentOutExecutor
1467 {
1468 public:
1469 								FragmentShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1470 	virtual						~FragmentShaderExecutor (void);
1471 
1472 	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1473 
1474 };
1475 
FragmentShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1476 FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1477 	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
1478 {
1479 }
1480 
~FragmentShaderExecutor(void)1481 FragmentShaderExecutor::~FragmentShaderExecutor (void)
1482 {
1483 }
1484 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1485 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1486 {
1487 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1488 
1489 	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1490 	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1491 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1492 }
1493 
1494 // Shared utilities for compute and tess executors
1495 
getVecStd430ByteAlignment(glu::DataType type)1496 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1497 {
1498 	switch (type)
1499 	{
1500 		case glu::TYPE_FLOAT16:			return 2u;
1501 		case glu::TYPE_FLOAT16_VEC2:	return 4u;
1502 		case glu::TYPE_FLOAT16_VEC3:	return 8u;
1503 		case glu::TYPE_FLOAT16_VEC4:	return 8u;
1504 		default: break;
1505 	}
1506 
1507 	switch (glu::getDataTypeScalarSize(type))
1508 	{
1509 		case 1:		return 4u;
1510 		case 2:		return 8u;
1511 		case 3:		return 16u;
1512 		case 4:		return 16u;
1513 		default:
1514 			DE_ASSERT(false);
1515 			return 0u;
1516 	}
1517 }
1518 
1519 class BufferIoExecutor : public ShaderExecutor
1520 {
1521 public:
1522 							BufferIoExecutor	(Context& context, const ShaderSpec& shaderSpec);
1523 	virtual					~BufferIoExecutor	(void);
1524 
1525 protected:
1526 	enum
1527 	{
1528 		INPUT_BUFFER_BINDING	= 0,
1529 		OUTPUT_BUFFER_BINDING	= 1,
1530 	};
1531 
1532 	void					initBuffers			(int numValues);
getInputBuffer(void) const1533 	VkBuffer				getInputBuffer		(void) const		{ return *m_inputBuffer;					}
getOutputBuffer(void) const1534 	VkBuffer				getOutputBuffer		(void) const		{ return *m_outputBuffer;					}
getInputStride(void) const1535 	deUint32				getInputStride		(void) const		{ return getLayoutStride(m_inputLayout);	}
getOutputStride(void) const1536 	deUint32				getOutputStride		(void) const		{ return getLayoutStride(m_outputLayout);	}
1537 
1538 	void					uploadInputBuffer	(const void* const* inputPtrs, int numValues);
1539 	void					readOutputBuffer	(void* const* outputPtrs, int numValues);
1540 
1541 	static void				declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
1542 	static void				generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
1543 
1544 protected:
1545 	Move<VkBuffer>			m_inputBuffer;
1546 	Move<VkBuffer>			m_outputBuffer;
1547 
1548 private:
1549 	struct VarLayout
1550 	{
1551 		deUint32		offset;
1552 		deUint32		stride;
1553 		deUint32		matrixStride;
1554 
VarLayoutvkt::shaderexecutor::__anon9d58b36f0111::BufferIoExecutor::VarLayout1555 		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
1556 	};
1557 
1558 	static void				computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
1559 	static deUint32			getLayoutStride		(const vector<VarLayout>& layout);
1560 
1561 	static void				copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
1562 	static void				copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
1563 
1564 	de::MovePtr<Allocation>	m_inputAlloc;
1565 	de::MovePtr<Allocation>	m_outputAlloc;
1566 
1567 	vector<VarLayout>		m_inputLayout;
1568 	vector<VarLayout>		m_outputLayout;
1569 };
1570 
BufferIoExecutor(Context & context,const ShaderSpec & shaderSpec)1571 BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
1572 	: ShaderExecutor(context, shaderSpec)
1573 {
1574 	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
1575 	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
1576 }
1577 
~BufferIoExecutor(void)1578 BufferIoExecutor::~BufferIoExecutor (void)
1579 {
1580 }
1581 
getLayoutStride(const vector<VarLayout> & layout)1582 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1583 {
1584 	return layout.empty() ? 0 : layout[0].stride;
1585 }
1586 
computeVarLayout(const std::vector<Symbol> & symbols,std::vector<VarLayout> * layout)1587 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
1588 {
1589 	deUint32	maxAlignment	= 0;
1590 	deUint32	curOffset		= 0;
1591 
1592 	DE_ASSERT(layout != DE_NULL);
1593 	DE_ASSERT(layout->empty());
1594 	layout->resize(symbols.size());
1595 
1596 	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1597 	{
1598 		const Symbol&		symbol		= symbols[varNdx];
1599 		const glu::DataType	basicType	= symbol.varType.getBasicType();
1600 		VarLayout&			layoutEntry	= (*layout)[varNdx];
1601 
1602 		if (glu::isDataTypeScalarOrVector(basicType))
1603 		{
1604 			const deUint32	alignment	= getVecStd430ByteAlignment(basicType);
1605 			const deUint32	size		= (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1606 
1607 			curOffset		= (deUint32)deAlign32((int)curOffset, (int)alignment);
1608 			maxAlignment	= de::max(maxAlignment, alignment);
1609 
1610 			layoutEntry.offset			= curOffset;
1611 			layoutEntry.matrixStride	= 0;
1612 
1613 			curOffset += size;
1614 		}
1615 		else if (glu::isDataTypeMatrix(basicType))
1616 		{
1617 			const int				numVecs			= glu::getDataTypeMatrixNumColumns(basicType);
1618 			const glu::DataType		vecType			= glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
1619 			const deUint32			vecAlignment	= isDataTypeFloat16OrVec(basicType) ? getVecStd430ByteAlignment(vecType)/2 : getVecStd430ByteAlignment(vecType);
1620 
1621 			curOffset		= (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
1622 			maxAlignment	= de::max(maxAlignment, vecAlignment);
1623 
1624 			layoutEntry.offset			= curOffset;
1625 			layoutEntry.matrixStride	= vecAlignment;
1626 
1627 			curOffset += vecAlignment*numVecs;
1628 		}
1629 		else
1630 			DE_ASSERT(false);
1631 	}
1632 
1633 	{
1634 		const deUint32	totalSize	= (deUint32)deAlign32(curOffset, maxAlignment);
1635 
1636 		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1637 			varIter->stride = totalSize;
1638 	}
1639 }
1640 
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1641 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1642 {
1643 	// Input struct
1644 	if (!spec.inputs.empty())
1645 	{
1646 		glu::StructType inputStruct("Inputs");
1647 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1648 			inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1649 		src << glu::declare(&inputStruct) << ";\n";
1650 	}
1651 
1652 	// Output struct
1653 	{
1654 		glu::StructType outputStruct("Outputs");
1655 		for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1656 			outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1657 		src << glu::declare(&outputStruct) << ";\n";
1658 	}
1659 
1660 	src << "\n";
1661 
1662 	if (!spec.inputs.empty())
1663 	{
1664 		src	<< "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1665 			<< "{\n"
1666 			<< "	Inputs inputs[];\n"
1667 			<< "};\n";
1668 	}
1669 
1670 	src	<< "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1671 		<< "{\n"
1672 		<< "	Outputs outputs[];\n"
1673 		<< "};\n"
1674 		<< "\n";
1675 }
1676 
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1677 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1678 {
1679 	std::string	tname;
1680 	for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1681 	{
1682 		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1683 		if (f16BitTest)
1684 		{
1685 			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1686 		}
1687 		else
1688 		{
1689 			tname = glu::getDataTypeName(symIter->varType.getBasicType());
1690 		}
1691 		src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
1692 	}
1693 
1694 	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1695 	{
1696 		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1697 		if (f16BitTest)
1698 		{
1699 			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1700 		}
1701 		else
1702 		{
1703 			tname = glu::getDataTypeName(symIter->varType.getBasicType());
1704 		}
1705 		src << "\t" << tname << " " << symIter->name << ";\n";
1706 		if (f16BitTest)
1707 		{
1708 			const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1709 			src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
1710 		}
1711 	}
1712 
1713 	src << "\n";
1714 
1715 	{
1716 		std::istringstream	opSrc	(spec.source);
1717 		std::string			line;
1718 
1719 		while (std::getline(opSrc, line))
1720 			src << "\t" << line << "\n";
1721 	}
1722 
1723 	if (spec.packFloat16Bit)
1724 		packFloat16Bit (src, spec.outputs);
1725 
1726 	src << "\n";
1727 	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1728 	{
1729 		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1730 		if(f16BitTest)
1731 			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1732 		else
1733 			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1734 	}
1735 }
1736 
copyToBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1737 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1738 {
1739 	if (varType.isBasicType())
1740 	{
1741 		const glu::DataType		basicType		= varType.getBasicType();
1742 		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1743 		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1744 		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1745 		const int				numComps		= scalarSize / numVecs;
1746 
1747 		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1748 		{
1749 			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1750 			{
1751 				const int		size			= (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1752 				const int		srcOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
1753 				const int		dstOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1754 				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1755 				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1756 
1757 				deMemcpy(dstPtr, srcPtr, size * numComps);
1758 			}
1759 		}
1760 	}
1761 	else
1762 		throw tcu::InternalError("Unsupported type");
1763 }
1764 
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1765 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1766 {
1767 	if (varType.isBasicType())
1768 	{
1769 		const glu::DataType		basicType		= varType.getBasicType();
1770 		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1771 		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1772 		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1773 		const int				numComps		= scalarSize / numVecs;
1774 
1775 		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1776 		{
1777 			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1778 			{
1779 				const int		size			= (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1780 				const int		srcOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1781 				const int		dstOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
1782 				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1783 				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1784 
1785 				deMemcpy(dstPtr, srcPtr, size * numComps);
1786 			}
1787 		}
1788 	}
1789 	else
1790 		throw tcu::InternalError("Unsupported type");
1791 }
1792 
uploadInputBuffer(const void * const * inputPtrs,int numValues)1793 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1794 {
1795 	const VkDevice			vkDevice			= m_context.getDevice();
1796 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1797 
1798 	const deUint32			inputStride			= getLayoutStride(m_inputLayout);
1799 	const int				inputBufferSize		= inputStride * numValues;
1800 
1801 	if (inputBufferSize == 0)
1802 		return; // No inputs
1803 
1804 	DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1805 	for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1806 	{
1807 		const glu::VarType&		varType		= m_shaderSpec.inputs[inputNdx].varType;
1808 		const VarLayout&		layout		= m_inputLayout[inputNdx];
1809 
1810 		copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr());
1811 	}
1812 
1813 	flushAlloc(vk, vkDevice, *m_inputAlloc);
1814 }
1815 
readOutputBuffer(void * const * outputPtrs,int numValues)1816 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1817 {
1818 	const VkDevice			vkDevice			= m_context.getDevice();
1819 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1820 
1821 	DE_ASSERT(numValues > 0); // At least some outputs are required.
1822 
1823 	invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1824 
1825 	DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1826 	for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1827 	{
1828 		const glu::VarType&		varType		= m_shaderSpec.outputs[outputNdx].varType;
1829 		const VarLayout&		layout		= m_outputLayout[outputNdx];
1830 
1831 		copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1832 	}
1833 }
1834 
initBuffers(int numValues)1835 void BufferIoExecutor::initBuffers (int numValues)
1836 {
1837 	const deUint32				inputStride			= getLayoutStride(m_inputLayout);
1838 	const deUint32				outputStride		= getLayoutStride(m_outputLayout);
1839 	// Avoid creating zero-sized buffer/memory
1840 	const size_t				inputBufferSize		= de::max(numValues * inputStride, 1u);
1841 	const size_t				outputBufferSize	= numValues * outputStride;
1842 
1843 	// Upload data to buffer
1844 	const VkDevice				vkDevice			= m_context.getDevice();
1845 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
1846 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1847 	Allocator&					memAlloc			= m_context.getDefaultAllocator();
1848 
1849 	const VkBufferCreateInfo inputBufferParams =
1850 	{
1851 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1852 		DE_NULL,									// const void*			pNext;
1853 		0u,											// VkBufferCreateFlags	flags;
1854 		inputBufferSize,							// VkDeviceSize			size;
1855 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1856 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1857 		1u,											// deUint32				queueFamilyCount;
1858 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1859 	};
1860 
1861 	m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1862 	m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1863 
1864 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1865 
1866 	const VkBufferCreateInfo outputBufferParams =
1867 	{
1868 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1869 		DE_NULL,									// const void*			pNext;
1870 		0u,											// VkBufferCreateFlags	flags;
1871 		outputBufferSize,							// VkDeviceSize			size;
1872 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1873 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1874 		1u,											// deUint32				queueFamilyCount;
1875 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1876 	};
1877 
1878 	m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1879 	m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1880 
1881 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1882 }
1883 
1884 // ComputeShaderExecutor
1885 
1886 class ComputeShaderExecutor : public BufferIoExecutor
1887 {
1888 public:
1889 						ComputeShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1890 	virtual				~ComputeShaderExecutor	(void);
1891 
1892 	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1893 
1894 	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
1895 
1896 protected:
1897 	static std::string	generateComputeShader	(const ShaderSpec& spec);
1898 
1899 private:
1900 	const VkDescriptorSetLayout					m_extraResourcesLayout;
1901 };
1902 
ComputeShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1903 ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1904 	: BufferIoExecutor			(context, shaderSpec)
1905 	, m_extraResourcesLayout	(extraResourcesLayout)
1906 {
1907 }
1908 
~ComputeShaderExecutor(void)1909 ComputeShaderExecutor::~ComputeShaderExecutor	(void)
1910 {
1911 }
1912 
getTypeSpirv(const glu::DataType type)1913 std::string getTypeSpirv(const glu::DataType type)
1914 {
1915 	switch(type)
1916 	{
1917 	case glu::TYPE_FLOAT16:
1918 		return "%f16";
1919 	case glu::TYPE_FLOAT16_VEC2:
1920 		return "%v2f16";
1921 	case glu::TYPE_FLOAT16_VEC3:
1922 		return "%v3f16";
1923 	case glu::TYPE_FLOAT16_VEC4:
1924 		return "%v4f16";
1925 	case glu::TYPE_FLOAT:
1926 		return "%f32";
1927 	case glu::TYPE_FLOAT_VEC2:
1928 		return "%v2f32";
1929 	case glu::TYPE_FLOAT_VEC3:
1930 		return "%v3f32";
1931 	case glu::TYPE_FLOAT_VEC4:
1932 		return "%v4f32";
1933 	case glu::TYPE_INT:
1934 		return "%i32";
1935 	case glu::TYPE_INT_VEC2:
1936 		return "%v2i32";
1937 	case glu::TYPE_INT_VEC3:
1938 		return "%v3i32";
1939 	case glu::TYPE_INT_VEC4:
1940 		return "%v4i32";
1941 	default:
1942 		DE_ASSERT(0);
1943 		return "";
1944 		break;
1945 	}
1946 }
1947 
// Returns SPIR-V assembly that shifts the %i32 value stored in `variableName`
// left by %c_i32_1 and writes it back:
//
//   %operation_move_<N> = OpLoad %i32 <variableName>
//   %move1_<N>          = OpShiftLeftLogical %i32 %operation_move_<N> %c_i32_1
//   OpStore <variableName> %move1_<N>
//
// variableName - id of the variable to update, including the leading '%'.
// operationNdx - suffix <N> that keeps the generated result ids unique.
//
// The returned text starts with an empty line. The name is taken by const
// reference so that a call does not copy the string.
std::string moveBitOperation (const std::string& variableName, const int operationNdx)
{
	std::ostringstream	src;

	src << "\n"
		<< "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
		<< "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n"
		<< "OpStore " << variableName << " %move1_" << operationNdx << "\n";

	return src.str();
}
1957 
sclarComparison(const std::string opeartion,const int operationNdx,const glu::DataType type,const std::string & outputType,const int scalarSize)1958 std::string sclarComparison(const std::string opeartion, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
1959 {
1960 	std::ostringstream	src;
1961 	std::string			boolType;
1962 
1963 	switch (type)
1964 	{
1965 	case glu::TYPE_FLOAT16:
1966 	case glu::TYPE_FLOAT:
1967 		src << "\n"
1968 			<< "%operation_result_" << operationNdx << " = " << opeartion << " %bool %in0_val %in1_val\n"
1969 			<< "OpSelectionMerge %IF_" << operationNdx << " None\n"
1970 			<< "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
1971 			<< "%label_IF_" << operationNdx << " = OpLabel\n"
1972 			<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
1973 			<< "%out_val_" << operationNdx << " = OpLoad %i32 %out\n"
1974 			<< "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
1975 			<< "OpStore %out %add_if_" << operationNdx << "\n"
1976 			<< "OpBranch %IF_" << operationNdx << "\n"
1977 			<< "%IF_" << operationNdx << " = OpLabel\n";
1978 		return src.str();
1979 	case glu::TYPE_FLOAT16_VEC2:
1980 	case glu::TYPE_FLOAT_VEC2:
1981 		boolType = "%v2bool";
1982 		break;
1983 	case glu::TYPE_FLOAT16_VEC3:
1984 	case glu::TYPE_FLOAT_VEC3:
1985 		boolType = "%v3bool";
1986 		break;
1987 	case glu::TYPE_FLOAT16_VEC4:
1988 	case glu::TYPE_FLOAT_VEC4:
1989 		boolType = "%v4bool";
1990 		break;
1991 	default:
1992 		DE_ASSERT(0);
1993 		return "";
1994 		break;
1995 	}
1996 
1997 	src << "\n"
1998 		<< "%operation_result_" << operationNdx << " = " << opeartion << " " << boolType << " %in0_val %in1_val\n"
1999 		<< "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2000 		<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2001 
2002 	src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2003 	for(int ndx = 0; ndx < scalarSize; ++ndx)
2004 		src << " %operation_val_" << operationNdx;
2005 	src << "\n";
2006 
2007 	src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
2008 		<< "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out\n"
2009 
2010 		<< "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
2011 		<< "OpStore %out %add_if_" << operationNdx << "\n";
2012 
2013 	return src.str();
2014 }
2015 
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool isMediump)2016 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool isMediump)
2017 {
2018 	const int			operationAmount	= 10;
2019 	int					moveBitNdx		= 0;
2020 	const std::string	inputType1		= getTypeSpirv(spec.inputs[0].varType.getBasicType());
2021 	const std::string	inputType2		= getTypeSpirv(spec.inputs[1].varType.getBasicType());
2022 	const std::string	outputType		= getTypeSpirv(spec.outputs[0].varType.getBasicType());
2023 	const std::string	packType		= spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2024 
2025 	std::string	opeartions[operationAmount]	=
2026 	{
2027 		"OpFOrdEqual",
2028 		"OpFOrdGreaterThan",
2029 		"OpFOrdLessThan",
2030 		"OpFOrdGreaterThanEqual",
2031 		"OpFOrdLessThanEqual",
2032 		"OpFUnordEqual",
2033 		"OpFUnordGreaterThan",
2034 		"OpFUnordLessThan",
2035 		"OpFUnordGreaterThanEqual",
2036 		"OpFUnordLessThanEqual"
2037 	};
2038 
2039 	std::ostringstream	src;
2040 	src << "; SPIR-V\n"
2041 		"; Version: 1.0\n"
2042 		"; Generator: Khronos Glslang Reference Front End; 4\n"
2043 		"; Bound: 114\n"
2044 		"; Schema: 0\n"
2045 		"OpCapability Shader\n";
2046 
2047 	if (spec.packFloat16Bit || are16Bit)
2048 		src << "OpCapability Float16\n";
2049 
2050 	if (are16Bit)
2051 		src << "OpCapability StorageBuffer16BitAccess\n"
2052 			"OpCapability UniformAndStorageBuffer16BitAccess\n";
2053 
2054 	if (are16Bit)
2055 		src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2056 
2057 	src << "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2058 		"OpMemoryModel Logical GLSL450\n"
2059 		"OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2060 		"OpExecutionMode %BP_main LocalSize 1 1 1\n"
2061 		"OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2062 		"OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2063 
2064 	//input offset
2065 	{
2066 		int offset = 0;
2067 		int ndx = 0;
2068 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2069 		{
2070 			src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2071 			++ndx;
2072 			offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2073 		}
2074 		src << "OpDecorate %up_SSB0_IN ArrayStride "<< offset << "\n";
2075 	}
2076 
2077 	src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2078 		"OpDecorate %ssboIN BufferBlock\n"
2079 		"OpDecorate %ssbo_src DescriptorSet 0\n"
2080 		"OpDecorate %ssbo_src Binding 0\n"
2081 		"\n";
2082 
2083 	if (isMediump)
2084 	{
2085 		src << "OpMemberDecorate %SSB0_IN 1 RelaxedPrecision\n"
2086 			"OpDecorate %in0 RelaxedPrecision\n"
2087 			"OpMemberDecorate %SSB0_IN 0 RelaxedPrecision\n"
2088 			"OpDecorate %src_val_0_0 RelaxedPrecision\n"
2089 			"OpDecorate %src_val_0_0 RelaxedPrecision\n"
2090 			"OpDecorate %in1 RelaxedPrecision\n"
2091 			"OpDecorate %src_val_0_1 RelaxedPrecision\n"
2092 			"OpDecorate %src_val_0_1 RelaxedPrecision\n"
2093 			"OpDecorate %in0_val RelaxedPrecision\n"
2094 			"OpDecorate %in1_val RelaxedPrecision\n"
2095 			"OpDecorate %in0_val RelaxedPrecision\n"
2096 			"OpDecorate %in1_val RelaxedPrecision\n"
2097 			"OpMemberDecorate %SSB0_OUT 0 RelaxedPrecision\n";
2098 	}
2099 
2100 	//output offset
2101 	{
2102 		int offset = 0;
2103 		int ndx = 0;
2104 		for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
2105 		{
2106 			src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2107 			++ndx;
2108 			offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2109 		}
2110 		src << "OpDecorate %up_SSB0_OUT ArrayStride " << offset << "\n";
2111 	}
2112 
2113 	src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2114 		"OpDecorate %ssboOUT BufferBlock\n"
2115 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
2116 		"OpDecorate %ssbo_dst Binding 1\n"
2117 		"\n"
2118 		"%void  = OpTypeVoid\n"
2119 		"%bool  = OpTypeBool\n"
2120 		"%v2bool = OpTypeVector %bool 2\n"
2121 		"%v3bool = OpTypeVector %bool 3\n"
2122 		"%v4bool = OpTypeVector %bool 4\n"
2123 		"%u32   = OpTypeInt 32 0\n";
2124 
2125 	if (!are16Bit) //f32 is not needed when shader operates only on f16
2126 		src << "%f32   = OpTypeFloat 32\n"
2127 			"%v2f32 = OpTypeVector %f32 2\n"
2128 			"%v3f32 = OpTypeVector %f32 3\n"
2129 			"%v4f32 = OpTypeVector %f32 4\n";
2130 
2131 	if (spec.packFloat16Bit || are16Bit)
2132 		src << "%f16   = OpTypeFloat 16\n"
2133 			"%v2f16 = OpTypeVector %f16 2\n"
2134 			"%v3f16 = OpTypeVector %f16 3\n"
2135 			"%v4f16 = OpTypeVector %f16 4\n";
2136 
2137 	src << "%i32   = OpTypeInt 32 1\n"
2138 		"%v2i32 = OpTypeVector %i32 2\n"
2139 		"%v3i32 = OpTypeVector %i32 3\n"
2140 		"%v4i32 = OpTypeVector %i32 4\n"
2141 		"%v3u32 = OpTypeVector %u32 3\n"
2142 		"\n"
2143 		"%ip_u32   = OpTypePointer Input %u32\n"
2144 		"%ip_v3u32 = OpTypePointer Input %v3u32\n"
2145 		"%up_float   = OpTypePointer Uniform " << inputType1 << "\n"
2146 		"\n"
2147 		"%fun     = OpTypeFunction %void\n"
2148 		"%fp_u32  = OpTypePointer Function %u32\n"
2149 		"%fp_i32  = OpTypePointer Function " << outputType << "\n"
2150 		"%fp_f32  = OpTypePointer Function " << inputType1 << "\n"
2151 		"%fp_operation =  OpTypePointer Function %i32\n";
2152 
2153 	if (spec.packFloat16Bit)
2154 		src << "%fp_f16  = OpTypePointer Function " << packType << "\n";
2155 
2156 	src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2157 		"%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2158 		"%up_i32 = OpTypePointer Uniform " << outputType << "\n"
2159 		"\n"
2160 		"%c_u32_0 = OpConstant %u32 0\n"
2161 		"%c_u32_1 = OpConstant %u32 1\n"
2162 		"%c_u32_2 = OpConstant %u32 2\n"
2163 		"%c_i32_0 = OpConstant %i32 0\n"
2164 		"%c_i32_1 = OpConstant %i32 1\n"
2165 		"%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2166 		"%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2167 		"%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2168 		"%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2169 		"%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2170 		"%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2171 		"\n"
2172 		"%SSB0_IN    = OpTypeStruct " << inputType1 << " " << inputType2 << "\n"
2173 		"%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2174 		"%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
2175 		"%up_ssboIN  = OpTypePointer Uniform %ssboIN\n"
2176 		"%ssbo_src   = OpVariable %up_ssboIN Uniform\n"
2177 		"\n"
2178 		"%SSB0_OUT    = OpTypeStruct " << outputType << "\n"
2179 		"%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2180 		"%ssboOUT     = OpTypeStruct %up_SSB0_OUT\n"
2181 		"%up_ssboOUT  = OpTypePointer Uniform %ssboOUT\n"
2182 		"%ssbo_dst    = OpVariable %up_ssboOUT Uniform\n"
2183 		"\n"
2184 		"%BP_main = OpFunction %void None %fun\n"
2185 		"%BP_label = OpLabel\n"
2186 		"%invocationNdx = OpVariable  %fp_u32 Function\n";
2187 
2188 	if (spec.packFloat16Bit)
2189 		src << "%in0 = OpVariable %fp_f16 Function\n"
2190 			"%in1 = OpVariable %fp_f16 Function\n";
2191 	else
2192 		src << "%in0 = OpVariable %fp_f32 Function\n"
2193 			"%in1 = OpVariable %fp_f32 Function\n";
2194 
2195 	src << "%operation = OpVariable %fp_operation Function\n"
2196 		"%out = OpVariable %fp_i32 Function\n"
2197 		"%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2198 		"%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2199 		"%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2200 		"%BP_num_0_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2201 		"%BP_num_1_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2202 		"%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2203 		"%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2204 		"%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2205 		"%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2206 		"%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2207 		"\n"
2208 		"%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2209 		"%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2210 		"%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2211 		"%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2212 		"%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2213 		"OpStore %invocationNdx %add_2\n"
2214 		"%invocationNdx_val = OpLoad %u32 %invocationNdx\n"
2215 		"\n"
2216 		"%src_ptr_0_0 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_0\n"
2217 		"%src_val_0_0 = OpLoad " << inputType1 << " %src_ptr_0_0\n";
2218 
2219 	if(spec.packFloat16Bit)
2220 		src << "%val_f16_0_0 = OpFConvert " << packType <<" %src_val_0_0\n"
2221 			"OpStore %in0 %val_f16_0_0\n";
2222 	else
2223 		src << "OpStore %in0 %src_val_0_0\n";
2224 
2225 	src << "\n"
2226 		"%src_ptr_0_1 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_1\n"
2227 		"%src_val_0_1 = OpLoad " << inputType2 << " %src_ptr_0_1\n";
2228 
2229 	if (spec.packFloat16Bit)
2230 		src << "%val_f16_0_1 = OpFConvert " << packType << " %src_val_0_1\n"
2231 			"OpStore %in1 %val_f16_0_1\n";
2232 	else
2233 		src << "OpStore %in1 %src_val_0_1\n";
2234 
2235 	src << "\n"
2236 		"OpStore %operation %c_i32_1\n"
2237 		"OpStore %out %c_" << &outputType[1] << "_0\n"
2238 		"\n";
2239 
2240 	if (spec.packFloat16Bit)
2241 		src << "%in0_val = OpLoad " << packType << " %in0\n"
2242 			"%in1_val = OpLoad " << packType << " %in1\n";
2243 	else
2244 		src << "%in0_val = OpLoad " << inputType1 << " %in0\n"
2245 			"%in1_val = OpLoad " << inputType2 << " %in1\n";
2246 
2247 	src << "\n";
2248 	for(int operationNdx = 0; operationNdx < operationAmount; ++operationNdx)
2249 	{
2250 		src << sclarComparison	(opeartions[operationNdx], operationNdx,
2251 								spec.inputs[0].varType.getBasicType(),
2252 								outputType,
2253 								spec.outputs[0].varType.getScalarSize());
2254 		src << moveBitOperation("%operation", moveBitNdx);
2255 		++moveBitNdx;
2256 	}
2257 
2258 	src << "\n"
2259 		"%out_val_final = OpLoad " << outputType << " %out\n"
2260 		"%ssbo_dst_ptr = OpAccessChain %up_i32 %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n"
2261 		"OpStore %ssbo_dst_ptr %out_val_final\n"
2262 		"\n"
2263 		"OpReturn\n"
2264 		"OpFunctionEnd\n";
2265 	return src.str();
2266 }
2267 
2268 
generateComputeShader(const ShaderSpec & spec)2269 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2270 {
2271 	if(spec.spirVShader)
2272 	{
2273 		bool	are16Bit	= false;
2274 		bool	isMediump	= false;
2275 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2276 		{
2277 			if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2278 				are16Bit = true;
2279 
2280 			if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2281 				isMediump = true;
2282 
2283 			if(isMediump && are16Bit)
2284 				break;
2285 		}
2286 
2287 		return generateSpirv(spec, are16Bit, isMediump);
2288 	}
2289 	else
2290 	{
2291 		std::ostringstream src;
2292 		src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2293 
2294 		if (!spec.globalDeclarations.empty())
2295 			src << spec.globalDeclarations << "\n";
2296 
2297 		src << "layout(local_size_x = 1) in;\n"
2298 			<< "\n";
2299 
2300 		declareBufferBlocks(src, spec);
2301 
2302 		src << "void main (void)\n"
2303 			<< "{\n"
2304 			<< "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2305 			<< "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2306 
2307 		generateExecBufferIo(src, spec, "invocationNdx");
2308 
2309 		src << "}\n";
2310 
2311 		return src.str();
2312 	}
2313 }
2314 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2315 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2316 {
2317 	if(shaderSpec.spirVShader)
2318 		programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2319 	else
2320 		programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2321 }
2322 
// Executes the "compute" shader binary over numValues values.
//
// Steps performed:
//  1. initBuffers() / uploadInputBuffer() prepare the buffers and copy the inputs.
//  2. A descriptor set with two storage-buffer bindings, a pipeline layout and a
//     compute pipeline are created.
//  3. The values are processed in batches; every batch re-points the descriptors
//     at its slice of the buffers, records a one-shot command buffer, submits it
//     and waits for completion.
//  4. readOutputBuffer() copies the results into outputs.
//
// numValues       number of values to process
// inputs          forwarded to uploadInputBuffer()
// outputs         forwarded to readOutputBuffer()
// extraResources  optional second descriptor set; must be non-null exactly when
//                 m_extraResourcesLayout is non-null (asserted below)
void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice					vkDevice				= m_context.getDevice();
	const DeviceInterface&			vk						= m_context.getDeviceInterface();
	const VkQueue					queue					= m_context.getUniversalQueue();
	const deUint32					queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	DescriptorPoolBuilder			descriptorPoolBuilder;
	DescriptorSetLayoutBuilder		descriptorSetLayoutBuilder;

	Move<VkShaderModule>			computeShaderModule;
	Move<VkPipeline>				computePipeline;
	Move<VkPipelineLayout>			pipelineLayout;
	Move<VkCommandPool>				cmdPool;
	Move<VkDescriptorPool>			descriptorPool;
	Move<VkDescriptorSetLayout>		descriptorSetLayout;
	Move<VkDescriptorSet>			descriptorSet;
	// Set 0 is the executor's own buffer set; set 1 is only present with extra resources.
	const deUint32					numDescriptorSets		= (m_extraResourcesLayout != 0) ? 2u : 1u;

	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	initBuffers(numValues);

	// Setup input buffer & copy data
	uploadInputBuffer(inputs, numValues);

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create descriptor set layout, pool and set: two storage-buffer bindings, which
	// are written per batch through INPUT_BUFFER_BINDING and OUTPUT_BUFFER_BINDING.

	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
	descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,		// VkStructureType				sType;
		DE_NULL,											// const void*					pNext;
		*descriptorPool,									// VkDescriptorPool				descriptorPool;
		1u,													// deUint32						descriptorSetCount;
		&*descriptorSetLayout								// const VkDescriptorSetLayout*	pSetLayouts;
	};

	descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

	// Create pipeline layout
	{
		// The second entry is only consumed when numDescriptorSets == 2.
		const VkDescriptorSetLayout			descriptorSetLayouts[]	=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						setLayoutCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		computeShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
	}

	// create pipeline
	{
		const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
				DE_NULL,													// const void*							pNext;
				(VkPipelineShaderStageCreateFlags)0u,						// VkPipelineShaderStageCreateFlags		flags;
				VK_SHADER_STAGE_COMPUTE_BIT,								// VkShaderStageFlagBits				stage;
				*computeShaderModule,										// VkShaderModule						module;
				"main",														// const char*							pName;
				DE_NULL														// const VkSpecializationInfo*			pSpecializationInfo;
			}
		};

		const VkComputePipelineCreateInfo computePipelineParams =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType									sType;
			DE_NULL,											// const void*										pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
			*shaderStageParams,									// VkPipelineShaderStageCreateInfo					stage;
			*pipelineLayout,									// VkPipelineLayout									layout;
			0u,													// VkPipeline										basePipelineHandle;
			0u,													// int32_t											basePipelineIndex;
		};

		computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
	}

	// Upper bound on the number of values handled by a single dispatch.
	// NOTE(review): the value is used below as the x workgroup count of vkCmdDispatch,
	// which is limited by maxComputeWorkGroupCount rather than maxComputeWorkGroupSize.
	// Presumably the work group size limit is used as a conservative batch size; it also
	// determines the per-batch descriptor offsets, so confirm alignment before changing it.
	const int			maxValuesPerInvocation	= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
	int					curOffset				= 0;
	const deUint32		inputStride				= getInputStride();
	const deUint32		outputStride			= getOutputStride();

	while (curOffset < numValues)
	{
		// Allocated per batch; released when it goes out of scope at the end of the iteration.
		Move<VkCommandBuffer>	cmdBuffer;
		const int				numToExec	= de::min(maxValuesPerInvocation, numValues-curOffset);

		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			// Offset and range select the slice of the output buffer that belongs to this batch.
			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				curOffset * outputStride,		// VkDeviceSize		offset;
				numToExec * outputStride		// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// The input binding is left unwritten when the input stride is zero.
			if (inputStride)
			{
				const VkDescriptorBufferInfo inputDescriptorBufferInfo =
				{
					*m_inputBuffer,					// VkBuffer			buffer;
					curOffset * inputStride,		// VkDeviceSize		offset;
					numToExec * inputStride			// VkDeviceSize		range;
				};

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		// Record the command buffer for this batch
		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		beginCommandBuffer(vk, *cmdBuffer);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		{
			// extraResources is only bound when numDescriptorSets == 2.
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		// One workgroup per value in the batch.
		vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

		endCommandBuffer(vk, *cmdBuffer);

		curOffset += numToExec;

		// Execute
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
2488 
2489 // Tessellation utils
2490 
// Returns the GLSL 4.50 pass-through vertex shader used by the tessellation
// executors. It takes no vertex inputs: the position is derived from
// gl_VertexIndex alone (x = index / 2, y = index % 2).
static std::string generateVertexShaderForTess (void)
{
	static const char* const	s_vertexShaderSource	=
		"#version 450\n"
		"void main (void)\n{\n"
		"	gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
		"}\n";

	return std::string(s_vertexShaderSource);
}
2501 
// Shared base for the tessellation-stage executors. It builds on
// BufferIoExecutor and adds renderTess(), which draws patches through a
// vertex / tessellation control / tessellation evaluation / fragment pipeline.
// The constructor throws NotSupportedError when the device does not report the
// tessellationShader feature.
class TessellationExecutor : public BufferIoExecutor
{
public:
					TessellationExecutor		(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual			~TessellationExecutor		(void);

	// Records and submits a single patch-list draw of vertexCount vertices with
	// patchControlPoints control points per patch, then waits for it to finish.
	// numValues is used to size the input buffer binding; extraResources is bound
	// as descriptor set 1 when an extra resources layout was supplied.
	void			renderTess					(deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);

private:
	// Optional layout of a second descriptor set; a null handle means "no extra resources".
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};
2513 
TessellationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2514 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2515 	: BufferIoExecutor			(context, shaderSpec)
2516 	, m_extraResourcesLayout	(extraResourcesLayout)
2517 {
2518 	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
2519 
2520 	if (!features.tessellationShader)
2521 		TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
2522 }
2523 
// Destructor with an empty body: no explicit cleanup is performed here, and the
// extra-resources layout handle held by this class is not destroyed by it.
TessellationExecutor::~TessellationExecutor (void)
{
}
2527 
// Draws vertexCount vertices as a patch list through the "vert", "tess_control",
// "tess_eval" and "frag" shader binaries and waits for the draw to complete.
//
// The draw targets an offscreen DEFAULT_RENDER_WIDTH x DEFAULT_RENDER_HEIGHT
// R8G8B8A8_UNORM color image that exists only for the duration of this call.
// The output buffer (and the input buffer, when its size is non-zero) are bound
// as whole-buffer storage buffers in descriptor set 0; extraResources, if any,
// is bound as descriptor set 1.
//
// numValues           used to compute the input buffer size (numValues * input stride)
// vertexCount         number of vertices passed to the draw call
// patchControlPoints  control points per patch for the pipeline
// extraResources      optional second descriptor set; must be non-null exactly when
//                     m_extraResourcesLayout is non-null (asserted below)
//
// This function neither uploads inputs nor reads back outputs itself.
void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
{
	const size_t						inputBufferSize				= numValues * getInputStride();
	const VkDevice						vkDevice					= m_context.getDevice();
	const DeviceInterface&				vk							= m_context.getDeviceInterface();
	const VkQueue						queue						= m_context.getUniversalQueue();
	const deUint32						queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
	Allocator&							memAlloc					= m_context.getDefaultAllocator();

	const tcu::UVec2					renderSize					(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);

	Move<VkImage>						colorImage;
	de::MovePtr<Allocation>				colorImageAlloc;
	VkFormat							colorFormat					= VK_FORMAT_R8G8B8A8_UNORM;
	Move<VkImageView>					colorImageView;

	Move<VkRenderPass>					renderPass;
	Move<VkFramebuffer>					framebuffer;
	Move<VkPipelineLayout>				pipelineLayout;
	Move<VkPipeline>					graphicsPipeline;

	Move<VkShaderModule>				vertexShaderModule;
	Move<VkShaderModule>				tessControlShaderModule;
	Move<VkShaderModule>				tessEvalShaderModule;
	Move<VkShaderModule>				fragmentShaderModule;

	Move<VkCommandPool>					cmdPool;
	Move<VkCommandBuffer>				cmdBuffer;

	Move<VkDescriptorPool>				descriptorPool;
	Move<VkDescriptorSetLayout>			descriptorSetLayout;
	Move<VkDescriptorSet>				descriptorSet;
	// Set 0 is the executor's own buffer set; set 1 is only present with extra resources.
	const deUint32						numDescriptorSets			= (m_extraResourcesLayout != 0) ? 2u : 1u;

	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	// Create color image
	{
		const VkImageCreateInfo colorImageParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType			sType;
			DE_NULL,																	// const void*				pNext;
			0u,																			// VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,															// VkImageType				imageType;
			colorFormat,																// VkFormat					format;
			{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D				extent;
			1u,																			// deUint32					mipLevels;
			1u,																			// deUint32					arraySize;
			VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling			tiling;
			VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode			sharingMode;
			1u,																			// deUint32					queueFamilyCount;
			&queueFamilyIndex,															// const deUint32*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED													// VkImageLayout			initialLayout;
		};

		colorImage = createImage(vk, vkDevice, &colorImageParams);

		// Allocate and bind color image memory
		colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
		VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
	}

	// Create color attachment view
	{
		const VkImageViewCreateInfo colorImageViewParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
			DE_NULL,											// const void*				pNext;
			0u,													// VkImageViewCreateFlags	flags;
			*colorImage,										// VkImage					image;
			VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
			colorFormat,										// VkFormat					format;
			{
				VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
				VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
				VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
				VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
			},													// VkComponentsMapping		components;
			{
				VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
				0u,												// deUint32					baseMipLevel;
				1u,												// deUint32					mipLevels;
				0u,												// deUint32					baseArraylayer;
				1u												// deUint32					layerCount;
			}													// VkImageSubresourceRange	subresourceRange;
		};

		colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
	}

	// Create render pass: a single subpass with one color attachment that is cleared on load
	{
		const VkAttachmentDescription colorAttachmentDescription =
		{
			0u,													// VkAttachmentDescriptorFlags	flags;
			colorFormat,										// VkFormat						format;
			VK_SAMPLE_COUNT_1_BIT,								// VkSampleCountFlagBits		samples;
			VK_ATTACHMENT_LOAD_OP_CLEAR,						// VkAttachmentLoadOp			loadOp;
			VK_ATTACHMENT_STORE_OP_STORE,						// VkAttachmentStoreOp			storeOp;
			VK_ATTACHMENT_LOAD_OP_DONT_CARE,					// VkAttachmentLoadOp			stencilLoadOp;
			VK_ATTACHMENT_STORE_OP_DONT_CARE,					// VkAttachmentStoreOp			stencilStoreOp;
			VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout				initialLayout;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout				finalLayout
		};

		const VkAttachmentDescription attachments[1] =
		{
			colorAttachmentDescription
		};

		const VkAttachmentReference colorAttachmentReference =
		{
			0u,													// deUint32			attachment;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout	layout;
		};

		const VkSubpassDescription subpassDescription =
		{
			0u,													// VkSubpassDescriptionFlags	flags;
			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
			0u,													// deUint32						inputCount;
			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
			1u,													// deUint32						colorCount;
			&colorAttachmentReference,							// const VkAttachmentReference*	pColorAttachments;
			DE_NULL,											// const VkAttachmentReference*	pResolveAttachments;
			DE_NULL,											// VkAttachmentReference		depthStencilAttachment;
			0u,													// deUint32						preserveCount;
			DE_NULL												// const VkAttachmentReference* pPreserveAttachments;
		};

		const VkRenderPassCreateInfo renderPassParams =
		{
			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
			DE_NULL,											// const void*						pNext;
			0u,													// VkRenderPassCreateFlags			flags;
			1u,													// deUint32							attachmentCount;
			attachments,										// const VkAttachmentDescription*	pAttachments;
			1u,													// deUint32							subpassCount;
			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
			0u,													// deUint32							dependencyCount;
			DE_NULL												// const VkSubpassDependency*		pDependencies;
		};

		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
	}

	// Create framebuffer
	{
		const VkFramebufferCreateInfo framebufferParams =
		{
			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			0u,													// VkFramebufferCreateFlags		flags;
			*renderPass,										// VkRenderPass					renderPass;
			1u,													// deUint32						attachmentCount;
			&*colorImageView,									// const VkAttachmentBindInfo*	pAttachments;
			(deUint32)renderSize.x(),							// deUint32						width;
			(deUint32)renderSize.y(),							// deUint32						height;
			1u													// deUint32						layers;
		};

		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
	}

	// Create descriptors: one set with two storage-buffer bindings visible to all stages
	{
		DescriptorPoolBuilder		descriptorPoolBuilder;
		DescriptorSetLayoutBuilder	descriptorSetLayoutBuilder;

		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		descriptorSetLayout	= descriptorSetLayoutBuilder.build(vk, vkDevice);
		descriptorPool		= descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		const VkDescriptorSetAllocateInfo allocInfo =
		{
			VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			DE_NULL,
			*descriptorPool,
			1u,
			&*descriptorSetLayout
		};

		descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				0u,								// VkDeviceSize		offset;
				VK_WHOLE_SIZE					// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// The buffer handle is filled in below only when there is input data to bind.
			VkDescriptorBufferInfo inputDescriptorBufferInfo =
			{
				0,							// VkBuffer			buffer;
				0u,							// VkDeviceSize		offset;
				VK_WHOLE_SIZE				// VkDeviceSize		range;
			};

			// The input binding is left unwritten when the input buffer size is zero.
			if (inputBufferSize > 0)
			{
				inputDescriptorBufferInfo.buffer = *m_inputBuffer;

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}
	}

	// Create pipeline layout
	{
		// The second entry is only consumed when numDescriptorSets == 2.
		const VkDescriptorSetLayout			descriptorSetLayouts[]		=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						setLayoutCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shader modules
	{
		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
		tessControlShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
		tessEvalShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
	}

	// Create pipeline
	{
		// No vertex bindings or attributes are declared for this pipeline.
		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,						// VkPipelineVertexInputStateCreateFlags	flags;
			0u,																// deUint32									bindingCount;
			DE_NULL,														// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,																// deUint32									attributeCount;
			DE_NULL,														// const VkVertexInputAttributeDescription*	pvertexAttributeDescriptions;
		};

		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));

		graphicsPipeline = makeGraphicsPipeline(vk,									// const DeviceInterface&                        vk
												vkDevice,							// const VkDevice                                device
												*pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
												*vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
												*tessControlShaderModule,			// const VkShaderModule                          tessellationControlShaderModule
												*tessEvalShaderModule,				// const VkShaderModule                          tessellationEvalShaderModule
												DE_NULL,							// const VkShaderModule                          geometryShaderModule
												*fragmentShaderModule,				// const VkShaderModule                          fragmentShaderModule
												*renderPass,						// const VkRenderPass                            renderPass
												viewports,							// const std::vector<VkViewport>&                viewports
												scissors,							// const std::vector<VkRect2D>&                  scissors
												VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,	// const VkPrimitiveTopology                     topology
												0u,									// const deUint32                                subpass
												patchControlPoints,					// const deUint32                                patchControlPoints
												&vertexInputStateParams);			// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
	}

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer
	{
		const VkClearValue clearValue = getDefaultClearColor();

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

		beginCommandBuffer(vk, *cmdBuffer);

		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);

		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

		{
			// extraResources is only bound when numDescriptorSets == 2.
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		// Single instance, starting at vertex 0.
		vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);

		endRenderPass(vk, *cmdBuffer);
		endCommandBuffer(vk, *cmdBuffer);
	}

	// Execute Draw
	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
}
2838 
2839 // TessControlExecutor
2840 
2841 class TessControlExecutor : public TessellationExecutor
2842 {
2843 public:
2844 						TessControlExecutor			(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2845 	virtual				~TessControlExecutor		(void);
2846 
2847 	static void			generateSources				(const ShaderSpec& shaderSpec, SourceCollections& programCollection);
2848 
2849 	virtual void		execute						(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
2850 
2851 protected:
2852 	static std::string	generateTessControlShader	(const ShaderSpec& shaderSpec);
2853 };
2854 
TessControlExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2855 TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2856 	: TessellationExecutor(context, shaderSpec, extraResourcesLayout)
2857 {
2858 }
2859 
~TessControlExecutor(void)2860 TessControlExecutor::~TessControlExecutor (void)
2861 {
2862 }
2863 
generateTessControlShader(const ShaderSpec & shaderSpec)2864 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
2865 {
2866 	std::ostringstream src;
2867 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
2868 
2869 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
2870 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
2871 
2872 	if (!shaderSpec.globalDeclarations.empty())
2873 		src << shaderSpec.globalDeclarations << "\n";
2874 
2875 	src << "\nlayout(vertices = 1) out;\n\n";
2876 
2877 	declareBufferBlocks(src, shaderSpec);
2878 
2879 	src << "void main (void)\n{\n";
2880 
2881 	for (int ndx = 0; ndx < 2; ndx++)
2882 		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
2883 
2884 	for (int ndx = 0; ndx < 4; ndx++)
2885 		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
2886 
2887 	src << "\n"
2888 		<< "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
2889 
2890 	generateExecBufferIo(src, shaderSpec, "invocationId");
2891 
2892 	src << "}\n";
2893 
2894 	return src.str();
2895 }
2896 
// Returns a fixed GLSL 4.50 tessellation evaluation shader that declares a
// triangles/ccw input layout and only writes gl_Position from gl_TessCoord.xy.
static std::string generateEmptyTessEvalShader ()
{
	// The whole shader is constant, so it is assembled from adjacent literals.
	std::string glsl;

	glsl += "#version 450\n"
			"#extension GL_EXT_tessellation_shader : require\n\n";
	glsl += "layout(triangles, ccw) in;\n";
	glsl += "\nvoid main (void)\n{\n";
	glsl += "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n";
	glsl += "}\n";

	return glsl;
}
2912 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2913 void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2914 {
2915 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
2916 	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
2917 	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
2918 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
2919 }
2920 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)2921 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
2922 {
2923 	const deUint32	patchSize	= 3;
2924 
2925 	initBuffers(numValues);
2926 
2927 	// Setup input buffer & copy data
2928 	uploadInputBuffer(inputs, numValues);
2929 
2930 	renderTess(numValues, patchSize * numValues, patchSize, extraResources);
2931 
2932 	// Read back data
2933 	readOutputBuffer(outputs, numValues);
2934 }
2935 
2936 // TessEvaluationExecutor
2937 
2938 class TessEvaluationExecutor : public TessellationExecutor
2939 {
2940 public:
2941 						TessEvaluationExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2942 	virtual				~TessEvaluationExecutor	(void);
2943 
2944 	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);
2945 
2946 	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
2947 
2948 protected:
2949 	static std::string	generateTessEvalShader	(const ShaderSpec& shaderSpec);
2950 };
2951 
TessEvaluationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2952 TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2953 	: TessellationExecutor (context, shaderSpec, extraResourcesLayout)
2954 {
2955 }
2956 
~TessEvaluationExecutor(void)2957 TessEvaluationExecutor::~TessEvaluationExecutor (void)
2958 {
2959 }
2960 
// Returns a fixed GLSL 4.50 tessellation control shader with a one-vertex output
// patch whose main() only assigns 1.0 to both inner and all four outer
// tessellation levels.
static std::string generatePassthroughTessControlShader (void)
{
	const int			numInnerLevels	= 2;
	const int			numOuterLevels	= 4;
	std::ostringstream	glsl;

	glsl << "#version 450\n"
			"#extension GL_EXT_tessellation_shader : require\n\n"
		 << "layout(vertices = 1) out;\n\n"
		 << "void main (void)\n{\n";

	int levelNdx = 0;

	while (levelNdx < numInnerLevels)
	{
		glsl << "\tgl_TessLevelInner[" << levelNdx << "] = 1.0;\n";
		++levelNdx;
	}

	levelNdx = 0;

	while (levelNdx < numOuterLevels)
	{
		glsl << "\tgl_TessLevelOuter[" << levelNdx << "] = 1.0;\n";
		++levelNdx;
	}

	glsl << "}\n";

	return glsl.str();
}
2982 
generateTessEvalShader(const ShaderSpec & shaderSpec)2983 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
2984 {
2985 	std::ostringstream src;
2986 
2987 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
2988 
2989 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
2990 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
2991 
2992 	if (!shaderSpec.globalDeclarations.empty())
2993 		src << shaderSpec.globalDeclarations << "\n";
2994 
2995 	src << "\n";
2996 
2997 	src << "layout(isolines, equal_spacing) in;\n\n";
2998 
2999 	declareBufferBlocks(src, shaderSpec);
3000 
3001 	src << "void main (void)\n{\n"
3002 		<< "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3003 		<< "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3004 
3005 	generateExecBufferIo(src, shaderSpec, "invocationId");
3006 
3007 	src	<< "}\n";
3008 
3009 	return src.str();
3010 }
3011 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)3012 void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3013 {
3014 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3015 	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3016 	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3017 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3018 }
3019 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3020 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3021 {
3022 	const int	patchSize		= 2;
3023 	const int	alignedValues	= deAlign32(numValues, patchSize);
3024 
3025 	// Initialize buffers with aligned value count to make room for padding
3026 	initBuffers(alignedValues);
3027 
3028 	// Setup input buffer & copy data
3029 	uploadInputBuffer(inputs, numValues);
3030 
3031 	renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3032 
3033 	// Read back data
3034 	readOutputBuffer(outputs, numValues);
3035 }
3036 
3037 } // anonymous
3038 
3039 // ShaderExecutor
3040 
~ShaderExecutor(void)3041 ShaderExecutor::~ShaderExecutor (void)
3042 {
3043 }
3044 
areInputs16Bit(void) const3045 bool ShaderExecutor::areInputs16Bit (void) const
3046 {
3047 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3048 	{
3049 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3050 			return true;
3051 	}
3052 	return false;
3053 }
3054 
areOutputs16Bit(void) const3055 bool ShaderExecutor::areOutputs16Bit (void) const
3056 {
3057 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3058 	{
3059 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3060 			return true;
3061 	}
3062 	return false;
3063 }
3064 
isOutput16Bit(const size_t ndx) const3065 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3066 {
3067 	if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3068 		return true;
3069 	return false;
3070 }
3071 
3072 // Utilities
3073 
generateSources(glu::ShaderType shaderType,const ShaderSpec & shaderSpec,vk::SourceCollections & dst)3074 void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
3075 {
3076 	switch (shaderType)
3077 	{
3078 		case glu::SHADERTYPE_VERTEX:					VertexShaderExecutor::generateSources	(shaderSpec, dst);	break;
3079 		case glu::SHADERTYPE_TESSELLATION_CONTROL:		TessControlExecutor::generateSources	(shaderSpec, dst);	break;
3080 		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	TessEvaluationExecutor::generateSources	(shaderSpec, dst);	break;
3081 		case glu::SHADERTYPE_GEOMETRY:					GeometryShaderExecutor::generateSources	(shaderSpec, dst);	break;
3082 		case glu::SHADERTYPE_FRAGMENT:					FragmentShaderExecutor::generateSources	(shaderSpec, dst);	break;
3083 		case glu::SHADERTYPE_COMPUTE:					ComputeShaderExecutor::generateSources	(shaderSpec, dst);	break;
3084 		default:
3085 			TCU_THROW(InternalError, "Unsupported shader type");
3086 	}
3087 }
3088 
createExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3089 ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3090 {
3091 	switch (shaderType)
3092 	{
3093 		case glu::SHADERTYPE_VERTEX:					return new VertexShaderExecutor		(context, shaderSpec, extraResourcesLayout);
3094 		case glu::SHADERTYPE_TESSELLATION_CONTROL:		return new TessControlExecutor		(context, shaderSpec, extraResourcesLayout);
3095 		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	return new TessEvaluationExecutor	(context, shaderSpec, extraResourcesLayout);
3096 		case glu::SHADERTYPE_GEOMETRY:					return new GeometryShaderExecutor	(context, shaderSpec, extraResourcesLayout);
3097 		case glu::SHADERTYPE_FRAGMENT:					return new FragmentShaderExecutor	(context, shaderSpec, extraResourcesLayout);
3098 		case glu::SHADERTYPE_COMPUTE:					return new ComputeShaderExecutor	(context, shaderSpec, extraResourcesLayout);
3099 		default:
3100 			TCU_THROW(InternalError, "Unsupported shader type");
3101 	}
3102 }
3103 
3104 } // shaderexecutor
3105 } // vkt
3106