1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Vulkan ShaderExecutor
24 *//*--------------------------------------------------------------------*/
25
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37
38 #include "gluShaderUtil.hpp"
39
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
43
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
47
48 #include <map>
49 #include <sstream>
50 #include <iostream>
51
52 using std::vector;
53 using namespace vk;
54
55 namespace vkt
56 {
57 namespace shaderexecutor
58 {
59 namespace
60 {
61
//! Default render dimensions. The code that consumes them is outside this excerpt.
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100
};
67
68 // Common typedefs
69
70 typedef de::SharedPtr<Unique<VkImage> > VkImageSp;
71 typedef de::SharedPtr<Unique<VkImageView> > VkImageViewSp;
72 typedef de::SharedPtr<Unique<VkBuffer> > VkBufferSp;
73 typedef de::SharedPtr<Allocation> AllocationSp;
74
75 static VkFormat getAttributeFormat(const glu::DataType dataType);
76
77 // Shader utilities
78
getDefaultClearColor(void)79 static VkClearValue getDefaultClearColor (void)
80 {
81 return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
82 }
83
//! Returns a minimal GLSL 450 fragment shader that writes vec4(0.0) to its single output o_color.
static std::string generateEmptyFragmentSource (void)
{
	std::string source;

	// Header: version and the only output declaration.
	source += "#version 450\n";
	source += "layout(location=0) out highp vec4 o_color;\n";

	// Entry point.
	source += "void main (void)\n{\n";
	source += " o_color = vec4(0.0);\n";
	source += "}\n";

	return source;
}
97
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)98 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
99 {
100 for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
101 {
102 if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
103 {
104 if(glu::isDataTypeVector(symIter->varType.getBasicType()))
105 {
106 for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
107 {
108 src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
109 }
110 }
111 else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
112 {
113 int maxRow = 0;
114 int maxCol = 0;
115 switch (symIter->varType.getBasicType())
116 {
117 case glu::TYPE_FLOAT_MAT2:
118 maxRow = maxCol = 2;
119 break;
120 case glu::TYPE_FLOAT_MAT2X3:
121 maxRow = 2;
122 maxCol = 3;
123 break;
124 case glu::TYPE_FLOAT_MAT2X4:
125 maxRow = 2;
126 maxCol = 4;
127 break;
128 case glu::TYPE_FLOAT_MAT3X2:
129 maxRow = 3;
130 maxCol = 2;
131 break;
132 case glu::TYPE_FLOAT_MAT3:
133 maxRow = maxCol = 3;
134 break;
135 case glu::TYPE_FLOAT_MAT3X4:
136 maxRow = 3;
137 maxCol = 4;
138 break;
139 case glu::TYPE_FLOAT_MAT4X2:
140 maxRow = 4;
141 maxCol = 2;
142 break;
143 case glu::TYPE_FLOAT_MAT4X3:
144 maxRow = 4;
145 maxCol = 3;
146 break;
147 case glu::TYPE_FLOAT_MAT4:
148 maxRow = maxCol = 4;
149 break;
150 default:
151 DE_ASSERT(false);
152 break;
153 }
154
155 for(int i = 0; i < maxRow; i++)
156 for(int j = 0; j < maxCol; j++)
157 {
158 src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
159 }
160 }
161 else
162 {
163 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
164 }
165 }
166 }
167 }
168
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)169 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
170 {
171 std::ostringstream src;
172 int location = 0;
173
174 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
175
176 if (!shaderSpec.globalDeclarations.empty())
177 src << shaderSpec.globalDeclarations << "\n";
178
179 src << "layout(location = " << location << ") in highp vec4 a_position;\n";
180
181 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
182 {
183 location++;
184 src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
185 << "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
186 }
187
188 src << "\nvoid main (void)\n{\n"
189 << " gl_Position = a_position;\n"
190 << " gl_PointSize = 1.0;\n";
191
192 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
193 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
194
195 src << "}\n";
196
197 return src.str();
198 }
199
generateVertexShader(const ShaderSpec & shaderSpec,const std::string & inputPrefix,const std::string & outputPrefix)200 static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
201 {
202 DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
203
204 std::ostringstream src;
205
206 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
207
208 if (!shaderSpec.globalDeclarations.empty())
209 src << shaderSpec.globalDeclarations << "\n";
210
211 src << "layout(location = 0) in highp vec4 a_position;\n";
212
213 int locationNumber = 1;
214 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
215 {
216 src << "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
217 }
218
219 locationNumber = 0;
220 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
221 {
222 DE_ASSERT(output->varType.isBasicType());
223
224 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
225 {
226 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
227 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
228 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
229
230 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
231 }
232 else
233 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
234 }
235
236 src << "\n"
237 << "void main (void)\n"
238 << "{\n"
239 << " gl_Position = a_position;\n"
240 << " gl_PointSize = 1.0;\n";
241
242 // Declare & fetch local input variables
243 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
244 {
245 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
246 {
247 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
248 src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
249 }
250 else
251 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
252 }
253
254 // Declare local output variables
255 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
256 {
257 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
258 {
259 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
260 src << "\t" << tname << " " << output->name << ";\n";
261 const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
262 src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
263 }
264 else
265 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
266 }
267
268 // Operation - indented to correct level.
269 {
270 std::istringstream opSrc (shaderSpec.source);
271 std::string line;
272
273 while (std::getline(opSrc, line))
274 src << "\t" << line << "\n";
275 }
276
277 if (shaderSpec.packFloat16Bit)
278 packFloat16Bit(src, shaderSpec.outputs);
279
280 // Assignments to outputs.
281 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
282 {
283 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
284 {
285 src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
286 }
287 else
288 {
289 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
290 {
291 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
292 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
293
294 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
295 }
296 else
297 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
298 }
299 }
300
301 src << "}\n";
302
303 return src.str();
304 }
305
306 struct FragmentOutputLayout
307 {
308 std::vector<const Symbol*> locationSymbols; //! Symbols by location
309 std::map<std::string, int> locationMap; //! Map from symbol name to start location
310 };
311
generateFragShaderOutputDecl(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & outputPrefix)312 static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
313 {
314 for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
315 {
316 const Symbol& output = shaderSpec.outputs[outNdx];
317 const int location = de::lookup(outLocationMap, output.name);
318 const std::string outVarName = outputPrefix + output.name;
319 glu::VariableDeclaration decl (output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));
320
321 TCU_CHECK_INTERNAL(output.varType.isBasicType());
322
323 if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
324 {
325 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
326 const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
327 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);
328
329 decl.varType = uintType;
330 src << decl << ";\n";
331 }
332 else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
333 {
334 const int vecSize = glu::getDataTypeScalarSize(output.varType.getBasicType());
335 const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
336 const glu::VarType intType (intBasicType, glu::PRECISION_HIGHP);
337
338 decl.varType = intType;
339 src << decl << ";\n";
340 }
341 else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
342 {
343 const int vecSize = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
344 const int numVecs = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
345 const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
346 const glu::VarType uintType (uintBasicType, glu::PRECISION_HIGHP);
347
348 decl.varType = uintType;
349 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
350 {
351 decl.name = outVarName + "_" + de::toString(vecNdx);
352 decl.layout.location = location + vecNdx;
353 src << decl << ";\n";
354 }
355 }
356 else
357 src << decl << ";\n";
358 }
359 }
360
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)361 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
362 {
363 if (isInput16Bit)
364 packFloat16Bit(src, shaderSpec.outputs);
365
366 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
367 {
368 const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
369
370 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
371 src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
372 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
373 {
374 const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
375
376 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
377 if (useIntOutputs)
378 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
379 else
380 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
381 }
382 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
383 {
384 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
385 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
386
387 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
388 }
389 else
390 src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
391 }
392 }
393
generatePassthroughFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)394 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
395 {
396 std::ostringstream src;
397
398 src <<"#version 450\n";
399
400 if (!shaderSpec.globalDeclarations.empty())
401 src << shaderSpec.globalDeclarations << "\n";
402
403 int locationNumber = 0;
404 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
405 {
406 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
407 {
408 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
409 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
410 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
411
412 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
413 }
414 else
415 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
416 }
417
418 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
419
420 src << "\nvoid main (void)\n{\n";
421
422 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
423
424 src << "}\n";
425
426 return src.str();
427 }
428
generateGeometryShader(const ShaderSpec & shaderSpec,const std::string & inputPrefix,const std::string & outputPrefix,const bool pointSizeSupported)429 static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
430 {
431 DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
432
433 std::ostringstream src;
434
435 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
436
437 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
438 src << "#extension GL_EXT_geometry_shader : require\n";
439
440 if (!shaderSpec.globalDeclarations.empty())
441 src << shaderSpec.globalDeclarations << "\n";
442
443 src << "layout(points) in;\n"
444 << "layout(points, max_vertices = 1) out;\n";
445
446 int locationNumber = 0;
447 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
448 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
449
450 locationNumber = 0;
451 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
452 {
453 DE_ASSERT(output->varType.isBasicType());
454
455 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
456 {
457 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
458 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
459 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
460
461 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
462 }
463 else
464 src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
465 }
466
467 src << "\n"
468 << "void main (void)\n"
469 << "{\n"
470 << " gl_Position = gl_in[0].gl_Position;\n"
471 << (pointSizeSupported ? " gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");
472
473 // Fetch input variables
474 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
475 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
476
477 // Declare local output variables.
478 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
479 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
480
481 src << "\n";
482
483 // Operation - indented to correct level.
484 {
485 std::istringstream opSrc (shaderSpec.source);
486 std::string line;
487
488 while (std::getline(opSrc, line))
489 src << "\t" << line << "\n";
490 }
491
492 // Assignments to outputs.
493 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
494 {
495 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
496 {
497 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
498 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
499
500 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
501 }
502 else
503 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
504 }
505
506 src << " EmitVertex();\n"
507 << " EndPrimitive();\n"
508 << "}\n";
509
510 return src.str();
511 }
512
generateFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)513 static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
514 {
515 std::ostringstream src;
516 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
517 if (!shaderSpec.globalDeclarations.empty())
518 src << shaderSpec.globalDeclarations << "\n";
519
520 int locationNumber = 0;
521 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
522 {
523 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
524 }
525
526 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
527
528 src << "\nvoid main (void)\n{\n";
529
530 // Declare & fetch local input variables
531 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
532 {
533 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
534 {
535 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
536 src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
537 }
538 else
539 src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
540 }
541
542 // Declare output variables
543 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
544 {
545 if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
546 {
547 const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
548 src << "\t" << tname << " " << output->name << ";\n";
549 const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
550 src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
551 }
552 else
553 src << "\t" << glu::declare(output->varType, output->name) << ";\n";
554 }
555
556 // Operation - indented to correct level.
557 {
558 std::istringstream opSrc (shaderSpec.source);
559 std::string line;
560
561 while (std::getline(opSrc, line))
562 src << "\t" << line << "\n";
563 }
564
565 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);
566
567 src << "}\n";
568
569 return src.str();
570 }
571
572 // FragmentOutExecutor
573
574 class FragmentOutExecutor : public ShaderExecutor
575 {
576 public:
577 FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
578 virtual ~FragmentOutExecutor (void);
579
580 virtual void execute (int numValues,
581 const void* const* inputs,
582 void* const* outputs,
583 VkDescriptorSet extraResources);
584
585 protected:
586 const glu::ShaderType m_shaderType;
587 const FragmentOutputLayout m_outputLayout;
588
589 private:
590 void bindAttributes (int numValues,
591 const void* const* inputs);
592
593 void addAttribute (deUint32 bindingLocation,
594 VkFormat format,
595 deUint32 sizePerElement,
596 deUint32 count,
597 const void* dataPtr);
598 // reinit render data members
599 virtual void clearRenderData (void);
600
601 const VkDescriptorSetLayout m_extraResourcesLayout;
602
603 std::vector<VkVertexInputBindingDescription> m_vertexBindingDescriptions;
604 std::vector<VkVertexInputAttributeDescription> m_vertexAttributeDescriptions;
605 std::vector<VkBufferSp> m_vertexBuffers;
606 std::vector<AllocationSp> m_vertexBufferAllocs;
607 };
608
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)609 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
610 {
611 FragmentOutputLayout ret;
612 int location = 0;
613
614 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
615 {
616 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
617
618 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
619 de::insert(ret.locationMap, it->name, location);
620 location += numLocations;
621
622 for (int ndx = 0; ndx < numLocations; ++ndx)
623 ret.locationSymbols.push_back(&*it);
624 }
625
626 return ret;
627 }
628
FragmentOutExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)629 FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
630 : ShaderExecutor (context, shaderSpec)
631 , m_shaderType (shaderType)
632 , m_outputLayout (computeFragmentOutputLayout(m_shaderSpec.outputs))
633 , m_extraResourcesLayout (extraResourcesLayout)
634 {
635 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
636 const InstanceInterface& vki = m_context.getInstanceInterface();
637
638 // Input attributes
639 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
640 {
641 const Symbol& symbol = m_shaderSpec.inputs[inputNdx];
642 const glu::DataType basicType = symbol.varType.getBasicType();
643 const VkFormat format = getAttributeFormat(basicType);
644 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
645 if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
646 TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
647 }
648 }
649
~FragmentOutExecutor(void)650 FragmentOutExecutor::~FragmentOutExecutor (void)
651 {
652 }
653
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)654 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
655 {
656 std::vector<tcu::Vec2> positions(numValues);
657 for (int valNdx = 0; valNdx < numValues; valNdx++)
658 {
659 const int ix = valNdx % renderSize.x();
660 const int iy = valNdx / renderSize.x();
661 const float fx = -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
662 const float fy = -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
663
664 positions[valNdx] = tcu::Vec2(fx, fy);
665 }
666
667 return positions;
668 }
669
getRenderbufferFormatForOutput(const glu::VarType & outputType,bool useIntOutputs)670 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
671 {
672 const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
673 {
674 tcu::TextureFormat::R,
675 tcu::TextureFormat::RG,
676 tcu::TextureFormat::RGBA, // No RGB variants available.
677 tcu::TextureFormat::RGBA
678 };
679
680 const glu::DataType basicType = outputType.getBasicType();
681 const int numComps = glu::getDataTypeNumComponents(basicType);
682 tcu::TextureFormat::ChannelType channelType;
683
684 switch (glu::getDataTypeScalarType(basicType))
685 {
686 case glu::TYPE_UINT: channelType = tcu::TextureFormat::UNSIGNED_INT32; break;
687 case glu::TYPE_INT: channelType = tcu::TextureFormat::SIGNED_INT32; break;
688 case glu::TYPE_BOOL: channelType = tcu::TextureFormat::SIGNED_INT32; break;
689 case glu::TYPE_FLOAT: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT; break;
690 case glu::TYPE_FLOAT16: channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT; break;
691 default:
692 throw tcu::InternalError("Invalid output type");
693 }
694
695 DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
696
697 return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
698 }
699
getAttributeFormat(const glu::DataType dataType)700 static VkFormat getAttributeFormat (const glu::DataType dataType)
701 {
702 switch (dataType)
703 {
704 case glu::TYPE_FLOAT16: return VK_FORMAT_R16_SFLOAT;
705 case glu::TYPE_FLOAT16_VEC2: return VK_FORMAT_R16G16_SFLOAT;
706 case glu::TYPE_FLOAT16_VEC3: return VK_FORMAT_R16G16B16_SFLOAT;
707 case glu::TYPE_FLOAT16_VEC4: return VK_FORMAT_R16G16B16A16_SFLOAT;
708
709 case glu::TYPE_FLOAT: return VK_FORMAT_R32_SFLOAT;
710 case glu::TYPE_FLOAT_VEC2: return VK_FORMAT_R32G32_SFLOAT;
711 case glu::TYPE_FLOAT_VEC3: return VK_FORMAT_R32G32B32_SFLOAT;
712 case glu::TYPE_FLOAT_VEC4: return VK_FORMAT_R32G32B32A32_SFLOAT;
713
714 case glu::TYPE_INT: return VK_FORMAT_R32_SINT;
715 case glu::TYPE_INT_VEC2: return VK_FORMAT_R32G32_SINT;
716 case glu::TYPE_INT_VEC3: return VK_FORMAT_R32G32B32_SINT;
717 case glu::TYPE_INT_VEC4: return VK_FORMAT_R32G32B32A32_SINT;
718
719 case glu::TYPE_UINT: return VK_FORMAT_R32_UINT;
720 case glu::TYPE_UINT_VEC2: return VK_FORMAT_R32G32_UINT;
721 case glu::TYPE_UINT_VEC3: return VK_FORMAT_R32G32B32_UINT;
722 case glu::TYPE_UINT_VEC4: return VK_FORMAT_R32G32B32A32_UINT;
723
724 case glu::TYPE_FLOAT_MAT2: return VK_FORMAT_R32G32_SFLOAT;
725 case glu::TYPE_FLOAT_MAT2X3: return VK_FORMAT_R32G32B32_SFLOAT;
726 case glu::TYPE_FLOAT_MAT2X4: return VK_FORMAT_R32G32B32A32_SFLOAT;
727 case glu::TYPE_FLOAT_MAT3X2: return VK_FORMAT_R32G32_SFLOAT;
728 case glu::TYPE_FLOAT_MAT3: return VK_FORMAT_R32G32B32_SFLOAT;
729 case glu::TYPE_FLOAT_MAT3X4: return VK_FORMAT_R32G32B32A32_SFLOAT;
730 case glu::TYPE_FLOAT_MAT4X2: return VK_FORMAT_R32G32_SFLOAT;
731 case glu::TYPE_FLOAT_MAT4X3: return VK_FORMAT_R32G32B32_SFLOAT;
732 case glu::TYPE_FLOAT_MAT4: return VK_FORMAT_R32G32B32A32_SFLOAT;
733 default:
734 DE_ASSERT(false);
735 return VK_FORMAT_UNDEFINED;
736 }
737 }
738
addAttribute(deUint32 bindingLocation,VkFormat format,deUint32 sizePerElement,deUint32 count,const void * dataPtr)739 void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
740 {
741 // Add binding specification
742 const deUint32 binding = (deUint32)m_vertexBindingDescriptions.size();
743 const VkVertexInputBindingDescription bindingDescription =
744 {
745 binding,
746 sizePerElement,
747 VK_VERTEX_INPUT_RATE_VERTEX
748 };
749
750 m_vertexBindingDescriptions.push_back(bindingDescription);
751
752 // Add location and format specification
753 const VkVertexInputAttributeDescription attributeDescription =
754 {
755 bindingLocation, // deUint32 location;
756 binding, // deUint32 binding;
757 format, // VkFormat format;
758 0u, // deUint32 offsetInBytes;
759 };
760
761 m_vertexAttributeDescriptions.push_back(attributeDescription);
762
763 // Upload data to buffer
764 const VkDevice vkDevice = m_context.getDevice();
765 const DeviceInterface& vk = m_context.getDeviceInterface();
766 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
767
768 const VkDeviceSize inputSize = sizePerElement * count;
769 const VkBufferCreateInfo vertexBufferParams =
770 {
771 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
772 DE_NULL, // const void* pNext;
773 0u, // VkBufferCreateFlags flags;
774 inputSize, // VkDeviceSize size;
775 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, // VkBufferUsageFlags usage;
776 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
777 1u, // deUint32 queueFamilyCount;
778 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
779 };
780
781 Move<VkBuffer> buffer = createBuffer(vk, vkDevice, &vertexBufferParams);
782 de::MovePtr<Allocation> alloc = m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
783
784 VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
785
786 deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
787 flushAlloc(vk, vkDevice, *alloc);
788
789 m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
790 m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
791 }
792
bindAttributes(int numValues,const void * const * inputs)793 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
794 {
795 // Input attributes
796 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
797 {
798 const Symbol& symbol = m_shaderSpec.inputs[inputNdx];
799 const void* ptr = inputs[inputNdx];
800 const glu::DataType basicType = symbol.varType.getBasicType();
801 const int vecSize = glu::getDataTypeScalarSize(basicType);
802 const VkFormat format = getAttributeFormat(basicType);
803 int elementSize = 0;
804 int numAttrsToAdd = 1;
805
806 if (glu::isDataTypeFloatOrVec(basicType))
807 elementSize = sizeof(float);
808 else if (glu::isDataTypeFloat16OrVec(basicType))
809 elementSize = sizeof(deUint16);
810 else if (glu::isDataTypeIntOrIVec(basicType))
811 elementSize = sizeof(int);
812 else if (glu::isDataTypeUintOrUVec(basicType))
813 elementSize = sizeof(deUint32);
814 else if (glu::isDataTypeMatrix(basicType))
815 {
816 int numRows = glu::getDataTypeMatrixNumRows(basicType);
817 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
818
819 elementSize = numRows * numCols * (int)sizeof(float);
820 numAttrsToAdd = numCols;
821 }
822 else
823 DE_ASSERT(false);
824
825 // add attributes, in case of matrix every column is binded as an attribute
826 for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
827 {
828 addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
829 }
830 }
831 }
832
clearRenderData(void)833 void FragmentOutExecutor::clearRenderData (void)
834 {
835 m_vertexBindingDescriptions.clear();
836 m_vertexAttributeDescriptions.clear();
837 m_vertexBuffers.clear();
838 m_vertexBufferAllocs.clear();
839 }
840
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)841 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
842 {
843 const VkDescriptorSetLayoutCreateInfo createInfo =
844 {
845 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
846 DE_NULL,
847 (VkDescriptorSetLayoutCreateFlags)0,
848 0u,
849 DE_NULL,
850 };
851 return createDescriptorSetLayout(vkd, device, &createInfo);
852 }
853
createDummyDescriptorPool(const DeviceInterface & vkd,VkDevice device)854 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
855 {
856 const VkDescriptorPoolSize dummySize =
857 {
858 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
859 1u,
860 };
861 const VkDescriptorPoolCreateInfo createInfo =
862 {
863 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
864 DE_NULL,
865 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
866 1u,
867 1u,
868 &dummySize
869 };
870 return createDescriptorPool(vkd, device, &createInfo);
871 }
872
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)873 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
874 {
875 const VkDescriptorSetAllocateInfo allocInfo =
876 {
877 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
878 DE_NULL,
879 pool,
880 1u,
881 &layout,
882 };
883 return allocateDescriptorSet(vkd, device, &allocInfo);
884 }
885
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)886 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
887 {
888 const VkDevice vkDevice = m_context.getDevice();
889 const DeviceInterface& vk = m_context.getDeviceInterface();
890 const VkQueue queue = m_context.getUniversalQueue();
891 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
892 Allocator& memAlloc = m_context.getDefaultAllocator();
893
894 const deUint32 renderSizeX = de::min(static_cast<deUint32>(DEFAULT_RENDER_WIDTH), (deUint32)numValues);
895 const deUint32 renderSizeY = ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
896 const tcu::UVec2 renderSize (renderSizeX, renderSizeY);
897 std::vector<tcu::Vec2> positions;
898
899 const bool useGeometryShader = m_shaderType == glu::SHADERTYPE_GEOMETRY;
900
901 std::vector<VkImageSp> colorImages;
902 std::vector<VkImageMemoryBarrier> colorImagePreRenderBarriers;
903 std::vector<VkImageMemoryBarrier> colorImagePostRenderBarriers;
904 std::vector<AllocationSp> colorImageAllocs;
905 std::vector<VkAttachmentDescription> attachments;
906 std::vector<VkClearValue> attachmentClearValues;
907 std::vector<VkImageViewSp> colorImageViews;
908
909 std::vector<VkPipelineColorBlendAttachmentState> colorBlendAttachmentStates;
910 std::vector<VkAttachmentReference> colorAttachmentReferences;
911
912 Move<VkRenderPass> renderPass;
913 Move<VkFramebuffer> framebuffer;
914 Move<VkPipelineLayout> pipelineLayout;
915 Move<VkPipeline> graphicsPipeline;
916
917 Move<VkShaderModule> vertexShaderModule;
918 Move<VkShaderModule> geometryShaderModule;
919 Move<VkShaderModule> fragmentShaderModule;
920
921 Move<VkCommandPool> cmdPool;
922 Move<VkCommandBuffer> cmdBuffer;
923
924 Unique<VkDescriptorSetLayout> emptyDescriptorSetLayout (createEmptyDescriptorSetLayout(vk, vkDevice));
925 Unique<VkDescriptorPool> dummyDescriptorPool (createDummyDescriptorPool(vk, vkDevice));
926 Unique<VkDescriptorSet> emptyDescriptorSet (allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));
927
928 clearRenderData();
929
930 // Compute positions - 1px points are used to drive fragment shading.
931 positions = computeVertexPositions(numValues, renderSize.cast<int>());
932
933 // Bind attributes
934 addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
935 bindAttributes(numValues, inputs);
936
937 // Create color images
938 {
939 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
940 {
941 VK_FALSE, // VkBool32 blendEnable;
942 VK_BLEND_FACTOR_ONE, // VkBlendFactor srcColorBlendFactor;
943 VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
944 VK_BLEND_OP_ADD, // VkBlendOp blendOpColor;
945 VK_BLEND_FACTOR_ONE, // VkBlendFactor srcAlphaBlendFactor;
946 VK_BLEND_FACTOR_ZERO, // VkBlendFactor destAlphaBlendFactor;
947 VK_BLEND_OP_ADD, // VkBlendOp blendOpAlpha;
948 (VK_COLOR_COMPONENT_R_BIT |
949 VK_COLOR_COMPONENT_G_BIT |
950 VK_COLOR_COMPONENT_B_BIT |
951 VK_COLOR_COMPONENT_A_BIT) // VkColorComponentFlags colorWriteMask;
952 };
953
954 for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
955 {
956 const bool isFloat = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
957 const bool isFloat16b = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
958 const bool isSigned = isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
959 const bool isBool = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
960 const VkFormat colorFormat = isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT));
961
962 {
963 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
964 if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
965 TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
966 }
967
968 const VkImageCreateInfo colorImageParams =
969 {
970 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
971 DE_NULL, // const void* pNext;
972 0u, // VkImageCreateFlags flags;
973 VK_IMAGE_TYPE_2D, // VkImageType imageType;
974 colorFormat, // VkFormat format;
975 { renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
976 1u, // deUint32 mipLevels;
977 1u, // deUint32 arraySize;
978 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
979 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
980 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
981 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
982 1u, // deUint32 queueFamilyCount;
983 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
984 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
985 };
986
987 const VkAttachmentDescription colorAttachmentDescription =
988 {
989 0u, // VkAttachmentDescriptorFlags flags;
990 colorFormat, // VkFormat format;
991 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
992 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
993 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
994 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
995 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
996 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout initialLayout;
997 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
998 };
999
1000 Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1001 colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1002 attachmentClearValues.push_back(getDefaultClearColor());
1003
1004 // Allocate and bind color image memory
1005 {
1006 de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1007 VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1008 colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1009
1010 attachments.push_back(colorAttachmentDescription);
1011 colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1012
1013 const VkAttachmentReference colorAttachmentReference =
1014 {
1015 (deUint32) (colorImages.size() - 1), // deUint32 attachment;
1016 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
1017 };
1018
1019 colorAttachmentReferences.push_back(colorAttachmentReference);
1020 }
1021
1022 // Create color attachment view
1023 {
1024 const VkImageViewCreateInfo colorImageViewParams =
1025 {
1026 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1027 DE_NULL, // const void* pNext;
1028 0u, // VkImageViewCreateFlags flags;
1029 colorImages.back().get()->get(), // VkImage image;
1030 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
1031 colorFormat, // VkFormat format;
1032 {
1033 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1034 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1035 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1036 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
1037 }, // VkComponentMapping components;
1038 {
1039 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1040 0u, // deUint32 baseMipLevel;
1041 1u, // deUint32 mipLevels;
1042 0u, // deUint32 baseArraySlice;
1043 1u // deUint32 arraySize;
1044 } // VkImageSubresourceRange subresourceRange;
1045 };
1046
1047 Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1048 colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1049
1050 const VkImageMemoryBarrier colorImagePreRenderBarrier =
1051 {
1052 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1053 DE_NULL, // pNext
1054 0u, // srcAccessMask
1055 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1056 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // dstAccessMask
1057 VK_IMAGE_LAYOUT_UNDEFINED, // oldLayout
1058 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // newLayout
1059 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
1060 VK_QUEUE_FAMILY_IGNORED, // dstQueueFamilyIndex
1061 colorImages.back().get()->get(), // image
1062 {
1063 VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1064 0u, // baseMipLevel
1065 1u, // levelCount
1066 0u, // baseArrayLayer
1067 1u, // layerCount
1068 } // subresourceRange
1069 };
1070 colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1071
1072 const VkImageMemoryBarrier colorImagePostRenderBarrier =
1073 {
1074 VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, // sType
1075 DE_NULL, // pNext
1076 (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1077 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // srcAccessMask
1078 VK_ACCESS_TRANSFER_READ_BIT, // dstAccessMask
1079 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // oldLayout
1080 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // newLayout
1081 VK_QUEUE_FAMILY_IGNORED, // srcQueueFamilyIndex
1082 VK_QUEUE_FAMILY_IGNORED, // dstQueueFamilyIndex
1083 colorImages.back().get()->get(), // image
1084 {
1085 VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1086 0u, // baseMipLevel
1087 1u, // levelCount
1088 0u, // baseArrayLayer
1089 1u, // layerCount
1090 } // subresourceRange
1091 };
1092 colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1093 }
1094 }
1095 }
1096
1097 // Create render pass
1098 {
1099 const VkSubpassDescription subpassDescription =
1100 {
1101 0u, // VkSubpassDescriptionFlags flags;
1102 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
1103 0u, // deUint32 inputCount;
1104 DE_NULL, // const VkAttachmentReference* pInputAttachments;
1105 (deUint32)colorImages.size(), // deUint32 colorCount;
1106 &colorAttachmentReferences[0], // const VkAttachmentReference* colorAttachments;
1107 DE_NULL, // const VkAttachmentReference* resolveAttachments;
1108 DE_NULL, // VkAttachmentReference depthStencilAttachment;
1109 0u, // deUint32 preserveCount;
1110 DE_NULL // const VkAttachmentReference* pPreserveAttachments;
1111 };
1112
1113 const VkRenderPassCreateInfo renderPassParams =
1114 {
1115 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
1116 DE_NULL, // const void* pNext;
1117 (VkRenderPassCreateFlags)0, // VkRenderPassCreateFlags flags;
1118 (deUint32)attachments.size(), // deUint32 attachmentCount;
1119 &attachments[0], // const VkAttachmentDescription* pAttachments;
1120 1u, // deUint32 subpassCount;
1121 &subpassDescription, // const VkSubpassDescription* pSubpasses;
1122 0u, // deUint32 dependencyCount;
1123 DE_NULL // const VkSubpassDependency* pDependencies;
1124 };
1125
1126 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1127 }
1128
1129 // Create framebuffer
1130 {
1131 std::vector<VkImageView> views(colorImageViews.size());
1132 for (size_t i = 0; i < colorImageViews.size(); i++)
1133 {
1134 views[i] = colorImageViews[i].get()->get();
1135 }
1136
1137 const VkFramebufferCreateInfo framebufferParams =
1138 {
1139 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
1140 DE_NULL, // const void* pNext;
1141 0u, // VkFramebufferCreateFlags flags;
1142 *renderPass, // VkRenderPass renderPass;
1143 (deUint32)views.size(), // deUint32 attachmentCount;
1144 &views[0], // const VkImageView* pAttachments;
1145 (deUint32)renderSize.x(), // deUint32 width;
1146 (deUint32)renderSize.y(), // deUint32 height;
1147 1u // deUint32 layers;
1148 };
1149
1150 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1151 }
1152
1153 // Create pipeline layout
1154 {
1155 const VkDescriptorSetLayout setLayouts[] =
1156 {
1157 *emptyDescriptorSetLayout,
1158 m_extraResourcesLayout
1159 };
1160 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
1161 {
1162 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
1163 DE_NULL, // const void* pNext;
1164 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
1165 (m_extraResourcesLayout != 0 ? 2u : 0u), // deUint32 descriptorSetCount;
1166 setLayouts, // const VkDescriptorSetLayout* pSetLayouts;
1167 0u, // deUint32 pushConstantRangeCount;
1168 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
1169 };
1170
1171 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1172 }
1173
1174 // Create shaders
1175 {
1176 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1177 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1178
1179 if (useGeometryShader)
1180 {
1181 if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1182 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1183 else
1184 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1185 }
1186 }
1187
1188 // Create pipeline
1189 {
1190 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1191 {
1192 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1193 DE_NULL, // const void* pNext;
1194 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
1195 (deUint32)m_vertexBindingDescriptions.size(), // deUint32 bindingCount;
1196 &m_vertexBindingDescriptions[0], // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1197 (deUint32)m_vertexAttributeDescriptions.size(), // deUint32 attributeCount;
1198 &m_vertexAttributeDescriptions[0], // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
1199 };
1200
1201 const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
1202 const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));
1203
1204 const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1205 {
1206 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
1207 DE_NULL, // const void* pNext;
1208 (VkPipelineColorBlendStateCreateFlags)0, // VkPipelineColorBlendStateCreateFlags flags;
1209 VK_FALSE, // VkBool32 logicOpEnable;
1210 VK_LOGIC_OP_COPY, // VkLogicOp logicOp;
1211 (deUint32)colorBlendAttachmentStates.size(), // deUint32 attachmentCount;
1212 &colorBlendAttachmentStates[0], // const VkPipelineColorBlendAttachmentState* pAttachments;
1213 { 0.0f, 0.0f, 0.0f, 0.0f } // float blendConst[4];
1214 };
1215
1216 graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
1217 vkDevice, // const VkDevice device
1218 *pipelineLayout, // const VkPipelineLayout pipelineLayout
1219 *vertexShaderModule, // const VkShaderModule vertexShaderModule
1220 DE_NULL, // const VkShaderModule tessellationControlShaderModule
1221 DE_NULL, // const VkShaderModule tessellationEvalShaderModule
1222 useGeometryShader ? *geometryShaderModule : DE_NULL, // const VkShaderModule geometryShaderModule
1223 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
1224 *renderPass, // const VkRenderPass renderPass
1225 viewports, // const std::vector<VkViewport>& viewports
1226 scissors, // const std::vector<VkRect2D>& scissors
1227 VK_PRIMITIVE_TOPOLOGY_POINT_LIST, // const VkPrimitiveTopology topology
1228 0u, // const deUint32 subpass
1229 0u, // const deUint32 patchControlPoints
1230 &vertexInputStateParams, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
1231 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1232 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
1233 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
1234 &colorBlendStateParams); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
1235 }
1236
1237 // Create command pool
1238 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1239
1240 // Create command buffer
1241 {
1242 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1243
1244 beginCommandBuffer(vk, *cmdBuffer);
1245
1246 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1247 0, (const VkMemoryBarrier*)DE_NULL,
1248 0, (const VkBufferMemoryBarrier*)DE_NULL,
1249 (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1250 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1251
1252 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1253
1254 if (m_extraResourcesLayout != 0)
1255 {
1256 DE_ASSERT(extraResources != 0);
1257 const VkDescriptorSet descriptorSets[] = { *emptyDescriptorSet, extraResources };
1258 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1259 }
1260 else
1261 DE_ASSERT(extraResources == 0);
1262
1263 const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1264
1265 std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1266
1267 std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1268 for (size_t i = 0; i < numberOfVertexAttributes; i++)
1269 {
1270 buffers[i] = m_vertexBuffers[i].get()->get();
1271 }
1272
1273 vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1274 vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1275
1276 endRenderPass(vk, *cmdBuffer);
1277 vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1278 0, (const VkMemoryBarrier*)DE_NULL,
1279 0, (const VkBufferMemoryBarrier*)DE_NULL,
1280 (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1281
1282 endCommandBuffer(vk, *cmdBuffer);
1283 }
1284
1285 // Execute Draw
1286 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1287
1288 // Read back result and output
1289 {
1290 const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1291 const VkBufferCreateInfo readImageBufferParams =
1292 {
1293 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1294 DE_NULL, // const void* pNext;
1295 0u, // VkBufferCreateFlags flags;
1296 imageSizeBytes, // VkDeviceSize size;
1297 VK_BUFFER_USAGE_TRANSFER_DST_BIT, // VkBufferUsageFlags usage;
1298 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1299 1u, // deUint32 queueFamilyCount;
1300 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
1301 };
1302
1303 // constants for image copy
1304 Move<VkCommandPool> copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1305
1306 const VkBufferImageCopy copyParams =
1307 {
1308 0u, // VkDeviceSize bufferOffset;
1309 (deUint32)renderSize.x(), // deUint32 bufferRowLength;
1310 (deUint32)renderSize.y(), // deUint32 bufferImageHeight;
1311 {
1312 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspect aspect;
1313 0u, // deUint32 mipLevel;
1314 0u, // deUint32 arraySlice;
1315 1u, // deUint32 arraySize;
1316 }, // VkImageSubresource imageSubresource;
1317 { 0u, 0u, 0u }, // VkOffset3D imageOffset;
1318 { renderSize.x(), renderSize.y(), 1u } // VkExtent3D imageExtent;
1319 };
1320
1321 // Read back pixels.
1322 for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1323 {
1324 const Symbol& output = m_shaderSpec.outputs[outNdx];
1325 const int outSize = output.varType.getScalarSize();
1326 const int outVecSize = glu::getDataTypeNumComponents(output.varType.getBasicType());
1327 const int outNumLocs = glu::getDataTypeNumLocations(output.varType.getBasicType());
1328 const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);
1329
1330 for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1331 {
1332 tcu::TextureLevel tmpBuf;
1333 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, false);
1334 const tcu::TextureFormat readFormat (tcu::TextureFormat::RGBA, format.type);
1335 const Unique<VkBuffer> readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1336 const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1337
1338 VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1339
1340 // Copy image to buffer
1341 {
1342
1343 Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1344
1345 beginCommandBuffer(vk, *copyCmdBuffer);
1346 vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, ©Params);
1347 endCommandBuffer(vk, *copyCmdBuffer);
1348
1349 submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1350 }
1351
1352 invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1353
1354 tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1355
1356 const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1357 const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1358
1359 tcu::copy(tmpBuf.getAccess(), resultAccess);
1360
1361 if (isOutput16Bit(static_cast<size_t>(outNdx)))
1362 {
1363 deUint16* dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1364 if (outSize == 4 && outNumLocs == 1)
1365 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1366 else
1367 {
1368 for (int valNdx = 0; valNdx < numValues; valNdx++)
1369 {
1370 const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1371 deUint16* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1372 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1373 }
1374 }
1375 }
1376 else
1377 {
1378 deUint32* dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1379 if (outSize == 4 && outNumLocs == 1)
1380 deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1381 else
1382 {
1383 for (int valNdx = 0; valNdx < numValues; valNdx++)
1384 {
1385 const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1386 deUint32* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1387 deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1388 }
1389 }
1390 }
1391 }
1392 }
1393 }
1394 }
1395
1396 // VertexShaderExecutor
1397
1398 class VertexShaderExecutor : public FragmentOutExecutor
1399 {
1400 public:
1401 VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1402 virtual ~VertexShaderExecutor (void);
1403
1404 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& dst);
1405 };
1406
VertexShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1407 VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1408 : FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1409 {
1410 }
1411
~VertexShaderExecutor(void)1412 VertexShaderExecutor::~VertexShaderExecutor (void)
1413 {
1414 }
1415
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1416 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1417 {
1418 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1419
1420 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1421 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1422 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1423 }
1424
1425 // GeometryShaderExecutor
1426
1427 class GeometryShaderExecutor : public FragmentOutExecutor
1428 {
1429 public:
1430 GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1431 virtual ~GeometryShaderExecutor (void);
1432
1433 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1434
1435 };
1436
GeometryShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1437 GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1438 : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1439 {
1440 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1441
1442 if (!features.geometryShader)
1443 TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1444 }
1445
~GeometryShaderExecutor(void)1446 GeometryShaderExecutor::~GeometryShaderExecutor (void)
1447 {
1448 }
1449
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1450 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1451 {
1452 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1453
1454 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1455
1456 programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1457 programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1458
1459 /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1460 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1461
1462 }
1463
1464 // FragmentShaderExecutor
1465
1466 class FragmentShaderExecutor : public FragmentOutExecutor
1467 {
1468 public:
1469 FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1470 virtual ~FragmentShaderExecutor (void);
1471
1472 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1473
1474 };
1475
FragmentShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1476 FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1477 : FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
1478 {
1479 }
1480
~FragmentShaderExecutor(void)1481 FragmentShaderExecutor::~FragmentShaderExecutor (void)
1482 {
1483 }
1484
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1485 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1486 {
1487 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1488
1489 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1490 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1491 programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1492 }
1493
1494 // Shared utilities for compute and tess executors
1495
getVecStd430ByteAlignment(glu::DataType type)1496 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1497 {
1498 switch (type)
1499 {
1500 case glu::TYPE_FLOAT16: return 2u;
1501 case glu::TYPE_FLOAT16_VEC2: return 4u;
1502 case glu::TYPE_FLOAT16_VEC3: return 8u;
1503 case glu::TYPE_FLOAT16_VEC4: return 8u;
1504 default: break;
1505 }
1506
1507 switch (glu::getDataTypeScalarSize(type))
1508 {
1509 case 1: return 4u;
1510 case 2: return 8u;
1511 case 3: return 16u;
1512 case 4: return 16u;
1513 default:
1514 DE_ASSERT(false);
1515 return 0u;
1516 }
1517 }
1518
1519 class BufferIoExecutor : public ShaderExecutor
1520 {
1521 public:
1522 BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec);
1523 virtual ~BufferIoExecutor (void);
1524
1525 protected:
1526 enum
1527 {
1528 INPUT_BUFFER_BINDING = 0,
1529 OUTPUT_BUFFER_BINDING = 1,
1530 };
1531
1532 void initBuffers (int numValues);
getInputBuffer(void) const1533 VkBuffer getInputBuffer (void) const { return *m_inputBuffer; }
getOutputBuffer(void) const1534 VkBuffer getOutputBuffer (void) const { return *m_outputBuffer; }
getInputStride(void) const1535 deUint32 getInputStride (void) const { return getLayoutStride(m_inputLayout); }
getOutputStride(void) const1536 deUint32 getOutputStride (void) const { return getLayoutStride(m_outputLayout); }
1537
1538 void uploadInputBuffer (const void* const* inputPtrs, int numValues);
1539 void readOutputBuffer (void* const* outputPtrs, int numValues);
1540
1541 static void declareBufferBlocks (std::ostream& src, const ShaderSpec& spec);
1542 static void generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);
1543
1544 protected:
1545 Move<VkBuffer> m_inputBuffer;
1546 Move<VkBuffer> m_outputBuffer;
1547
1548 private:
1549 struct VarLayout
1550 {
1551 deUint32 offset;
1552 deUint32 stride;
1553 deUint32 matrixStride;
1554
VarLayoutvkt::shaderexecutor::__anon9d58b36f0111::BufferIoExecutor::VarLayout1555 VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
1556 };
1557
1558 static void computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
1559 static deUint32 getLayoutStride (const vector<VarLayout>& layout);
1560
1561 static void copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
1562 static void copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);
1563
1564 de::MovePtr<Allocation> m_inputAlloc;
1565 de::MovePtr<Allocation> m_outputAlloc;
1566
1567 vector<VarLayout> m_inputLayout;
1568 vector<VarLayout> m_outputLayout;
1569 };
1570
BufferIoExecutor(Context & context,const ShaderSpec & shaderSpec)1571 BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
1572 : ShaderExecutor(context, shaderSpec)
1573 {
1574 computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
1575 computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
1576 }
1577
~BufferIoExecutor(void)1578 BufferIoExecutor::~BufferIoExecutor (void)
1579 {
1580 }
1581
getLayoutStride(const vector<VarLayout> & layout)1582 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1583 {
1584 return layout.empty() ? 0 : layout[0].stride;
1585 }
1586
computeVarLayout(const std::vector<Symbol> & symbols,std::vector<VarLayout> * layout)1587 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
1588 {
1589 deUint32 maxAlignment = 0;
1590 deUint32 curOffset = 0;
1591
1592 DE_ASSERT(layout != DE_NULL);
1593 DE_ASSERT(layout->empty());
1594 layout->resize(symbols.size());
1595
1596 for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1597 {
1598 const Symbol& symbol = symbols[varNdx];
1599 const glu::DataType basicType = symbol.varType.getBasicType();
1600 VarLayout& layoutEntry = (*layout)[varNdx];
1601
1602 if (glu::isDataTypeScalarOrVector(basicType))
1603 {
1604 const deUint32 alignment = getVecStd430ByteAlignment(basicType);
1605 const deUint32 size = (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1606
1607 curOffset = (deUint32)deAlign32((int)curOffset, (int)alignment);
1608 maxAlignment = de::max(maxAlignment, alignment);
1609
1610 layoutEntry.offset = curOffset;
1611 layoutEntry.matrixStride = 0;
1612
1613 curOffset += size;
1614 }
1615 else if (glu::isDataTypeMatrix(basicType))
1616 {
1617 const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
1618 const glu::DataType vecType = glu::getDataTypeFloatVec(glu::getDataTypeMatrixNumRows(basicType));
1619 const deUint32 vecAlignment = isDataTypeFloat16OrVec(basicType) ? getVecStd430ByteAlignment(vecType)/2 : getVecStd430ByteAlignment(vecType);
1620
1621 curOffset = (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
1622 maxAlignment = de::max(maxAlignment, vecAlignment);
1623
1624 layoutEntry.offset = curOffset;
1625 layoutEntry.matrixStride = vecAlignment;
1626
1627 curOffset += vecAlignment*numVecs;
1628 }
1629 else
1630 DE_ASSERT(false);
1631 }
1632
1633 {
1634 const deUint32 totalSize = (deUint32)deAlign32(curOffset, maxAlignment);
1635
1636 for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1637 varIter->stride = totalSize;
1638 }
1639 }
1640
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1641 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1642 {
1643 // Input struct
1644 if (!spec.inputs.empty())
1645 {
1646 glu::StructType inputStruct("Inputs");
1647 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1648 inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1649 src << glu::declare(&inputStruct) << ";\n";
1650 }
1651
1652 // Output struct
1653 {
1654 glu::StructType outputStruct("Outputs");
1655 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1656 outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1657 src << glu::declare(&outputStruct) << ";\n";
1658 }
1659
1660 src << "\n";
1661
1662 if (!spec.inputs.empty())
1663 {
1664 src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1665 << "{\n"
1666 << " Inputs inputs[];\n"
1667 << "};\n";
1668 }
1669
1670 src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1671 << "{\n"
1672 << " Outputs outputs[];\n"
1673 << "};\n"
1674 << "\n";
1675 }
1676
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1677 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1678 {
1679 std::string tname;
1680 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1681 {
1682 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1683 if (f16BitTest)
1684 {
1685 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1686 }
1687 else
1688 {
1689 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1690 }
1691 src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
1692 }
1693
1694 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1695 {
1696 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1697 if (f16BitTest)
1698 {
1699 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1700 }
1701 else
1702 {
1703 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1704 }
1705 src << "\t" << tname << " " << symIter->name << ";\n";
1706 if (f16BitTest)
1707 {
1708 const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1709 src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
1710 }
1711 }
1712
1713 src << "\n";
1714
1715 {
1716 std::istringstream opSrc (spec.source);
1717 std::string line;
1718
1719 while (std::getline(opSrc, line))
1720 src << "\t" << line << "\n";
1721 }
1722
1723 if (spec.packFloat16Bit)
1724 packFloat16Bit (src, spec.outputs);
1725
1726 src << "\n";
1727 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1728 {
1729 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1730 if(f16BitTest)
1731 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1732 else
1733 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1734 }
1735 }
1736
copyToBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1737 void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1738 {
1739 if (varType.isBasicType())
1740 {
1741 const glu::DataType basicType = varType.getBasicType();
1742 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1743 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1744 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1745 const int numComps = scalarSize / numVecs;
1746
1747 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1748 {
1749 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1750 {
1751 const int size = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1752 const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1753 const int dstOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1754 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1755 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1756
1757 deMemcpy(dstPtr, srcPtr, size * numComps);
1758 }
1759 }
1760 }
1761 else
1762 throw tcu::InternalError("Unsupported type");
1763 }
1764
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1765 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1766 {
1767 if (varType.isBasicType())
1768 {
1769 const glu::DataType basicType = varType.getBasicType();
1770 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1771 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1772 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1773 const int numComps = scalarSize / numVecs;
1774
1775 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1776 {
1777 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1778 {
1779 const int size = (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
1780 const int srcOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1781 const int dstOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1782 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1783 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1784
1785 deMemcpy(dstPtr, srcPtr, size * numComps);
1786 }
1787 }
1788 }
1789 else
1790 throw tcu::InternalError("Unsupported type");
1791 }
1792
uploadInputBuffer(const void * const * inputPtrs,int numValues)1793 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues)
1794 {
1795 const VkDevice vkDevice = m_context.getDevice();
1796 const DeviceInterface& vk = m_context.getDeviceInterface();
1797
1798 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1799 const int inputBufferSize = inputStride * numValues;
1800
1801 if (inputBufferSize == 0)
1802 return; // No inputs
1803
1804 DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1805 for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1806 {
1807 const glu::VarType& varType = m_shaderSpec.inputs[inputNdx].varType;
1808 const VarLayout& layout = m_inputLayout[inputNdx];
1809
1810 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr());
1811 }
1812
1813 flushAlloc(vk, vkDevice, *m_inputAlloc);
1814 }
1815
readOutputBuffer(void * const * outputPtrs,int numValues)1816 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1817 {
1818 const VkDevice vkDevice = m_context.getDevice();
1819 const DeviceInterface& vk = m_context.getDeviceInterface();
1820
1821 DE_ASSERT(numValues > 0); // At least some outputs are required.
1822
1823 invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1824
1825 DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1826 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1827 {
1828 const glu::VarType& varType = m_shaderSpec.outputs[outputNdx].varType;
1829 const VarLayout& layout = m_outputLayout[outputNdx];
1830
1831 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1832 }
1833 }
1834
initBuffers(int numValues)1835 void BufferIoExecutor::initBuffers (int numValues)
1836 {
1837 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1838 const deUint32 outputStride = getLayoutStride(m_outputLayout);
1839 // Avoid creating zero-sized buffer/memory
1840 const size_t inputBufferSize = de::max(numValues * inputStride, 1u);
1841 const size_t outputBufferSize = numValues * outputStride;
1842
1843 // Upload data to buffer
1844 const VkDevice vkDevice = m_context.getDevice();
1845 const DeviceInterface& vk = m_context.getDeviceInterface();
1846 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1847 Allocator& memAlloc = m_context.getDefaultAllocator();
1848
1849 const VkBufferCreateInfo inputBufferParams =
1850 {
1851 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1852 DE_NULL, // const void* pNext;
1853 0u, // VkBufferCreateFlags flags;
1854 inputBufferSize, // VkDeviceSize size;
1855 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1856 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1857 1u, // deUint32 queueFamilyCount;
1858 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1859 };
1860
1861 m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1862 m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1863
1864 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1865
1866 const VkBufferCreateInfo outputBufferParams =
1867 {
1868 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1869 DE_NULL, // const void* pNext;
1870 0u, // VkBufferCreateFlags flags;
1871 outputBufferSize, // VkDeviceSize size;
1872 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1873 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1874 1u, // deUint32 queueFamilyCount;
1875 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1876 };
1877
1878 m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1879 m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1880
1881 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1882 }
1883
1884 // ComputeShaderExecutor
1885
1886 class ComputeShaderExecutor : public BufferIoExecutor
1887 {
1888 public:
1889 ComputeShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1890 virtual ~ComputeShaderExecutor (void);
1891
1892 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
1893
1894 virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
1895
1896 protected:
1897 static std::string generateComputeShader (const ShaderSpec& spec);
1898
1899 private:
1900 const VkDescriptorSetLayout m_extraResourcesLayout;
1901 };
1902
ComputeShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1903 ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1904 : BufferIoExecutor (context, shaderSpec)
1905 , m_extraResourcesLayout (extraResourcesLayout)
1906 {
1907 }
1908
~ComputeShaderExecutor(void)1909 ComputeShaderExecutor::~ComputeShaderExecutor (void)
1910 {
1911 }
1912
getTypeSpirv(const glu::DataType type)1913 std::string getTypeSpirv(const glu::DataType type)
1914 {
1915 switch(type)
1916 {
1917 case glu::TYPE_FLOAT16:
1918 return "%f16";
1919 case glu::TYPE_FLOAT16_VEC2:
1920 return "%v2f16";
1921 case glu::TYPE_FLOAT16_VEC3:
1922 return "%v3f16";
1923 case glu::TYPE_FLOAT16_VEC4:
1924 return "%v4f16";
1925 case glu::TYPE_FLOAT:
1926 return "%f32";
1927 case glu::TYPE_FLOAT_VEC2:
1928 return "%v2f32";
1929 case glu::TYPE_FLOAT_VEC3:
1930 return "%v3f32";
1931 case glu::TYPE_FLOAT_VEC4:
1932 return "%v4f32";
1933 case glu::TYPE_INT:
1934 return "%i32";
1935 case glu::TYPE_INT_VEC2:
1936 return "%v2i32";
1937 case glu::TYPE_INT_VEC3:
1938 return "%v3i32";
1939 case glu::TYPE_INT_VEC4:
1940 return "%v4i32";
1941 default:
1942 DE_ASSERT(0);
1943 return "";
1944 break;
1945 }
1946 }
1947
/*--------------------------------------------------------------------*//*!
 * \brief Generate SPIR-V assembly that shifts an i32 variable left by one.
 *
 * The emitted code loads variableName, shifts it left by %c_i32_1 and
 * stores the result back. Result ids are suffixed with operationNdx so
 * the snippet can be emitted several times in one function.
 *
 * \param variableName Id of the %i32 variable to update.
 * \param operationNdx Suffix that makes the generated ids unique.
 * \return Assembly text, starting with an empty line.
 *//*--------------------------------------------------------------------*/
std::string moveBitOperation (std::string variableName, const int operationNdx)
{
	std::ostringstream	ndxStream;
	ndxStream << operationNdx;

	const std::string	ndx			= ndxStream.str();
	const std::string	loadedId	= "%operation_move_" + ndx;
	const std::string	shiftedId	= "%move1_" + ndx;

	std::string			asmText		= "\n";

	asmText += loadedId + " = OpLoad %i32 " + variableName + "\n";
	asmText += shiftedId + " = OpShiftLeftLogical %i32 " + loadedId + " %c_i32_1\n";
	asmText += "OpStore " + variableName + " " + shiftedId + "\n";

	return asmText;
}
1957
sclarComparison(const std::string opeartion,const int operationNdx,const glu::DataType type,const std::string & outputType,const int scalarSize)1958 std::string sclarComparison(const std::string opeartion, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
1959 {
1960 std::ostringstream src;
1961 std::string boolType;
1962
1963 switch (type)
1964 {
1965 case glu::TYPE_FLOAT16:
1966 case glu::TYPE_FLOAT:
1967 src << "\n"
1968 << "%operation_result_" << operationNdx << " = " << opeartion << " %bool %in0_val %in1_val\n"
1969 << "OpSelectionMerge %IF_" << operationNdx << " None\n"
1970 << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
1971 << "%label_IF_" << operationNdx << " = OpLabel\n"
1972 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
1973 << "%out_val_" << operationNdx << " = OpLoad %i32 %out\n"
1974 << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
1975 << "OpStore %out %add_if_" << operationNdx << "\n"
1976 << "OpBranch %IF_" << operationNdx << "\n"
1977 << "%IF_" << operationNdx << " = OpLabel\n";
1978 return src.str();
1979 case glu::TYPE_FLOAT16_VEC2:
1980 case glu::TYPE_FLOAT_VEC2:
1981 boolType = "%v2bool";
1982 break;
1983 case glu::TYPE_FLOAT16_VEC3:
1984 case glu::TYPE_FLOAT_VEC3:
1985 boolType = "%v3bool";
1986 break;
1987 case glu::TYPE_FLOAT16_VEC4:
1988 case glu::TYPE_FLOAT_VEC4:
1989 boolType = "%v4bool";
1990 break;
1991 default:
1992 DE_ASSERT(0);
1993 return "";
1994 break;
1995 }
1996
1997 src << "\n"
1998 << "%operation_result_" << operationNdx << " = " << opeartion << " " << boolType << " %in0_val %in1_val\n"
1999 << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2000 << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2001
2002 src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2003 for(int ndx = 0; ndx < scalarSize; ++ndx)
2004 src << " %operation_val_" << operationNdx;
2005 src << "\n";
2006
2007 src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
2008 << "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out\n"
2009
2010 << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
2011 << "OpStore %out %add_if_" << operationNdx << "\n";
2012
2013 return src.str();
2014 }
2015
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool isMediump)2016 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool isMediump)
2017 {
2018 const int operationAmount = 10;
2019 int moveBitNdx = 0;
2020 const std::string inputType1 = getTypeSpirv(spec.inputs[0].varType.getBasicType());
2021 const std::string inputType2 = getTypeSpirv(spec.inputs[1].varType.getBasicType());
2022 const std::string outputType = getTypeSpirv(spec.outputs[0].varType.getBasicType());
2023 const std::string packType = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2024
2025 std::string opeartions[operationAmount] =
2026 {
2027 "OpFOrdEqual",
2028 "OpFOrdGreaterThan",
2029 "OpFOrdLessThan",
2030 "OpFOrdGreaterThanEqual",
2031 "OpFOrdLessThanEqual",
2032 "OpFUnordEqual",
2033 "OpFUnordGreaterThan",
2034 "OpFUnordLessThan",
2035 "OpFUnordGreaterThanEqual",
2036 "OpFUnordLessThanEqual"
2037 };
2038
2039 std::ostringstream src;
2040 src << "; SPIR-V\n"
2041 "; Version: 1.0\n"
2042 "; Generator: Khronos Glslang Reference Front End; 4\n"
2043 "; Bound: 114\n"
2044 "; Schema: 0\n"
2045 "OpCapability Shader\n";
2046
2047 if (spec.packFloat16Bit || are16Bit)
2048 src << "OpCapability Float16\n";
2049
2050 if (are16Bit)
2051 src << "OpCapability StorageBuffer16BitAccess\n"
2052 "OpCapability UniformAndStorageBuffer16BitAccess\n";
2053
2054 if (are16Bit)
2055 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2056
2057 src << "%1 = OpExtInstImport \"GLSL.std.450\"\n"
2058 "OpMemoryModel Logical GLSL450\n"
2059 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2060 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2061 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2062 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2063
2064 //input offset
2065 {
2066 int offset = 0;
2067 int ndx = 0;
2068 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2069 {
2070 src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2071 ++ndx;
2072 offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2073 }
2074 src << "OpDecorate %up_SSB0_IN ArrayStride "<< offset << "\n";
2075 }
2076
2077 src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2078 "OpDecorate %ssboIN BufferBlock\n"
2079 "OpDecorate %ssbo_src DescriptorSet 0\n"
2080 "OpDecorate %ssbo_src Binding 0\n"
2081 "\n";
2082
2083 if (isMediump)
2084 {
2085 src << "OpMemberDecorate %SSB0_IN 1 RelaxedPrecision\n"
2086 "OpDecorate %in0 RelaxedPrecision\n"
2087 "OpMemberDecorate %SSB0_IN 0 RelaxedPrecision\n"
2088 "OpDecorate %src_val_0_0 RelaxedPrecision\n"
2089 "OpDecorate %src_val_0_0 RelaxedPrecision\n"
2090 "OpDecorate %in1 RelaxedPrecision\n"
2091 "OpDecorate %src_val_0_1 RelaxedPrecision\n"
2092 "OpDecorate %src_val_0_1 RelaxedPrecision\n"
2093 "OpDecorate %in0_val RelaxedPrecision\n"
2094 "OpDecorate %in1_val RelaxedPrecision\n"
2095 "OpDecorate %in0_val RelaxedPrecision\n"
2096 "OpDecorate %in1_val RelaxedPrecision\n"
2097 "OpMemberDecorate %SSB0_OUT 0 RelaxedPrecision\n";
2098 }
2099
2100 //output offset
2101 {
2102 int offset = 0;
2103 int ndx = 0;
2104 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
2105 {
2106 src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2107 ++ndx;
2108 offset += (symIter->varType.getScalarSize() == 3 ? 4 : symIter->varType.getScalarSize()) * (isDataTypeFloat16OrVec(symIter->varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32));
2109 }
2110 src << "OpDecorate %up_SSB0_OUT ArrayStride " << offset << "\n";
2111 }
2112
2113 src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2114 "OpDecorate %ssboOUT BufferBlock\n"
2115 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2116 "OpDecorate %ssbo_dst Binding 1\n"
2117 "\n"
2118 "%void = OpTypeVoid\n"
2119 "%bool = OpTypeBool\n"
2120 "%v2bool = OpTypeVector %bool 2\n"
2121 "%v3bool = OpTypeVector %bool 3\n"
2122 "%v4bool = OpTypeVector %bool 4\n"
2123 "%u32 = OpTypeInt 32 0\n";
2124
2125 if (!are16Bit) //f32 is not needed when shader operates only on f16
2126 src << "%f32 = OpTypeFloat 32\n"
2127 "%v2f32 = OpTypeVector %f32 2\n"
2128 "%v3f32 = OpTypeVector %f32 3\n"
2129 "%v4f32 = OpTypeVector %f32 4\n";
2130
2131 if (spec.packFloat16Bit || are16Bit)
2132 src << "%f16 = OpTypeFloat 16\n"
2133 "%v2f16 = OpTypeVector %f16 2\n"
2134 "%v3f16 = OpTypeVector %f16 3\n"
2135 "%v4f16 = OpTypeVector %f16 4\n";
2136
2137 src << "%i32 = OpTypeInt 32 1\n"
2138 "%v2i32 = OpTypeVector %i32 2\n"
2139 "%v3i32 = OpTypeVector %i32 3\n"
2140 "%v4i32 = OpTypeVector %i32 4\n"
2141 "%v3u32 = OpTypeVector %u32 3\n"
2142 "\n"
2143 "%ip_u32 = OpTypePointer Input %u32\n"
2144 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2145 "%up_float = OpTypePointer Uniform " << inputType1 << "\n"
2146 "\n"
2147 "%fun = OpTypeFunction %void\n"
2148 "%fp_u32 = OpTypePointer Function %u32\n"
2149 "%fp_i32 = OpTypePointer Function " << outputType << "\n"
2150 "%fp_f32 = OpTypePointer Function " << inputType1 << "\n"
2151 "%fp_operation = OpTypePointer Function %i32\n";
2152
2153 if (spec.packFloat16Bit)
2154 src << "%fp_f16 = OpTypePointer Function " << packType << "\n";
2155
2156 src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2157 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2158 "%up_i32 = OpTypePointer Uniform " << outputType << "\n"
2159 "\n"
2160 "%c_u32_0 = OpConstant %u32 0\n"
2161 "%c_u32_1 = OpConstant %u32 1\n"
2162 "%c_u32_2 = OpConstant %u32 2\n"
2163 "%c_i32_0 = OpConstant %i32 0\n"
2164 "%c_i32_1 = OpConstant %i32 1\n"
2165 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2166 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2167 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2168 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2169 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2170 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2171 "\n"
2172 "%SSB0_IN = OpTypeStruct " << inputType1 << " " << inputType2 << "\n"
2173 "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2174 "%ssboIN = OpTypeStruct %up_SSB0_IN\n"
2175 "%up_ssboIN = OpTypePointer Uniform %ssboIN\n"
2176 "%ssbo_src = OpVariable %up_ssboIN Uniform\n"
2177 "\n"
2178 "%SSB0_OUT = OpTypeStruct " << outputType << "\n"
2179 "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2180 "%ssboOUT = OpTypeStruct %up_SSB0_OUT\n"
2181 "%up_ssboOUT = OpTypePointer Uniform %ssboOUT\n"
2182 "%ssbo_dst = OpVariable %up_ssboOUT Uniform\n"
2183 "\n"
2184 "%BP_main = OpFunction %void None %fun\n"
2185 "%BP_label = OpLabel\n"
2186 "%invocationNdx = OpVariable %fp_u32 Function\n";
2187
2188 if (spec.packFloat16Bit)
2189 src << "%in0 = OpVariable %fp_f16 Function\n"
2190 "%in1 = OpVariable %fp_f16 Function\n";
2191 else
2192 src << "%in0 = OpVariable %fp_f32 Function\n"
2193 "%in1 = OpVariable %fp_f32 Function\n";
2194
2195 src << "%operation = OpVariable %fp_operation Function\n"
2196 "%out = OpVariable %fp_i32 Function\n"
2197 "%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2198 "%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2199 "%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2200 "%BP_num_0_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2201 "%BP_num_1_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2202 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2203 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2204 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2205 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2206 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2207 "\n"
2208 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2209 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2210 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2211 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2212 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2213 "OpStore %invocationNdx %add_2\n"
2214 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n"
2215 "\n"
2216 "%src_ptr_0_0 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_0\n"
2217 "%src_val_0_0 = OpLoad " << inputType1 << " %src_ptr_0_0\n";
2218
2219 if(spec.packFloat16Bit)
2220 src << "%val_f16_0_0 = OpFConvert " << packType <<" %src_val_0_0\n"
2221 "OpStore %in0 %val_f16_0_0\n";
2222 else
2223 src << "OpStore %in0 %src_val_0_0\n";
2224
2225 src << "\n"
2226 "%src_ptr_0_1 = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_1\n"
2227 "%src_val_0_1 = OpLoad " << inputType2 << " %src_ptr_0_1\n";
2228
2229 if (spec.packFloat16Bit)
2230 src << "%val_f16_0_1 = OpFConvert " << packType << " %src_val_0_1\n"
2231 "OpStore %in1 %val_f16_0_1\n";
2232 else
2233 src << "OpStore %in1 %src_val_0_1\n";
2234
2235 src << "\n"
2236 "OpStore %operation %c_i32_1\n"
2237 "OpStore %out %c_" << &outputType[1] << "_0\n"
2238 "\n";
2239
2240 if (spec.packFloat16Bit)
2241 src << "%in0_val = OpLoad " << packType << " %in0\n"
2242 "%in1_val = OpLoad " << packType << " %in1\n";
2243 else
2244 src << "%in0_val = OpLoad " << inputType1 << " %in0\n"
2245 "%in1_val = OpLoad " << inputType2 << " %in1\n";
2246
2247 src << "\n";
2248 for(int operationNdx = 0; operationNdx < operationAmount; ++operationNdx)
2249 {
2250 src << sclarComparison (opeartions[operationNdx], operationNdx,
2251 spec.inputs[0].varType.getBasicType(),
2252 outputType,
2253 spec.outputs[0].varType.getScalarSize());
2254 src << moveBitOperation("%operation", moveBitNdx);
2255 ++moveBitNdx;
2256 }
2257
2258 src << "\n"
2259 "%out_val_final = OpLoad " << outputType << " %out\n"
2260 "%ssbo_dst_ptr = OpAccessChain %up_i32 %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_0\n"
2261 "OpStore %ssbo_dst_ptr %out_val_final\n"
2262 "\n"
2263 "OpReturn\n"
2264 "OpFunctionEnd\n";
2265 return src.str();
2266 }
2267
2268
generateComputeShader(const ShaderSpec & spec)2269 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2270 {
2271 if(spec.spirVShader)
2272 {
2273 bool are16Bit = false;
2274 bool isMediump = false;
2275 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2276 {
2277 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2278 are16Bit = true;
2279
2280 if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2281 isMediump = true;
2282
2283 if(isMediump && are16Bit)
2284 break;
2285 }
2286
2287 return generateSpirv(spec, are16Bit, isMediump);
2288 }
2289 else
2290 {
2291 std::ostringstream src;
2292 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2293
2294 if (!spec.globalDeclarations.empty())
2295 src << spec.globalDeclarations << "\n";
2296
2297 src << "layout(local_size_x = 1) in;\n"
2298 << "\n";
2299
2300 declareBufferBlocks(src, spec);
2301
2302 src << "void main (void)\n"
2303 << "{\n"
2304 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2305 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2306
2307 generateExecBufferIo(src, spec, "invocationNdx");
2308
2309 src << "}\n";
2310
2311 return src.str();
2312 }
2313 }
2314
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2315 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2316 {
2317 if(shaderSpec.spirVShader)
2318 programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2319 else
2320 programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2321 }
2322
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)2323 void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
2324 {
2325 const VkDevice vkDevice = m_context.getDevice();
2326 const DeviceInterface& vk = m_context.getDeviceInterface();
2327 const VkQueue queue = m_context.getUniversalQueue();
2328 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2329
2330 DescriptorPoolBuilder descriptorPoolBuilder;
2331 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
2332
2333 Move<VkShaderModule> computeShaderModule;
2334 Move<VkPipeline> computePipeline;
2335 Move<VkPipelineLayout> pipelineLayout;
2336 Move<VkCommandPool> cmdPool;
2337 Move<VkDescriptorPool> descriptorPool;
2338 Move<VkDescriptorSetLayout> descriptorSetLayout;
2339 Move<VkDescriptorSet> descriptorSet;
2340 const deUint32 numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
2341
2342 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2343
2344 initBuffers(numValues);
2345
2346 // Setup input buffer & copy data
2347 uploadInputBuffer(inputs, numValues);
2348
2349 // Create command pool
2350 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2351
2352 // Create command buffer
2353
2354 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2355 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2356 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2357 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2358
2359 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2360 descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2361
2362 const VkDescriptorSetAllocateInfo allocInfo =
2363 {
2364 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2365 DE_NULL,
2366 *descriptorPool,
2367 1u,
2368 &*descriptorSetLayout
2369 };
2370
2371 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2372
2373 // Create pipeline layout
2374 {
2375 const VkDescriptorSetLayout descriptorSetLayouts[] =
2376 {
2377 *descriptorSetLayout,
2378 m_extraResourcesLayout
2379 };
2380 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
2381 {
2382 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2383 DE_NULL, // const void* pNext;
2384 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
2385 numDescriptorSets, // deUint32 CdescriptorSetCount;
2386 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
2387 0u, // deUint32 pushConstantRangeCount;
2388 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
2389 };
2390
2391 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2392 }
2393
2394 // Create shaders
2395 {
2396 computeShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
2397 }
2398
2399 // create pipeline
2400 {
2401 const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
2402 {
2403 {
2404 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2405 DE_NULL, // const void* pNext;
2406 (VkPipelineShaderStageCreateFlags)0u, // VkPipelineShaderStageCreateFlags flags;
2407 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagsBit stage;
2408 *computeShaderModule, // VkShaderModule shader;
2409 "main", // const char* pName;
2410 DE_NULL // const VkSpecializationInfo* pSpecializationInfo;
2411 }
2412 };
2413
2414 const VkComputePipelineCreateInfo computePipelineParams =
2415 {
2416 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2417 DE_NULL, // const void* pNext;
2418 (VkPipelineCreateFlags)0, // VkPipelineCreateFlags flags;
2419 *shaderStageParams, // VkPipelineShaderStageCreateInfo cs;
2420 *pipelineLayout, // VkPipelineLayout layout;
2421 0u, // VkPipeline basePipelineHandle;
2422 0u, // int32_t basePipelineIndex;
2423 };
2424
2425 computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
2426 }
2427
2428 const int maxValuesPerInvocation = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
2429 int curOffset = 0;
2430 const deUint32 inputStride = getInputStride();
2431 const deUint32 outputStride = getOutputStride();
2432
2433 while (curOffset < numValues)
2434 {
2435 Move<VkCommandBuffer> cmdBuffer;
2436 const int numToExec = de::min(maxValuesPerInvocation, numValues-curOffset);
2437
2438 // Update descriptors
2439 {
2440 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2441
2442 const VkDescriptorBufferInfo outputDescriptorBufferInfo =
2443 {
2444 *m_outputBuffer, // VkBuffer buffer;
2445 curOffset * outputStride, // VkDeviceSize offset;
2446 numToExec * outputStride // VkDeviceSize range;
2447 };
2448
2449 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2450
2451 if (inputStride)
2452 {
2453 const VkDescriptorBufferInfo inputDescriptorBufferInfo =
2454 {
2455 *m_inputBuffer, // VkBuffer buffer;
2456 curOffset * inputStride, // VkDeviceSize offset;
2457 numToExec * inputStride // VkDeviceSize range;
2458 };
2459
2460 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2461 }
2462
2463 descriptorSetUpdateBuilder.update(vk, vkDevice);
2464 }
2465
2466 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2467 beginCommandBuffer(vk, *cmdBuffer);
2468 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
2469
2470 {
2471 const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
2472 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
2473 }
2474
2475 vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);
2476
2477 endCommandBuffer(vk, *cmdBuffer);
2478
2479 curOffset += numToExec;
2480
2481 // Execute
2482 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2483 }
2484
2485 // Read back data
2486 readOutputBuffer(outputs, numValues);
2487 }
2488
2489 // Tessellation utils
2490
// Returns the fixed GLSL 450 vertex shader used in front of the tessellation
// stages. It needs no vertex inputs: the position is derived from gl_VertexIndex.
static std::string generateVertexShaderForTess (void)
{
	static const char* const s_vertexSource =
		"#version 450\n"
		"void main (void)\n{\n"
		" gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
		"}\n";

	return std::string(s_vertexSource);
}
2501
2502 class TessellationExecutor : public BufferIoExecutor
2503 {
2504 public:
2505 TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2506 virtual ~TessellationExecutor (void);
2507
2508 void renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);
2509
2510 private:
2511 const VkDescriptorSetLayout m_extraResourcesLayout;
2512 };
2513
TessellationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2514 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2515 : BufferIoExecutor (context, shaderSpec)
2516 , m_extraResourcesLayout (extraResourcesLayout)
2517 {
2518 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
2519
2520 if (!features.tessellationShader)
2521 TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
2522 }
2523
~TessellationExecutor(void)2524 TessellationExecutor::~TessellationExecutor (void)
2525 {
2526 }
2527
renderTess(deUint32 numValues,deUint32 vertexCount,deUint32 patchControlPoints,VkDescriptorSet extraResources)2528 void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
2529 {
2530 const size_t inputBufferSize = numValues * getInputStride();
2531 const VkDevice vkDevice = m_context.getDevice();
2532 const DeviceInterface& vk = m_context.getDeviceInterface();
2533 const VkQueue queue = m_context.getUniversalQueue();
2534 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2535 Allocator& memAlloc = m_context.getDefaultAllocator();
2536
2537 const tcu::UVec2 renderSize (DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
2538
2539 Move<VkImage> colorImage;
2540 de::MovePtr<Allocation> colorImageAlloc;
2541 VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
2542 Move<VkImageView> colorImageView;
2543
2544 Move<VkRenderPass> renderPass;
2545 Move<VkFramebuffer> framebuffer;
2546 Move<VkPipelineLayout> pipelineLayout;
2547 Move<VkPipeline> graphicsPipeline;
2548
2549 Move<VkShaderModule> vertexShaderModule;
2550 Move<VkShaderModule> tessControlShaderModule;
2551 Move<VkShaderModule> tessEvalShaderModule;
2552 Move<VkShaderModule> fragmentShaderModule;
2553
2554 Move<VkCommandPool> cmdPool;
2555 Move<VkCommandBuffer> cmdBuffer;
2556
2557 Move<VkDescriptorPool> descriptorPool;
2558 Move<VkDescriptorSetLayout> descriptorSetLayout;
2559 Move<VkDescriptorSet> descriptorSet;
2560 const deUint32 numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;
2561
2562 DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));
2563
2564 // Create color image
2565 {
2566 const VkImageCreateInfo colorImageParams =
2567 {
2568 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
2569 DE_NULL, // const void* pNext;
2570 0u, // VkImageCreateFlags flags;
2571 VK_IMAGE_TYPE_2D, // VkImageType imageType;
2572 colorFormat, // VkFormat format;
2573 { renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
2574 1u, // deUint32 mipLevels;
2575 1u, // deUint32 arraySize;
2576 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2577 VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
2578 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
2579 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
2580 1u, // deUint32 queueFamilyCount;
2581 &queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
2582 VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
2583 };
2584
2585 colorImage = createImage(vk, vkDevice, &colorImageParams);
2586
2587 // Allocate and bind color image memory
2588 colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
2589 VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
2590 }
2591
2592 // Create color attachment view
2593 {
2594 const VkImageViewCreateInfo colorImageViewParams =
2595 {
2596 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
2597 DE_NULL, // const void* pNext;
2598 0u, // VkImageViewCreateFlags flags;
2599 *colorImage, // VkImage image;
2600 VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
2601 colorFormat, // VkFormat format;
2602 {
2603 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
2604 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
2605 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
2606 VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
2607 }, // VkComponentsMapping components;
2608 {
2609 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
2610 0u, // deUint32 baseMipLevel;
2611 1u, // deUint32 mipLevels;
2612 0u, // deUint32 baseArraylayer;
2613 1u // deUint32 layerCount;
2614 } // VkImageSubresourceRange subresourceRange;
2615 };
2616
2617 colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
2618 }
2619
2620 // Create render pass
2621 {
2622 const VkAttachmentDescription colorAttachmentDescription =
2623 {
2624 0u, // VkAttachmentDescriptorFlags flags;
2625 colorFormat, // VkFormat format;
2626 VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
2627 VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
2628 VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
2629 VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
2630 VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
2631 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
2632 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout finalLayout
2633 };
2634
2635 const VkAttachmentDescription attachments[1] =
2636 {
2637 colorAttachmentDescription
2638 };
2639
2640 const VkAttachmentReference colorAttachmentReference =
2641 {
2642 0u, // deUint32 attachment;
2643 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
2644 };
2645
2646 const VkSubpassDescription subpassDescription =
2647 {
2648 0u, // VkSubpassDescriptionFlags flags;
2649 VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
2650 0u, // deUint32 inputCount;
2651 DE_NULL, // const VkAttachmentReference* pInputAttachments;
2652 1u, // deUint32 colorCount;
2653 &colorAttachmentReference, // const VkAttachmentReference* pColorAttachments;
2654 DE_NULL, // const VkAttachmentReference* pResolveAttachments;
2655 DE_NULL, // VkAttachmentReference depthStencilAttachment;
2656 0u, // deUint32 preserveCount;
2657 DE_NULL // const VkAttachmentReference* pPreserveAttachments;
2658 };
2659
2660 const VkRenderPassCreateInfo renderPassParams =
2661 {
2662 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
2663 DE_NULL, // const void* pNext;
2664 0u, // VkRenderPassCreateFlags flags;
2665 1u, // deUint32 attachmentCount;
2666 attachments, // const VkAttachmentDescription* pAttachments;
2667 1u, // deUint32 subpassCount;
2668 &subpassDescription, // const VkSubpassDescription* pSubpasses;
2669 0u, // deUint32 dependencyCount;
2670 DE_NULL // const VkSubpassDependency* pDependencies;
2671 };
2672
2673 renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
2674 }
2675
2676 // Create framebuffer
2677 {
2678 const VkFramebufferCreateInfo framebufferParams =
2679 {
2680 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
2681 DE_NULL, // const void* pNext;
2682 0u, // VkFramebufferCreateFlags flags;
2683 *renderPass, // VkRenderPass renderPass;
2684 1u, // deUint32 attachmentCount;
2685 &*colorImageView, // const VkAttachmentBindInfo* pAttachments;
2686 (deUint32)renderSize.x(), // deUint32 width;
2687 (deUint32)renderSize.y(), // deUint32 height;
2688 1u // deUint32 layers;
2689 };
2690
2691 framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
2692 }
2693
2694 // Create descriptors
2695 {
2696 DescriptorPoolBuilder descriptorPoolBuilder;
2697 DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
2698
2699 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
2700 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2701 descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
2702 descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2703
2704 descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2705 descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2706
2707 const VkDescriptorSetAllocateInfo allocInfo =
2708 {
2709 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
2710 DE_NULL,
2711 *descriptorPool,
2712 1u,
2713 &*descriptorSetLayout
2714 };
2715
2716 descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2717 // Update descriptors
2718 {
2719 DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2720 const VkDescriptorBufferInfo outputDescriptorBufferInfo =
2721 {
2722 *m_outputBuffer, // VkBuffer buffer;
2723 0u, // VkDeviceSize offset;
2724 VK_WHOLE_SIZE // VkDeviceSize range;
2725 };
2726
2727 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2728
2729 VkDescriptorBufferInfo inputDescriptorBufferInfo =
2730 {
2731 0, // VkBuffer buffer;
2732 0u, // VkDeviceSize offset;
2733 VK_WHOLE_SIZE // VkDeviceSize range;
2734 };
2735
2736 if (inputBufferSize > 0)
2737 {
2738 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
2739
2740 descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2741 }
2742
2743 descriptorSetUpdateBuilder.update(vk, vkDevice);
2744 }
2745 }
2746
2747 // Create pipeline layout
2748 {
2749 const VkDescriptorSetLayout descriptorSetLayouts[] =
2750 {
2751 *descriptorSetLayout,
2752 m_extraResourcesLayout
2753 };
2754 const VkPipelineLayoutCreateInfo pipelineLayoutParams =
2755 {
2756 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2757 DE_NULL, // const void* pNext;
2758 (VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
2759 numDescriptorSets, // deUint32 descriptorSetCount;
2760 descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
2761 0u, // deUint32 pushConstantRangeCount;
2762 DE_NULL // const VkPushConstantRange* pPushConstantRanges;
2763 };
2764
2765 pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2766 }
2767
2768 // Create shader modules
2769 {
2770 vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
2771 tessControlShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
2772 tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
2773 fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
2774 }
2775
2776 // Create pipeline
2777 {
2778 const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
2779 {
2780 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
2781 DE_NULL, // const void* pNext;
2782 (VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
2783 0u, // deUint32 bindingCount;
2784 DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
2785 0u, // deUint32 attributeCount;
2786 DE_NULL, // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
2787 };
2788
2789 const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
2790 const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));
2791
2792 graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
2793 vkDevice, // const VkDevice device
2794 *pipelineLayout, // const VkPipelineLayout pipelineLayout
2795 *vertexShaderModule, // const VkShaderModule vertexShaderModule
2796 *tessControlShaderModule, // const VkShaderModule tessellationControlShaderModule
2797 *tessEvalShaderModule, // const VkShaderModule tessellationEvalShaderModule
2798 DE_NULL, // const VkShaderModule geometryShaderModule
2799 *fragmentShaderModule, // const VkShaderModule fragmentShaderModule
2800 *renderPass, // const VkRenderPass renderPass
2801 viewports, // const std::vector<VkViewport>& viewports
2802 scissors, // const std::vector<VkRect2D>& scissors
2803 VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology topology
2804 0u, // const deUint32 subpass
2805 patchControlPoints, // const deUint32 patchControlPoints
2806 &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
2807 }
2808
2809 // Create command pool
2810 cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2811
2812 // Create command buffer
2813 {
2814 const VkClearValue clearValue = getDefaultClearColor();
2815
2816 cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2817
2818 beginCommandBuffer(vk, *cmdBuffer);
2819
2820 beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);
2821
2822 vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
2823
2824 {
2825 const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
2826 vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
2827 }
2828
2829 vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
2830
2831 endRenderPass(vk, *cmdBuffer);
2832 endCommandBuffer(vk, *cmdBuffer);
2833 }
2834
2835 // Execute Draw
2836 submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2837 }
2838
2839 // TessControlExecutor
2840
2841 class TessControlExecutor : public TessellationExecutor
2842 {
2843 public:
2844 TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2845 virtual ~TessControlExecutor (void);
2846
2847 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
2848
2849 virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
2850
2851 protected:
2852 static std::string generateTessControlShader (const ShaderSpec& shaderSpec);
2853 };
2854
TessControlExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2855 TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2856 : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
2857 {
2858 }
2859
~TessControlExecutor(void)2860 TessControlExecutor::~TessControlExecutor (void)
2861 {
2862 }
2863
generateTessControlShader(const ShaderSpec & shaderSpec)2864 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
2865 {
2866 std::ostringstream src;
2867 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
2868
2869 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
2870 src << "#extension GL_EXT_tessellation_shader : require\n\n";
2871
2872 if (!shaderSpec.globalDeclarations.empty())
2873 src << shaderSpec.globalDeclarations << "\n";
2874
2875 src << "\nlayout(vertices = 1) out;\n\n";
2876
2877 declareBufferBlocks(src, shaderSpec);
2878
2879 src << "void main (void)\n{\n";
2880
2881 for (int ndx = 0; ndx < 2; ndx++)
2882 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
2883
2884 for (int ndx = 0; ndx < 4; ndx++)
2885 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
2886
2887 src << "\n"
2888 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
2889
2890 generateExecBufferIo(src, shaderSpec, "invocationId");
2891
2892 src << "}\n";
2893
2894 return src.str();
2895 }
2896
// Returns a fixed tessellation evaluation shader that only writes gl_Position
// from the tessellation coordinate.
static std::string generateEmptyTessEvalShader ()
{
	std::string source;

	source += "#version 450\n";
	source += "#extension GL_EXT_tessellation_shader : require\n\n";
	source += "layout(triangles, ccw) in;\n";
	source += "\nvoid main (void)\n{\n";
	source += "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n";
	source += "}\n";

	return source;
}
2912
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2913 void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2914 {
2915 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
2916 programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
2917 programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
2918 programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
2919 }
2920
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)2921 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
2922 {
2923 const deUint32 patchSize = 3;
2924
2925 initBuffers(numValues);
2926
2927 // Setup input buffer & copy data
2928 uploadInputBuffer(inputs, numValues);
2929
2930 renderTess(numValues, patchSize * numValues, patchSize, extraResources);
2931
2932 // Read back data
2933 readOutputBuffer(outputs, numValues);
2934 }
2935
2936 // TessEvaluationExecutor
2937
2938 class TessEvaluationExecutor : public TessellationExecutor
2939 {
2940 public:
2941 TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2942 virtual ~TessEvaluationExecutor (void);
2943
2944 static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);
2945
2946 virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);
2947
2948 protected:
2949 static std::string generateTessEvalShader (const ShaderSpec& shaderSpec);
2950 };
2951
TessEvaluationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2952 TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2953 : TessellationExecutor (context, shaderSpec, extraResourcesLayout)
2954 {
2955 }
2956
~TessEvaluationExecutor(void)2957 TessEvaluationExecutor::~TessEvaluationExecutor (void)
2958 {
2959 }
2960
// Returns a fixed tessellation control shader that emits one vertex per patch
// and sets both inner and all four outer tessellation levels to 1.0.
static std::string generatePassthroughTessControlShader (void)
{
	std::string source;

	source += "#version 450\n"
			  "#extension GL_EXT_tessellation_shader : require\n\n";

	source += "layout(vertices = 1) out;\n\n";

	source += "void main (void)\n{\n";

	// Inner levels
	source += "\tgl_TessLevelInner[0] = 1.0;\n"
			  "\tgl_TessLevelInner[1] = 1.0;\n";

	// Outer levels
	source += "\tgl_TessLevelOuter[0] = 1.0;\n"
			  "\tgl_TessLevelOuter[1] = 1.0;\n"
			  "\tgl_TessLevelOuter[2] = 1.0;\n"
			  "\tgl_TessLevelOuter[3] = 1.0;\n";

	source += "}\n";

	return source;
}
2982
generateTessEvalShader(const ShaderSpec & shaderSpec)2983 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
2984 {
2985 std::ostringstream src;
2986
2987 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
2988
2989 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
2990 src << "#extension GL_EXT_tessellation_shader : require\n\n";
2991
2992 if (!shaderSpec.globalDeclarations.empty())
2993 src << shaderSpec.globalDeclarations << "\n";
2994
2995 src << "\n";
2996
2997 src << "layout(isolines, equal_spacing) in;\n\n";
2998
2999 declareBufferBlocks(src, shaderSpec);
3000
3001 src << "void main (void)\n{\n"
3002 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3003 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3004
3005 generateExecBufferIo(src, shaderSpec, "invocationId");
3006
3007 src << "}\n";
3008
3009 return src.str();
3010 }
3011
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)3012 void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3013 {
3014 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3015 programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3016 programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3017 programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3018 }
3019
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3020 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3021 {
3022 const int patchSize = 2;
3023 const int alignedValues = deAlign32(numValues, patchSize);
3024
3025 // Initialize buffers with aligned value count to make room for padding
3026 initBuffers(alignedValues);
3027
3028 // Setup input buffer & copy data
3029 uploadInputBuffer(inputs, numValues);
3030
3031 renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3032
3033 // Read back data
3034 readOutputBuffer(outputs, numValues);
3035 }
3036
3037 } // anonymous
3038
3039 // ShaderExecutor
3040
~ShaderExecutor(void)3041 ShaderExecutor::~ShaderExecutor (void)
3042 {
3043 }
3044
areInputs16Bit(void) const3045 bool ShaderExecutor::areInputs16Bit (void) const
3046 {
3047 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3048 {
3049 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3050 return true;
3051 }
3052 return false;
3053 }
3054
areOutputs16Bit(void) const3055 bool ShaderExecutor::areOutputs16Bit (void) const
3056 {
3057 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3058 {
3059 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3060 return true;
3061 }
3062 return false;
3063 }
3064
isOutput16Bit(const size_t ndx) const3065 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3066 {
3067 if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3068 return true;
3069 return false;
3070 }
3071
3072 // Utilities
3073
generateSources(glu::ShaderType shaderType,const ShaderSpec & shaderSpec,vk::SourceCollections & dst)3074 void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
3075 {
3076 switch (shaderType)
3077 {
3078 case glu::SHADERTYPE_VERTEX: VertexShaderExecutor::generateSources (shaderSpec, dst); break;
3079 case glu::SHADERTYPE_TESSELLATION_CONTROL: TessControlExecutor::generateSources (shaderSpec, dst); break;
3080 case glu::SHADERTYPE_TESSELLATION_EVALUATION: TessEvaluationExecutor::generateSources (shaderSpec, dst); break;
3081 case glu::SHADERTYPE_GEOMETRY: GeometryShaderExecutor::generateSources (shaderSpec, dst); break;
3082 case glu::SHADERTYPE_FRAGMENT: FragmentShaderExecutor::generateSources (shaderSpec, dst); break;
3083 case glu::SHADERTYPE_COMPUTE: ComputeShaderExecutor::generateSources (shaderSpec, dst); break;
3084 default:
3085 TCU_THROW(InternalError, "Unsupported shader type");
3086 }
3087 }
3088
createExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3089 ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3090 {
3091 switch (shaderType)
3092 {
3093 case glu::SHADERTYPE_VERTEX: return new VertexShaderExecutor (context, shaderSpec, extraResourcesLayout);
3094 case glu::SHADERTYPE_TESSELLATION_CONTROL: return new TessControlExecutor (context, shaderSpec, extraResourcesLayout);
3095 case glu::SHADERTYPE_TESSELLATION_EVALUATION: return new TessEvaluationExecutor (context, shaderSpec, extraResourcesLayout);
3096 case glu::SHADERTYPE_GEOMETRY: return new GeometryShaderExecutor (context, shaderSpec, extraResourcesLayout);
3097 case glu::SHADERTYPE_FRAGMENT: return new FragmentShaderExecutor (context, shaderSpec, extraResourcesLayout);
3098 case glu::SHADERTYPE_COMPUTE: return new ComputeShaderExecutor (context, shaderSpec, extraResourcesLayout);
3099 default:
3100 TCU_THROW(InternalError, "Unsupported shader type");
3101 }
3102 }
3103
3104 } // shaderexecutor
3105 } // vkt
3106