/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017 The Khronos Group Inc.
 * Copyright (c) 2017 Codeplay Software Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */ /*!
 * \file
 * \brief Subgroups Tests
 */ /*--------------------------------------------------------------------*/

#include "vktSubgroupsClusteredTests.hpp"
#include "vktSubgroupsTestsUtils.hpp"

#include <string>
#include <vector>

using namespace tcu;
using namespace std;
using namespace vk;
using namespace vkt;

namespace
{
enum OpType
{
	OPTYPE_CLUSTERED_ADD = 0,
	OPTYPE_CLUSTERED_MUL,
	OPTYPE_CLUSTERED_MIN,
	OPTYPE_CLUSTERED_MAX,
	OPTYPE_CLUSTERED_AND,
	OPTYPE_CLUSTERED_OR,
	OPTYPE_CLUSTERED_XOR,
	OPTYPE_CLUSTERED_LAST
};

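// Result verification callbacks shared by all cases in this file. The generated
// shaders write 1 to their result slot when every clustered operation they checked
// matched the reference value, so both helpers simply ask the common subgroup
// utilities to verify that each output element equals 1.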
static bool checkVertexPipelineStages(std::vector<const void*> datas,
									  deUint32 width, deUint32)
{
	return vkt::subgroups::check(datas, width, 1);
}

static bool checkCompute(std::vector<const void*> datas,
						 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
						 deUint32)
{
	return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
}

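// Maps an OpType to the GL_KHR_shader_subgroup_clustered builtin that the
// generated GLSL calls.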
std::string getOpTypeName(int opType)
{
	switch (opType)
	{
		default:
			DE_FATAL("Unsupported op type");
			return "";
		case OPTYPE_CLUSTERED_ADD:
			return "subgroupClusteredAdd";
		case OPTYPE_CLUSTERED_MUL:
			return "subgroupClusteredMul";
		case OPTYPE_CLUSTERED_MIN:
			return "subgroupClusteredMin";
		case OPTYPE_CLUSTERED_MAX:
			return "subgroupClusteredMax";
		case OPTYPE_CLUSTERED_AND:
			return "subgroupClusteredAnd";
		case OPTYPE_CLUSTERED_OR:
			return "subgroupClusteredOr";
		case OPTYPE_CLUSTERED_XOR:
			return "subgroupClusteredXor";
	}
}

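// Builds the GLSL expression that folds one more data element into the reference
// value. For floating point min/max the reference skips NaN operands explicitly
// (scalars via ?:, vectors via mix() with an isnan() selector), since GLSL leaves
// the result of min()/max() undefined when an operand is NaN.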
std::string getOpTypeOperation(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
{
	switch (opType)
	{
		default:
			DE_FATAL("Unsupported op type");
			return "";
		case OPTYPE_CLUSTERED_ADD:
			return lhs + " + " + rhs;
		case OPTYPE_CLUSTERED_MUL:
			return lhs + " * " + rhs;
		case OPTYPE_CLUSTERED_MIN:
			switch (format)
			{
				default:
					return "min(" + lhs + ", " + rhs + ")";
				case VK_FORMAT_R32_SFLOAT:
				case VK_FORMAT_R64_SFLOAT:
					return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : min(" + lhs + ", " + rhs + ")))";
				case VK_FORMAT_R32G32_SFLOAT:
				case VK_FORMAT_R32G32B32_SFLOAT:
				case VK_FORMAT_R32G32B32A32_SFLOAT:
				case VK_FORMAT_R64G64_SFLOAT:
				case VK_FORMAT_R64G64B64_SFLOAT:
				case VK_FORMAT_R64G64B64A64_SFLOAT:
					return "mix(mix(min(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
			}
		case OPTYPE_CLUSTERED_MAX:
			switch (format)
			{
				default:
					return "max(" + lhs + ", " + rhs + ")";
				case VK_FORMAT_R32_SFLOAT:
				case VK_FORMAT_R64_SFLOAT:
					return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : max(" + lhs + ", " + rhs + ")))";
				case VK_FORMAT_R32G32_SFLOAT:
				case VK_FORMAT_R32G32B32_SFLOAT:
				case VK_FORMAT_R32G32B32A32_SFLOAT:
				case VK_FORMAT_R64G64_SFLOAT:
				case VK_FORMAT_R64G64B64_SFLOAT:
				case VK_FORMAT_R64G64B64A64_SFLOAT:
					return "mix(mix(max(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
			}
		case OPTYPE_CLUSTERED_AND:
			switch (format)
			{
				default:
					return lhs + " & " + rhs;
				case VK_FORMAT_R8_USCALED:
					return lhs + " && " + rhs;
				case VK_FORMAT_R8G8_USCALED:
					return "bvec2(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y)";
				case VK_FORMAT_R8G8B8_USCALED:
					return "bvec3(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z)";
				case VK_FORMAT_R8G8B8A8_USCALED:
					return "bvec4(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z, " + lhs + ".w && " + rhs + ".w)";
			}
		case OPTYPE_CLUSTERED_OR:
			switch (format)
			{
				default:
					return lhs + " | " + rhs;
				case VK_FORMAT_R8_USCALED:
					return lhs + " || " + rhs;
				case VK_FORMAT_R8G8_USCALED:
					return "bvec2(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y)";
				case VK_FORMAT_R8G8B8_USCALED:
					return "bvec3(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z)";
				case VK_FORMAT_R8G8B8A8_USCALED:
					return "bvec4(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z, " + lhs + ".w || " + rhs + ".w)";
			}
		case OPTYPE_CLUSTERED_XOR:
			switch (format)
			{
				default:
					return lhs + " ^ " + rhs;
				case VK_FORMAT_R8_USCALED:
					return lhs + " ^^ " + rhs;
				case VK_FORMAT_R8G8_USCALED:
					return "bvec2(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y)";
				case VK_FORMAT_R8G8B8_USCALED:
					return "bvec3(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z)";
				case VK_FORMAT_R8G8B8A8_USCALED:
					return "bvec4(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z, " + lhs + ".w ^^ " + rhs + ".w)";
			}
	}
}

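// Identity element used to seed the reference accumulation: 0 for add/or/xor,
// 1 for mul, +infinity for min, -infinity for max and ~0 for and. The boolean
// (USCALED) formats are only ever combined with the bitwise/logical operations,
// so they need no numeric classification below.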
std::string getIdentity(int opType, vk::VkFormat format)
{
	bool isFloat = false;
	bool isInt = false;
	bool isUnsigned = false;

	switch (format)
	{
		default:
			DE_FATAL("Unhandled format!");
			break;
		case VK_FORMAT_R32_SINT:
		case VK_FORMAT_R32G32_SINT:
		case VK_FORMAT_R32G32B32_SINT:
		case VK_FORMAT_R32G32B32A32_SINT:
			isInt = true;
			break;
		case VK_FORMAT_R32_UINT:
		case VK_FORMAT_R32G32_UINT:
		case VK_FORMAT_R32G32B32_UINT:
		case VK_FORMAT_R32G32B32A32_UINT:
			isUnsigned = true;
			break;
		case VK_FORMAT_R32_SFLOAT:
		case VK_FORMAT_R32G32_SFLOAT:
		case VK_FORMAT_R32G32B32_SFLOAT:
		case VK_FORMAT_R32G32B32A32_SFLOAT:
		case VK_FORMAT_R64_SFLOAT:
		case VK_FORMAT_R64G64_SFLOAT:
		case VK_FORMAT_R64G64B64_SFLOAT:
		case VK_FORMAT_R64G64B64A64_SFLOAT:
			isFloat = true;
			break;
		case VK_FORMAT_R8_USCALED:
		case VK_FORMAT_R8G8_USCALED:
		case VK_FORMAT_R8G8B8_USCALED:
		case VK_FORMAT_R8G8B8A8_USCALED:
			break; // bool types are not anything
	}

	switch (opType)
	{
		default:
			DE_FATAL("Unsupported op type");
			return "";
		case OPTYPE_CLUSTERED_ADD:
			return subgroups::getFormatNameForGLSL(format) + "(0)";
		case OPTYPE_CLUSTERED_MUL:
			return subgroups::getFormatNameForGLSL(format) + "(1)";
		case OPTYPE_CLUSTERED_MIN:
			if (isFloat)
			{
				return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
			}
			else if (isInt)
			{
				return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
			}
			else if (isUnsigned)
			{
				return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
			}
			else
			{
				DE_FATAL("Unhandled case");
				return "";
			}
		case OPTYPE_CLUSTERED_MAX:
			if (isFloat)
			{
				return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
			}
			else if (isInt)
			{
				return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
			}
			else if (isUnsigned)
			{
				return subgroups::getFormatNameForGLSL(format) + "(0)";
			}
			else
			{
				DE_FATAL("Unhandled case");
				return "";
			}
		case OPTYPE_CLUSTERED_AND:
			return subgroups::getFormatNameForGLSL(format) + "(~0)";
		case OPTYPE_CLUSTERED_OR:
			return subgroups::getFormatNameForGLSL(format) + "(0)";
		case OPTYPE_CLUSTERED_XOR:
			return subgroups::getFormatNameForGLSL(format) + "(0)";
	}
}

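// Comparison between the reference value and the clustered result. Floating point
// add/mul results are compared with a small absolute epsilon because the
// implementation may combine cluster elements in any order, while min/max must
// return one of the inputs exactly and are therefore compared with strict equality.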
std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
{
	std::string formatName = subgroups::getFormatNameForGLSL(format);
	switch (format)
	{
		default:
			return "all(equal(" + lhs + ", " + rhs + "))";
		case VK_FORMAT_R8_USCALED:
		case VK_FORMAT_R32_UINT:
		case VK_FORMAT_R32_SINT:
			return "(" + lhs + " == " + rhs + ")";
		case VK_FORMAT_R32_SFLOAT:
		case VK_FORMAT_R64_SFLOAT:
			switch (opType)
			{
				default:
					return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
				case OPTYPE_CLUSTERED_MIN:
				case OPTYPE_CLUSTERED_MAX:
					return "(" + lhs + " == " + rhs + ")";
			}
		case VK_FORMAT_R32G32_SFLOAT:
		case VK_FORMAT_R32G32B32_SFLOAT:
		case VK_FORMAT_R32G32B32A32_SFLOAT:
		case VK_FORMAT_R64G64_SFLOAT:
		case VK_FORMAT_R64G64B64_SFLOAT:
		case VK_FORMAT_R64G64B64A64_SFLOAT:
			switch (opType)
			{
				default:
					return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
				case OPTYPE_CLUSTERED_MIN:
				case OPTYPE_CLUSTERED_MAX:
					return "all(equal(" + lhs + ", " + rhs + "))";
			}
	}
}

struct CaseDefinition
{
	int opType;
	VkShaderStageFlags shaderStage;
	VkFormat format;
};

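// Emits the GLSL body shared by every stage under test. For each power-of-two
// cluster size up to maxSupportedSubgroupSize() it calls the clustered builtin,
// then recomputes the expected value per cluster by folding the inputs of the
// active invocations reported by subgroupBallot(), and compares the two. As a
// rough sketch (the exact text depends on the operation and format), the body
// generated for one cluster size of a uint addition case looks like:
//
//   uint op = subgroupClusteredAdd(data[gl_SubgroupInvocationID], clusterSize);
//   for (uint clusterOffset = 0; clusterOffset < gl_SubgroupSize; clusterOffset += clusterSize)
//   {
//       uint ref = uint(0);
//       for (uint index = clusterOffset; index < (clusterOffset + clusterSize); index++)
//           if (subgroupBallotBitExtract(mask, index))
//               ref = ref + data[index];
//       if ((clusterOffset <= gl_SubgroupInvocationID) && (gl_SubgroupInvocationID < (clusterOffset + clusterSize)))
//           if (!(ref == op))
//               tempResult = false;
//   }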
std::string getBodySource(CaseDefinition caseDef)
{
	std::ostringstream bdy;
	bdy << " bool tempResult = true;\n";

	for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
	{
		bdy << " {\n"
			<< " const uint clusterSize = " << i << ";\n"
			<< " if (clusterSize <= gl_SubgroupSize)\n"
			<< " {\n"
			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
			<< getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID], clusterSize);\n"
			<< " for (uint clusterOffset = 0; clusterOffset < gl_SubgroupSize; clusterOffset += clusterSize)\n"
			<< " {\n"
			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " ref = "
			<< getIdentity(caseDef.opType, caseDef.format) << ";\n"
			<< " for (uint index = clusterOffset; index < (clusterOffset + clusterSize); index++)\n"
			<< " {\n"
			<< " if (subgroupBallotBitExtract(mask, index))\n"
			<< " {\n"
			<< " ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
			<< " }\n"
			<< " }\n"
			<< " if ((clusterOffset <= gl_SubgroupInvocationID) && (gl_SubgroupInvocationID < (clusterOffset + clusterSize)))\n"
			<< " {\n"
			<< " if (!" << getCompare(caseDef.opType, caseDef.format, "ref", "op") << ")\n"
			<< " {\n"
			<< " tempResult = false;\n"
			<< " }\n"
			<< " }\n"
			<< " }\n"
			<< " }\n"
			<< " }\n";
	}
	return bdy.str();
}

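// Shader generation for the "framebuffer" variant: only the stage named in the
// case definition runs the clustered checks, reading its input from a uniform
// buffer and passing the pass/fail flag downstream as a colour, so the tested
// stage does not need SSBO write support.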
void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
{
	const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);

	subgroups::setFragmentShaderFrameBuffer(programCollection);

	if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
		subgroups::setVertexShaderFrameBuffer(programCollection);

	std::string bdy = getBodySource(caseDef);

	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
	{
		std::ostringstream vertexSrc;
		vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
			<< "layout(location = 0) in highp vec4 in_position;\n"
			<< "layout(location = 0) out float out_color;\n"
			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
			<< "{\n"
			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
			<< "};\n"
			<< "\n"
			<< "void main (void)\n"
			<< "{\n"
			<< " uvec4 mask = subgroupBallot(true);\n"
			<< bdy
			<< " out_color = float(tempResult ? 1 : 0);\n"
			<< " gl_Position = in_position;\n"
			<< " gl_PointSize = 1.0f;\n"
			<< "}\n";
		programCollection.glslSources.add("vert")
			<< glu::VertexSource(vertexSrc.str()) << buildOptions;
	}
	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
	{
		std::ostringstream geometry;

		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
			<< "layout(points) in;\n"
			<< "layout(points, max_vertices = 1) out;\n"
			<< "layout(location = 0) out float out_color;\n"
			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
			<< "{\n"
			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
			<< "};\n"
			<< "\n"
			<< "void main (void)\n"
			<< "{\n"
			<< " uvec4 mask = subgroupBallot(true);\n"
			<< bdy
			<< " out_color = tempResult ? 1.0 : 0.0;\n"
			<< " gl_Position = gl_in[0].gl_Position;\n"
			<< " EmitVertex();\n"
			<< " EndPrimitive();\n"
			<< "}\n";

		programCollection.glslSources.add("geometry")
			<< glu::GeometrySource(geometry.str()) << buildOptions;
	}
	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
	{
		std::ostringstream controlSource;

		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
			<< "layout(vertices = 2) out;\n"
			<< "layout(location = 0) out float out_color[];\n"
			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
			<< "{\n"
			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
			<< "};\n"
			<< "\n"
			<< "void main (void)\n"
			<< "{\n"
			<< " if (gl_InvocationID == 0)\n"
			<< " {\n"
			<< " gl_TessLevelOuter[0] = 1.0f;\n"
			<< " gl_TessLevelOuter[1] = 1.0f;\n"
			<< " }\n"
			<< " uvec4 mask = subgroupBallot(true);\n"
			<< bdy
			<< " out_color[gl_InvocationID] = tempResult ? 1.0 : 0.0;\n"
			<< " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
			<< "}\n";

		programCollection.glslSources.add("tesc")
			<< glu::TessellationControlSource(controlSource.str()) << buildOptions;
		subgroups::setTesEvalShaderFrameBuffer(programCollection);
	}
	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
	{
		std::ostringstream evaluationSource;

		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
			<< "layout(isolines, equal_spacing, ccw) in;\n"
			<< "layout(location = 0) out float out_color;\n"
			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
			<< "{\n"
			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
			<< "};\n"
			<< "\n"
			<< "void main (void)\n"
			<< "{\n"
			<< " uvec4 mask = subgroupBallot(true);\n"
			<< bdy
			<< " out_color = tempResult ? 1.0 : 0.0;\n"
			<< " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
			<< "}\n";

		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
		programCollection.glslSources.add("tese")
			<< glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
	}
	else
	{
		DE_FATAL("Unsupported shader stage");
	}
}

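// Shader generation for the SSBO variants: a compute shader for compute cases,
// otherwise one shader per graphics stage. Each graphics stage records its
// pass/fail flag in its own result buffer (bindings 0-3, the fragment stage via
// its colour output) and all of them read the input data from the readonly
// buffer at binding 4, matching inputData.binding set in test() below.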
void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
{
	std::string bdy = getBodySource(caseDef);

	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
	{
		std::ostringstream src;

		src << "#version 450\n"
			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
			   "local_size_z_id = 2) in;\n"
			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
			<< "{\n"
			<< " uint result[];\n"
			<< "};\n"
			<< "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
			<< "{\n"
			<< " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
			<< "};\n"
			<< "\n"
			<< "void main (void)\n"
			<< "{\n"
			<< " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
			<< " highp uint offset = globalSize.x * ((globalSize.y * "
			   "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
			   "gl_GlobalInvocationID.x;\n"
			<< " uvec4 mask = subgroupBallot(true);\n"
			<< bdy
			<< " result[offset] = tempResult ? 1 : 0;\n"
			<< "}\n";

		programCollection.glslSources.add("comp")
			<< glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
	}
	else
	{
		{
			const string vertex =
				"#version 450\n"
				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
				"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
				"{\n"
				" uint result[];\n"
				"};\n"
				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
				"{\n"
				" " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
				"};\n"
				"\n"
				"void main (void)\n"
				"{\n"
				" uvec4 mask = subgroupBallot(true);\n"
				+ bdy +
				" result[gl_VertexIndex] = tempResult ? 1 : 0;\n"
				" float pixelSize = 2.0f/1024.0f;\n"
				" float pixelPosition = pixelSize/2.0f - 1.0f;\n"
				" gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
				" gl_PointSize = 1.0f;\n"
				"}\n";

			programCollection.glslSources.add("vert")
				<< glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
		}

		{
			const string tesc =
				"#version 450\n"
				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
				"layout(vertices=1) out;\n"
				"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
				"{\n"
				" uint result[];\n"
				"};\n"
				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
				"{\n"
				" " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
				"};\n"
				"\n"
				"void main (void)\n"
				"{\n"
				" uvec4 mask = subgroupBallot(true);\n"
				+ bdy +
				" result[gl_PrimitiveID] = tempResult ? 1 : 0;\n"
				" if (gl_InvocationID == 0)\n"
				" {\n"
				" gl_TessLevelOuter[0] = 1.0f;\n"
				" gl_TessLevelOuter[1] = 1.0f;\n"
				" }\n"
				" gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
				"}\n";

			programCollection.glslSources.add("tesc")
				<< glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
		}

		{
			const string tese =
				"#version 450\n"
				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
				"layout(isolines) in;\n"
				"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
				"{\n"
				" uint result[];\n"
				"};\n"
				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
				"{\n"
				" " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
				"};\n"
				"\n"
				"void main (void)\n"
				"{\n"
				" uvec4 mask = subgroupBallot(true);\n"
				+ bdy +
				" result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult ? 1 : 0;\n"
				" float pixelSize = 2.0f/1024.0f;\n"
				" gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
				"}\n";

			programCollection.glslSources.add("tese")
				<< glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
		}

		{
			const string geometry =
				"#version 450\n"
				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
				"layout(${TOPOLOGY}) in;\n"
				"layout(points, max_vertices = 1) out;\n"
				"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
				"{\n"
				" uint result[];\n"
				"};\n"
				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
				"{\n"
				" " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
				"};\n"
				"\n"
				"void main (void)\n"
				"{\n"
				" uvec4 mask = subgroupBallot(true);\n"
				+ bdy +
				" result[gl_PrimitiveIDIn] = tempResult ? 1 : 0;\n"
				" gl_Position = gl_in[0].gl_Position;\n"
				" EmitVertex();\n"
				" EndPrimitive();\n"
				"}\n";

			subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u), programCollection.glslSources);
		}

		{
			const string fragment =
				"#version 450\n"
				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
				"layout(location = 0) out uint result;\n"
				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
				"{\n"
				" " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
				"};\n"
				"void main (void)\n"
				"{\n"
				" uvec4 mask = subgroupBallot(true);\n"
				+ bdy +
				" result = tempResult ? 1 : 0;\n"
				"}\n";

			programCollection.glslSources.add("fragment")
				<< glu::FragmentSource(fragment) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
		}

		subgroups::addNoSubgroupShader(programCollection);
	}
}

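// Checked before building programs: the device must support subgroups, the
// clustered feature bit, and 64-bit floats whenever a double format is requested.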
void supportedCheck (Context& context, CaseDefinition caseDef)
{
	if (!subgroups::isSubgroupSupported(context))
		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");

	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_CLUSTERED_BIT))
		TCU_THROW(NotSupportedError, "Device does not support subgroup clustered operations");

	if (subgroups::isDoubleFormat(caseDef.format) &&
		!subgroups::isDoubleSupportedForDevice(context))
	{
		TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
	}
}

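// Entry point for the framebuffer cases: verifies subgroup support for the tested
// stage (failing outright if the stage is required to support it), then hands the
// UBO input over to the matching *FrameBufferTest helper with
// checkVertexPipelineStages as the verifier.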
tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
{
	if (!subgroups::areSubgroupOperationsSupportedForStage(
			context, caseDef.shaderStage))
	{
		if (subgroups::areSubgroupOperationsRequiredForStage(
				caseDef.shaderStage))
		{
			return tcu::TestStatus::fail(
					   "Shader stage " +
					   subgroups::getShaderStageName(caseDef.shaderStage) +
					   " is required to support subgroup operations!");
		}
		else
		{
			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
		}
	}

	subgroups::SSBOData inputData;
	inputData.format = caseDef.format;
	inputData.layout = subgroups::SSBOData::LayoutStd140;
	inputData.numElements = subgroups::maxSupportedSubgroupSize();
	inputData.initializeType = subgroups::SSBOData::InitializeNonZero;

	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
		return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
		return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
	else
		TCU_THROW(InternalError, "Unhandled shader stage");
}

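// Entry point for the compute and all-graphics-stages cases. Graphics cases
// intersect the requested stages with the supportedStages reported in
// VkPhysicalDeviceSubgroupProperties and fall back to testing only the fragment
// stage when the device cannot write to SSBOs from the vertex pipeline.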
tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
{
	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
	{
		if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
		{
			return tcu::TestStatus::fail(
					   "Shader stage " +
					   subgroups::getShaderStageName(caseDef.shaderStage) +
					   " is required to support subgroup operations!");
		}
		subgroups::SSBOData inputData;
		inputData.format = caseDef.format;
		inputData.layout = subgroups::SSBOData::LayoutStd430;
		inputData.numElements = subgroups::maxSupportedSubgroupSize();
		inputData.initializeType = subgroups::SSBOData::InitializeNonZero;

		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
	}
	else
	{
		VkPhysicalDeviceSubgroupProperties subgroupProperties;
		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
		subgroupProperties.pNext = DE_NULL;

		VkPhysicalDeviceProperties2 properties;
		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
		properties.pNext = &subgroupProperties;

		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

		VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);

		if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
		{
			if ((stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
				TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
			else
				stages = VK_SHADER_STAGE_FRAGMENT_BIT;
		}

		if ((VkShaderStageFlagBits)0u == stages)
			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphics shader");

		subgroups::SSBOData inputData;
		inputData.format = caseDef.format;
		inputData.layout = subgroups::SSBOData::LayoutStd430;
		inputData.numElements = subgroups::maxSupportedSubgroupSize();
		inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
		inputData.binding = 4u;
		inputData.stages = stages;

		return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
	}
}
}

namespace vkt
{
namespace subgroups
{
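// Registers the test matrix: every clustered operation is combined with every
// tested format, skipping float formats for bitwise operations and bool formats
// for arithmetic ones. Each combination is added to the compute group, the
// graphics group (all stages at once via SSBOs) and the framebuffer group (one
// case per vertex-pipeline stage).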
tcu::TestCaseGroup* createSubgroupsClusteredTests(tcu::TestContext& testCtx)
{
	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
		testCtx, "graphics", "Subgroup clustered category tests: graphics"));
	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
		testCtx, "compute", "Subgroup clustered category tests: compute"));
	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
		testCtx, "framebuffer", "Subgroup clustered category tests: framebuffer"));

	const VkShaderStageFlags stages[] =
	{
		VK_SHADER_STAGE_VERTEX_BIT,
		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
		VK_SHADER_STAGE_GEOMETRY_BIT
	};

	const VkFormat formats[] =
	{
		VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
		VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
		VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
		VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
		VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
		VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
		VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
		VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
		VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
	};

	for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
	{
		const VkFormat format = formats[formatIndex];

		for (int opTypeIndex = 0; opTypeIndex < OPTYPE_CLUSTERED_LAST; ++opTypeIndex)
		{
			bool isBool = false;
			bool isFloat = false;

			switch (format)
			{
				default:
					break;
				case VK_FORMAT_R32_SFLOAT:
				case VK_FORMAT_R32G32_SFLOAT:
				case VK_FORMAT_R32G32B32_SFLOAT:
				case VK_FORMAT_R32G32B32A32_SFLOAT:
				case VK_FORMAT_R64_SFLOAT:
				case VK_FORMAT_R64G64_SFLOAT:
				case VK_FORMAT_R64G64B64_SFLOAT:
				case VK_FORMAT_R64G64B64A64_SFLOAT:
					isFloat = true;
					break;
				case VK_FORMAT_R8_USCALED:
				case VK_FORMAT_R8G8_USCALED:
				case VK_FORMAT_R8G8B8_USCALED:
				case VK_FORMAT_R8G8B8A8_USCALED:
					isBool = true;
					break;
			}

			bool isBitwiseOp = false;

			switch (opTypeIndex)
			{
				default:
					break;
				case OPTYPE_CLUSTERED_AND:
				case OPTYPE_CLUSTERED_OR:
				case OPTYPE_CLUSTERED_XOR:
					isBitwiseOp = true;
					break;
			}

			if (isFloat && isBitwiseOp)
			{
				// Skip float with bitwise category.
				continue;
			}

			if (isBool && !isBitwiseOp)
			{
				// Skip bool when it is not the bitwise category.
				continue;
			}

			const std::string name = de::toLower(getOpTypeName(opTypeIndex))
				+ "_" + subgroups::getFormatNameForGLSL(format);

			{
				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
				addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
			}

			{
				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
				addFunctionCaseWithPrograms(graphicGroup.get(), name,
											"", supportedCheck, initPrograms, test, caseDef);
			}

			for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
			{
				const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
				addFunctionCaseWithPrograms(framebufferGroup.get(), name + "_" + getShaderStageName(caseDef.shaderStage), "",
											supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
			}
		}
	}

	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
		testCtx, "clustered", "Subgroup clustered category tests"));

	group->addChild(graphicGroup.release());
	group->addChild(computeGroup.release());
	group->addChild(framebufferGroup.release());

	return group.release();
}

} // subgroups
} // vkt
889