1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 * Copyright (c) 2018 NVIDIA Corporation
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 */ /*!
22 * \file
23 * \brief Subgroups Tests
24 */ /*--------------------------------------------------------------------*/
25
26 #include "vktSubgroupsPartitionedTests.hpp"
27 #include "vktSubgroupsTestsUtils.hpp"
28
29 #include <string>
30 #include <vector>
31
32 using namespace tcu;
33 using namespace std;
34 using namespace vk;
35 using namespace vkt;
36
37 namespace
38 {
// Subgroup arithmetic operations exercised by the partitioned tests.
//
// The enumerators form three consecutive families (reduction, inclusive scan,
// exclusive scan) that list the operators in the same order. getTestString()
// relies on the exclusive-scan family being the contiguous range
// [OPTYPE_EXCLUSIVE_ADD, OPTYPE_EXCLUSIVE_XOR], and the test-creation loop
// iterates over [0, OPTYPE_LAST), so new values must keep that layout.
enum OpType
{
	// Reductions.
	OPTYPE_ADD = 0, OPTYPE_MUL, OPTYPE_MIN, OPTYPE_MAX,
	OPTYPE_AND, OPTYPE_OR, OPTYPE_XOR,

	// Inclusive scans.
	OPTYPE_INCLUSIVE_ADD, OPTYPE_INCLUSIVE_MUL, OPTYPE_INCLUSIVE_MIN, OPTYPE_INCLUSIVE_MAX,
	OPTYPE_INCLUSIVE_AND, OPTYPE_INCLUSIVE_OR, OPTYPE_INCLUSIVE_XOR,

	// Exclusive scans.
	OPTYPE_EXCLUSIVE_ADD, OPTYPE_EXCLUSIVE_MUL, OPTYPE_EXCLUSIVE_MIN, OPTYPE_EXCLUSIVE_MAX,
	OPTYPE_EXCLUSIVE_AND, OPTYPE_EXCLUSIVE_OR, OPTYPE_EXCLUSIVE_XOR,

	// Number of operations; not a valid operation itself.
	OPTYPE_LAST
};
64
checkVertexPipelineStages(std::vector<const void * > datas,deUint32 width,deUint32)65 static bool checkVertexPipelineStages(std::vector<const void*> datas,
66 deUint32 width, deUint32)
67 {
68 const deUint32* data =
69 reinterpret_cast<const deUint32*>(datas[0]);
70 for (deUint32 x = 0; x < width; ++x)
71 {
72 deUint32 val = data[x];
73
74 if (0xFFFFFF != val)
75 {
76 return false;
77 }
78 }
79
80 return true;
81 }
82
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)83 static bool checkCompute(std::vector<const void*> datas,
84 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
85 deUint32)
86 {
87 const deUint32* data =
88 reinterpret_cast<const deUint32*>(datas[0]);
89
90 for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
91 {
92 for (deUint32 nY = 0; nY < numWorkgroups[1]; ++nY)
93 {
94 for (deUint32 nZ = 0; nZ < numWorkgroups[2]; ++nZ)
95 {
96 for (deUint32 lX = 0; lX < localSize[0]; ++lX)
97 {
98 for (deUint32 lY = 0; lY < localSize[1]; ++lY)
99 {
100 for (deUint32 lZ = 0; lZ < localSize[2];
101 ++lZ)
102 {
103 const deUint32 globalInvocationX =
104 nX * localSize[0] + lX;
105 const deUint32 globalInvocationY =
106 nY * localSize[1] + lY;
107 const deUint32 globalInvocationZ =
108 nZ * localSize[2] + lZ;
109
110 const deUint32 globalSizeX =
111 numWorkgroups[0] * localSize[0];
112 const deUint32 globalSizeY =
113 numWorkgroups[1] * localSize[1];
114
115 const deUint32 offset =
116 globalSizeX *
117 ((globalSizeY *
118 globalInvocationZ) +
119 globalInvocationY) +
120 globalInvocationX;
121
122 if (0xFFFFFF != data[offset])
123 {
124 return false;
125 }
126 }
127 }
128 }
129 }
130 }
131 }
132
133 return true;
134 }
135
getOpTypeName(int opType)136 std::string getOpTypeName(int opType)
137 {
138 switch (opType)
139 {
140 default:
141 DE_FATAL("Unsupported op type");
142 return "";
143 case OPTYPE_ADD:
144 return "subgroupAdd";
145 case OPTYPE_MUL:
146 return "subgroupMul";
147 case OPTYPE_MIN:
148 return "subgroupMin";
149 case OPTYPE_MAX:
150 return "subgroupMax";
151 case OPTYPE_AND:
152 return "subgroupAnd";
153 case OPTYPE_OR:
154 return "subgroupOr";
155 case OPTYPE_XOR:
156 return "subgroupXor";
157 case OPTYPE_INCLUSIVE_ADD:
158 return "subgroupInclusiveAdd";
159 case OPTYPE_INCLUSIVE_MUL:
160 return "subgroupInclusiveMul";
161 case OPTYPE_INCLUSIVE_MIN:
162 return "subgroupInclusiveMin";
163 case OPTYPE_INCLUSIVE_MAX:
164 return "subgroupInclusiveMax";
165 case OPTYPE_INCLUSIVE_AND:
166 return "subgroupInclusiveAnd";
167 case OPTYPE_INCLUSIVE_OR:
168 return "subgroupInclusiveOr";
169 case OPTYPE_INCLUSIVE_XOR:
170 return "subgroupInclusiveXor";
171 case OPTYPE_EXCLUSIVE_ADD:
172 return "subgroupExclusiveAdd";
173 case OPTYPE_EXCLUSIVE_MUL:
174 return "subgroupExclusiveMul";
175 case OPTYPE_EXCLUSIVE_MIN:
176 return "subgroupExclusiveMin";
177 case OPTYPE_EXCLUSIVE_MAX:
178 return "subgroupExclusiveMax";
179 case OPTYPE_EXCLUSIVE_AND:
180 return "subgroupExclusiveAnd";
181 case OPTYPE_EXCLUSIVE_OR:
182 return "subgroupExclusiveOr";
183 case OPTYPE_EXCLUSIVE_XOR:
184 return "subgroupExclusiveXor";
185 }
186 }
187
getOpTypeNamePartitioned(int opType)188 std::string getOpTypeNamePartitioned(int opType)
189 {
190 switch (opType)
191 {
192 default:
193 DE_FATAL("Unsupported op type");
194 return "";
195 case OPTYPE_ADD:
196 return "subgroupPartitionedAddNV";
197 case OPTYPE_MUL:
198 return "subgroupPartitionedMulNV";
199 case OPTYPE_MIN:
200 return "subgroupPartitionedMinNV";
201 case OPTYPE_MAX:
202 return "subgroupPartitionedMaxNV";
203 case OPTYPE_AND:
204 return "subgroupPartitionedAndNV";
205 case OPTYPE_OR:
206 return "subgroupPartitionedOrNV";
207 case OPTYPE_XOR:
208 return "subgroupPartitionedXorNV";
209 case OPTYPE_INCLUSIVE_ADD:
210 return "subgroupPartitionedInclusiveAddNV";
211 case OPTYPE_INCLUSIVE_MUL:
212 return "subgroupPartitionedInclusiveMulNV";
213 case OPTYPE_INCLUSIVE_MIN:
214 return "subgroupPartitionedInclusiveMinNV";
215 case OPTYPE_INCLUSIVE_MAX:
216 return "subgroupPartitionedInclusiveMaxNV";
217 case OPTYPE_INCLUSIVE_AND:
218 return "subgroupPartitionedInclusiveAndNV";
219 case OPTYPE_INCLUSIVE_OR:
220 return "subgroupPartitionedInclusiveOrNV";
221 case OPTYPE_INCLUSIVE_XOR:
222 return "subgroupPartitionedInclusiveXorNV";
223 case OPTYPE_EXCLUSIVE_ADD:
224 return "subgroupPartitionedExclusiveAddNV";
225 case OPTYPE_EXCLUSIVE_MUL:
226 return "subgroupPartitionedExclusiveMulNV";
227 case OPTYPE_EXCLUSIVE_MIN:
228 return "subgroupPartitionedExclusiveMinNV";
229 case OPTYPE_EXCLUSIVE_MAX:
230 return "subgroupPartitionedExclusiveMaxNV";
231 case OPTYPE_EXCLUSIVE_AND:
232 return "subgroupPartitionedExclusiveAndNV";
233 case OPTYPE_EXCLUSIVE_OR:
234 return "subgroupPartitionedExclusiveOrNV";
235 case OPTYPE_EXCLUSIVE_XOR:
236 return "subgroupPartitionedExclusiveXorNV";
237 }
238 }
239
getIdentity(int opType,vk::VkFormat format)240 std::string getIdentity(int opType, vk::VkFormat format)
241 {
242 bool isFloat = false;
243 bool isInt = false;
244 bool isUnsigned = false;
245
246 switch (format)
247 {
248 default:
249 DE_FATAL("Unhandled format!");
250 return "";
251 case VK_FORMAT_R32_SINT:
252 case VK_FORMAT_R32G32_SINT:
253 case VK_FORMAT_R32G32B32_SINT:
254 case VK_FORMAT_R32G32B32A32_SINT:
255 isInt = true;
256 break;
257 case VK_FORMAT_R32_UINT:
258 case VK_FORMAT_R32G32_UINT:
259 case VK_FORMAT_R32G32B32_UINT:
260 case VK_FORMAT_R32G32B32A32_UINT:
261 isUnsigned = true;
262 break;
263 case VK_FORMAT_R32_SFLOAT:
264 case VK_FORMAT_R32G32_SFLOAT:
265 case VK_FORMAT_R32G32B32_SFLOAT:
266 case VK_FORMAT_R32G32B32A32_SFLOAT:
267 case VK_FORMAT_R64_SFLOAT:
268 case VK_FORMAT_R64G64_SFLOAT:
269 case VK_FORMAT_R64G64B64_SFLOAT:
270 case VK_FORMAT_R64G64B64A64_SFLOAT:
271 isFloat = true;
272 break;
273 case VK_FORMAT_R8_USCALED:
274 case VK_FORMAT_R8G8_USCALED:
275 case VK_FORMAT_R8G8B8_USCALED:
276 case VK_FORMAT_R8G8B8A8_USCALED:
277 break; // bool types are not anything
278 }
279
280 switch (opType)
281 {
282 default:
283 DE_FATAL("Unsupported op type");
284 return "";
285 case OPTYPE_ADD:
286 case OPTYPE_INCLUSIVE_ADD:
287 case OPTYPE_EXCLUSIVE_ADD:
288 return subgroups::getFormatNameForGLSL(format) + "(0)";
289 case OPTYPE_MUL:
290 case OPTYPE_INCLUSIVE_MUL:
291 case OPTYPE_EXCLUSIVE_MUL:
292 return subgroups::getFormatNameForGLSL(format) + "(1)";
293 case OPTYPE_MIN:
294 case OPTYPE_INCLUSIVE_MIN:
295 case OPTYPE_EXCLUSIVE_MIN:
296 if (isFloat)
297 {
298 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
299 }
300 else if (isInt)
301 {
302 return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
303 }
304 else if (isUnsigned)
305 {
306 return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
307 }
308 else
309 {
310 DE_FATAL("Unhandled case");
311 return "";
312 }
313 case OPTYPE_MAX:
314 case OPTYPE_INCLUSIVE_MAX:
315 case OPTYPE_EXCLUSIVE_MAX:
316 if (isFloat)
317 {
318 return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
319 }
320 else if (isInt)
321 {
322 return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
323 }
324 else if (isUnsigned)
325 {
326 return subgroups::getFormatNameForGLSL(format) + "(0)";
327 }
328 else
329 {
330 DE_FATAL("Unhandled case");
331 return "";
332 }
333 case OPTYPE_AND:
334 case OPTYPE_INCLUSIVE_AND:
335 case OPTYPE_EXCLUSIVE_AND:
336 return subgroups::getFormatNameForGLSL(format) + "(~0)";
337 case OPTYPE_OR:
338 case OPTYPE_INCLUSIVE_OR:
339 case OPTYPE_EXCLUSIVE_OR:
340 return subgroups::getFormatNameForGLSL(format) + "(0)";
341 case OPTYPE_XOR:
342 case OPTYPE_INCLUSIVE_XOR:
343 case OPTYPE_EXCLUSIVE_XOR:
344 return subgroups::getFormatNameForGLSL(format) + "(0)";
345 }
346 }
347
getCompare(int opType,vk::VkFormat format,std::string lhs,std::string rhs)348 std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
349 {
350 std::string formatName = subgroups::getFormatNameForGLSL(format);
351 switch (format)
352 {
353 default:
354 return "all(equal(" + lhs + ", " + rhs + "))";
355 case VK_FORMAT_R8_USCALED:
356 case VK_FORMAT_R32_UINT:
357 case VK_FORMAT_R32_SINT:
358 return "(" + lhs + " == " + rhs + ")";
359 case VK_FORMAT_R32_SFLOAT:
360 case VK_FORMAT_R64_SFLOAT:
361 switch (opType)
362 {
363 default:
364 return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
365 case OPTYPE_MIN:
366 case OPTYPE_INCLUSIVE_MIN:
367 case OPTYPE_EXCLUSIVE_MIN:
368 case OPTYPE_MAX:
369 case OPTYPE_INCLUSIVE_MAX:
370 case OPTYPE_EXCLUSIVE_MAX:
371 return "(" + lhs + " == " + rhs + ")";
372 }
373 case VK_FORMAT_R32G32_SFLOAT:
374 case VK_FORMAT_R32G32B32_SFLOAT:
375 case VK_FORMAT_R32G32B32A32_SFLOAT:
376 case VK_FORMAT_R64G64_SFLOAT:
377 case VK_FORMAT_R64G64B64_SFLOAT:
378 case VK_FORMAT_R64G64B64A64_SFLOAT:
379 switch (opType)
380 {
381 default:
382 return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
383 case OPTYPE_MIN:
384 case OPTYPE_INCLUSIVE_MIN:
385 case OPTYPE_EXCLUSIVE_MIN:
386 case OPTYPE_MAX:
387 case OPTYPE_INCLUSIVE_MAX:
388 case OPTYPE_EXCLUSIVE_MAX:
389 return "all(equal(" + lhs + ", " + rhs + "))";
390 }
391 }
392 }
393
394 struct CaseDefinition
395 {
396 int opType;
397 VkShaderStageFlags shaderStage;
398 VkFormat format;
399 };
400
getTestString(const CaseDefinition & caseDef)401 string getTestString(const CaseDefinition &caseDef)
402 {
403 // NOTE: tempResult can't have anything in bits 31:24 to avoid int->float
404 // conversion overflow in framebuffer tests.
405 string fmt = subgroups::getFormatNameForGLSL(caseDef.format);
406 string bdy =
407 " uint tempResult = 0;\n"
408 " uint id = gl_SubgroupInvocationID;\n";
409
410 // Test the case where the partition has a single subset with all invocations in it.
411 // This should generate the same result as the non-partitioned function.
412 bdy +=
413 " uvec4 allBallot = mask;\n"
414 " " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
415 " " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
416 " if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
417 " tempResult |= 0x1;\n"
418 " }\n";
419
420 // The definition of a partition doesn't forbid bits corresponding to inactive
421 // invocations being in the subset with active invocations. In other words, test that
422 // bits corresponding to inactive invocations are ignored.
423 bdy +=
424 " if (0 == (gl_SubgroupInvocationID % 2)) {\n"
425 " " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
426 " " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
427 " if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
428 " tempResult |= 0x2;\n"
429 " }\n"
430 " } else {\n"
431 " tempResult |= 0x2;\n"
432 " }\n";
433
434 // Test the case where the partition has each invocation in a unique subset. For
435 // exclusive ops, the result is identity. For reduce/inclusive, it's the original value.
436 string expectedSelfResult = "data[gl_SubgroupInvocationID]";
437 if (caseDef.opType >= OPTYPE_EXCLUSIVE_ADD &&
438 caseDef.opType <= OPTYPE_EXCLUSIVE_XOR) {
439 expectedSelfResult = getIdentity(caseDef.opType, caseDef.format);
440 }
441
442 bdy +=
443 " uvec4 selfBallot = subgroupPartitionNV(gl_SubgroupInvocationID);\n"
444 " " + fmt + " selfResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], selfBallot);\n"
445 " if (" + getCompare(caseDef.opType, caseDef.format, "selfResult", expectedSelfResult) + ") {\n"
446 " tempResult |= 0x4;\n"
447 " }\n";
448
449 // Test "random" partitions based on a hash of the invocation id.
450 // This "hash" function produces interesting/randomish partitions.
451 static const char *idhash = "((id%N)+(id%(N+1))-(id%2)+(id/2))%((N+1)/2)";
452
453 bdy +=
454 " for (uint N = 1; N < 16; ++N) {\n"
455 " " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
456 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
457 " " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
458 " for (uint i = 0; i < N; ++i) {\n"
459 " " + fmt + " iFmt = " + fmt + "(i);\n"
460 " if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
461 " " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
462 " tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x4 << N) : 0;\n"
463 " }\n"
464 " }\n"
465 " }\n"
466 // tests in flow control:
467 " if (1 == (gl_SubgroupInvocationID % 2)) {\n"
468 " for (uint N = 1; N < 7; ++N) {\n"
469 " " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
470 " uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
471 " " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
472 " for (uint i = 0; i < N; ++i) {\n"
473 " " + fmt + " iFmt = " + fmt + "(i);\n"
474 " if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
475 " " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
476 " tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x20000 << N) : 0;\n"
477 " }\n"
478 " }\n"
479 " }\n"
480 " } else {\n"
481 " tempResult |= 0xFC0000;\n"
482 " }\n"
483 ;
484
485 return bdy;
486 }
487
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)488 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
489 {
490 const vk::ShaderBuildOptions buildOptions (programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
491 std::ostringstream bdy;
492
493 subgroups::setFragmentShaderFrameBuffer(programCollection);
494
495 if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
496 subgroups::setVertexShaderFrameBuffer(programCollection);
497
498 bdy << getTestString(caseDef);
499
500 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
501 {
502 std::ostringstream vertexSrc;
503 vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
504 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
505 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
506 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
507 << "layout(location = 0) in highp vec4 in_position;\n"
508 << "layout(location = 0) out float out_color;\n"
509 << "layout(set = 0, binding = 0) uniform Buffer1\n"
510 << "{\n"
511 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
512 << "};\n"
513 << "\n"
514 << "void main (void)\n"
515 << "{\n"
516 << " uvec4 mask = subgroupBallot(true);\n"
517 << bdy.str()
518 << " out_color = float(tempResult);\n"
519 << " gl_Position = in_position;\n"
520 << " gl_PointSize = 1.0f;\n"
521 << "}\n";
522 programCollection.glslSources.add("vert")
523 << glu::VertexSource(vertexSrc.str()) << buildOptions;
524 }
525 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
526 {
527 std::ostringstream geometry;
528
529 geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
530 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
531 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
532 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
533 << "layout(points) in;\n"
534 << "layout(points, max_vertices = 1) out;\n"
535 << "layout(location = 0) out float out_color;\n"
536 << "layout(set = 0, binding = 0) uniform Buffer\n"
537 << "{\n"
538 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
539 << "};\n"
540 << "\n"
541 << "void main (void)\n"
542 << "{\n"
543 << " uvec4 mask = subgroupBallot(true);\n"
544 << bdy.str()
545 << " out_color = float(tempResult);\n"
546 << " gl_Position = gl_in[0].gl_Position;\n"
547 << " EmitVertex();\n"
548 << " EndPrimitive();\n"
549 << "}\n";
550
551 programCollection.glslSources.add("geometry")
552 << glu::GeometrySource(geometry.str()) << buildOptions;
553 }
554 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
555 {
556 std::ostringstream controlSource;
557 controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
558 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
559 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
560 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
561 << "layout(vertices = 2) out;\n"
562 << "layout(location = 0) out float out_color[];\n"
563 << "layout(set = 0, binding = 0) uniform Buffer1\n"
564 << "{\n"
565 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
566 << "};\n"
567 << "\n"
568 << "void main (void)\n"
569 << "{\n"
570 << " if (gl_InvocationID == 0)\n"
571 <<" {\n"
572 << " gl_TessLevelOuter[0] = 1.0f;\n"
573 << " gl_TessLevelOuter[1] = 1.0f;\n"
574 << " }\n"
575 << " uvec4 mask = subgroupBallot(true);\n"
576 << bdy.str()
577 << " out_color[gl_InvocationID] = float(tempResult);"
578 << " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
579 << "}\n";
580
581
582 programCollection.glslSources.add("tesc")
583 << glu::TessellationControlSource(controlSource.str()) << buildOptions;
584 subgroups::setTesEvalShaderFrameBuffer(programCollection);
585 }
586 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
587 {
588
589 std::ostringstream evaluationSource;
590 evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
591 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
592 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
593 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
594 << "layout(isolines, equal_spacing, ccw ) in;\n"
595 << "layout(location = 0) out float out_color;\n"
596 << "layout(set = 0, binding = 0) uniform Buffer1\n"
597 << "{\n"
598 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
599 << "};\n"
600 << "\n"
601 << "void main (void)\n"
602 << "{\n"
603 << " uvec4 mask = subgroupBallot(true);\n"
604 << bdy.str()
605 << " out_color = float(tempResult);\n"
606 << " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
607 << "}\n";
608
609 subgroups::setTesCtrlShaderFrameBuffer(programCollection);
610 programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
611 }
612 else
613 {
614 DE_FATAL("Unsupported shader stage");
615 }
616 }
617
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)618 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
619 {
620 const string bdy = getTestString(caseDef);
621
622 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
623 {
624 std::ostringstream src;
625
626 src << "#version 450\n"
627 << "#extension GL_NV_shader_subgroup_partitioned: enable\n"
628 << "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
629 << "#extension GL_KHR_shader_subgroup_ballot: enable\n"
630 << "layout (local_size_x_id = 0, local_size_y_id = 1, "
631 "local_size_z_id = 2) in;\n"
632 << "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
633 << "{\n"
634 << " uint result[];\n"
635 << "};\n"
636 << "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
637 << "{\n"
638 << " " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
639 << "};\n"
640 << "\n"
641 << "void main (void)\n"
642 << "{\n"
643 << " uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
644 << " highp uint offset = globalSize.x * ((globalSize.y * "
645 "gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
646 "gl_GlobalInvocationID.x;\n"
647 << " uvec4 mask = subgroupBallot(true);\n"
648 << bdy
649 << " result[offset] = tempResult;\n"
650 << "}\n";
651
652 programCollection.glslSources.add("comp")
653 << glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
654 }
655 else
656 {
657 {
658 const std::string vertex =
659 "#version 450\n"
660 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
661 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
662 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
663 "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
664 "{\n"
665 " uint result[];\n"
666 "};\n"
667 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
668 "{\n"
669 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
670 "};\n"
671 "\n"
672 "void main (void)\n"
673 "{\n"
674 " uvec4 mask = subgroupBallot(true);\n"
675 + bdy+
676 " result[gl_VertexIndex] = tempResult;\n"
677 " float pixelSize = 2.0f/1024.0f;\n"
678 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
679 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
680 " gl_PointSize = 1.0f;\n"
681 "}\n";
682 programCollection.glslSources.add("vert")
683 << glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
684 }
685
686 {
687 const std::string tesc =
688 "#version 450\n"
689 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
690 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
691 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
692 "layout(vertices=1) out;\n"
693 "layout(set = 0, binding = 1, std430) buffer Buffer1\n"
694 "{\n"
695 " uint result[];\n"
696 "};\n"
697 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
698 "{\n"
699 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
700 "};\n"
701 "\n"
702 "void main (void)\n"
703 "{\n"
704 " uvec4 mask = subgroupBallot(true);\n"
705 + bdy +
706 " result[gl_PrimitiveID] = tempResult;\n"
707 " if (gl_InvocationID == 0)\n"
708 " {\n"
709 " gl_TessLevelOuter[0] = 1.0f;\n"
710 " gl_TessLevelOuter[1] = 1.0f;\n"
711 " }\n"
712 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
713 "}\n";
714 programCollection.glslSources.add("tesc")
715 << glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
716 }
717
718 {
719 const std::string tese =
720 "#version 450\n"
721 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
722 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
723 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
724 "layout(isolines) in;\n"
725 "layout(set = 0, binding = 2, std430) buffer Buffer1\n"
726 "{\n"
727 " uint result[];\n"
728 "};\n"
729 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
730 "{\n"
731 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
732 "};\n"
733 "\n"
734 "void main (void)\n"
735 "{\n"
736 " uvec4 mask = subgroupBallot(true);\n"
737 + bdy +
738 " result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
739 " float pixelSize = 2.0f/1024.0f;\n"
740 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
741 "}\n";
742 programCollection.glslSources.add("tese")
743 << glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
744 }
745
746 {
747 const std::string geometry =
748 "#version 450\n"
749 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
750 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
751 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
752 "layout(${TOPOLOGY}) in;\n"
753 "layout(points, max_vertices = 1) out;\n"
754 "layout(set = 0, binding = 3, std430) buffer Buffer1\n"
755 "{\n"
756 " uint result[];\n"
757 "};\n"
758 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
759 "{\n"
760 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
761 "};\n"
762 "\n"
763 "void main (void)\n"
764 "{\n"
765 " uvec4 mask = subgroupBallot(true);\n"
766 + bdy +
767 " result[gl_PrimitiveIDIn] = tempResult;\n"
768 " gl_Position = gl_in[0].gl_Position;\n"
769 " EmitVertex();\n"
770 " EndPrimitive();\n"
771 "}\n";
772 subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
773 programCollection.glslSources);
774 }
775
776 {
777 const std::string fragment =
778 "#version 450\n"
779 "#extension GL_NV_shader_subgroup_partitioned: enable\n"
780 "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
781 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
782 "layout(location = 0) out uint result;\n"
783 "layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
784 "{\n"
785 " " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
786 "};\n"
787 "void main (void)\n"
788 "{\n"
789 " uvec4 mask = subgroupBallot(true);\n"
790 + bdy +
791 " result = tempResult;\n"
792 "}\n";
793 programCollection.glslSources.add("fragment")
794 << glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
795 }
796 subgroups::addNoSubgroupShader(programCollection);
797 }
798 }
799
supportedCheck(Context & context,CaseDefinition caseDef)800 void supportedCheck (Context& context, CaseDefinition caseDef)
801 {
802 if (!subgroups::isSubgroupSupported(context))
803 TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
804
805 if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV))
806 {
807 TCU_THROW(NotSupportedError, "Device does not support subgroup partitioned operations");
808 }
809
810 if (subgroups::isDoubleFormat(caseDef.format) &&
811 !subgroups::isDoubleSupportedForDevice(context))
812 {
813 TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
814 }
815 }
816
noSSBOtest(Context & context,const CaseDefinition caseDef)817 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
818 {
819 if (!subgroups::areSubgroupOperationsSupportedForStage(
820 context, caseDef.shaderStage))
821 {
822 if (subgroups::areSubgroupOperationsRequiredForStage(
823 caseDef.shaderStage))
824 {
825 return tcu::TestStatus::fail(
826 "Shader stage " +
827 subgroups::getShaderStageName(caseDef.shaderStage) +
828 " is required to support subgroup operations!");
829 }
830 else
831 {
832 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
833 }
834 }
835
836 subgroups::SSBOData inputData;
837 inputData.format = caseDef.format;
838 inputData.layout = subgroups::SSBOData::LayoutStd140;
839 inputData.numElements = subgroups::maxSupportedSubgroupSize();
840 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
841
842 if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
843 return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
844 else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
845 return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
846 else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
847 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
848 else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
849 return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
850 else
851 TCU_THROW(InternalError, "Unhandled shader stage");
852 }
853
checkShaderStages(Context & context,const CaseDefinition & caseDef)854 bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
855 {
856 if (!subgroups::areSubgroupOperationsSupportedForStage(
857 context, caseDef.shaderStage))
858 {
859 if (subgroups::areSubgroupOperationsRequiredForStage(
860 caseDef.shaderStage))
861 {
862 return false;
863 }
864 else
865 {
866 TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
867 }
868 }
869 return true;
870 }
871
test(Context & context,const CaseDefinition caseDef)872 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
873 {
874 if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
875 {
876 if(!checkShaderStages(context,caseDef))
877 {
878 return tcu::TestStatus::fail(
879 "Shader stage " +
880 subgroups::getShaderStageName(caseDef.shaderStage) +
881 " is required to support subgroup operations!");
882 }
883 subgroups::SSBOData inputData;
884 inputData.format = caseDef.format;
885 inputData.layout = subgroups::SSBOData::LayoutStd430;
886 inputData.numElements = subgroups::maxSupportedSubgroupSize();
887 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
888
889 return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
890 }
891 else
892 {
893 VkPhysicalDeviceSubgroupProperties subgroupProperties;
894 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
895 subgroupProperties.pNext = DE_NULL;
896
897 VkPhysicalDeviceProperties2 properties;
898 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
899 properties.pNext = &subgroupProperties;
900
901 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
902
903 VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage & subgroupProperties.supportedStages);
904
905 if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
906 {
907 if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
908 TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
909 else
910 stages = VK_SHADER_STAGE_FRAGMENT_BIT;
911 }
912
913 if ((VkShaderStageFlagBits)0u == stages)
914 TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
915
916 subgroups::SSBOData inputData;
917 inputData.format = caseDef.format;
918 inputData.layout = subgroups::SSBOData::LayoutStd430;
919 inputData.numElements = subgroups::maxSupportedSubgroupSize();
920 inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
921 inputData.binding = 4u;
922 inputData.stages = stages;
923
924 return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
925 1, checkVertexPipelineStages, stages);
926 }
927 }
928 }
929
930 namespace vkt
931 {
932 namespace subgroups
933 {
createSubgroupsPartitionedTests(tcu::TestContext & testCtx)934 tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
935 {
936 de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
937 testCtx, "partitioned", "NV_shader_subgroup_partitioned category tests"));
938
939 const VkShaderStageFlags stages[] =
940 {
941 VK_SHADER_STAGE_VERTEX_BIT,
942 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
943 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
944 VK_SHADER_STAGE_GEOMETRY_BIT,
945 };
946
947 const VkFormat formats[] =
948 {
949 VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
950 VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
951 VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
952 VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
953 VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
954 VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
955 VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
956 VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
957 VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
958 };
959
960 for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
961 {
962 const VkFormat format = formats[formatIndex];
963
964 for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
965 {
966 bool isBool = false;
967 bool isFloat = false;
968
969 switch (format)
970 {
971 default:
972 break;
973 case VK_FORMAT_R32_SFLOAT:
974 case VK_FORMAT_R32G32_SFLOAT:
975 case VK_FORMAT_R32G32B32_SFLOAT:
976 case VK_FORMAT_R32G32B32A32_SFLOAT:
977 case VK_FORMAT_R64_SFLOAT:
978 case VK_FORMAT_R64G64_SFLOAT:
979 case VK_FORMAT_R64G64B64_SFLOAT:
980 case VK_FORMAT_R64G64B64A64_SFLOAT:
981 isFloat = true;
982 break;
983 case VK_FORMAT_R8_USCALED:
984 case VK_FORMAT_R8G8_USCALED:
985 case VK_FORMAT_R8G8B8_USCALED:
986 case VK_FORMAT_R8G8B8A8_USCALED:
987 isBool = true;
988 break;
989 }
990
991 bool isBitwiseOp = false;
992
993 switch (opTypeIndex)
994 {
995 default:
996 break;
997 case OPTYPE_AND:
998 case OPTYPE_INCLUSIVE_AND:
999 case OPTYPE_EXCLUSIVE_AND:
1000 case OPTYPE_OR:
1001 case OPTYPE_INCLUSIVE_OR:
1002 case OPTYPE_EXCLUSIVE_OR:
1003 case OPTYPE_XOR:
1004 case OPTYPE_INCLUSIVE_XOR:
1005 case OPTYPE_EXCLUSIVE_XOR:
1006 isBitwiseOp = true;
1007 break;
1008 }
1009
1010 if (isFloat && isBitwiseOp)
1011 {
1012 // Skip float with bitwise category.
1013 continue;
1014 }
1015
1016 if (isBool && !isBitwiseOp)
1017 {
1018 // Skip bool when its not the bitwise category.
1019 continue;
1020 }
1021 std::string op = getOpTypeName(opTypeIndex);
1022
1023 {
1024 const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
1025 addFunctionCaseWithPrograms(group.get(),
1026 de::toLower(op) + "_" +
1027 subgroups::getFormatNameForGLSL(format) +
1028 "_" + getShaderStageName(caseDef.shaderStage),
1029 "", supportedCheck, initPrograms, test, caseDef);
1030 }
1031
1032 {
1033 const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
1034 addFunctionCaseWithPrograms(group.get(),
1035 de::toLower(op) + "_" +
1036 subgroups::getFormatNameForGLSL(format) +
1037 "_graphic",
1038 "", supportedCheck, initPrograms, test, caseDef);
1039 }
1040
1041 for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
1042 {
1043 const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
1044 addFunctionCaseWithPrograms(group.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
1045 "_" + getShaderStageName(caseDef.shaderStage) + "_framebuffer", "",
1046 supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
1047 }
1048 }
1049 }
1050
1051 return group.release();
1052 }
1053
1054 } // subgroups
1055 } // vkt
1056
1057