1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2017 Codeplay Software Ltd.
7  * Copyright (c) 2018 NVIDIA Corporation
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  */ /*!
22  * \file
23  * \brief Subgroups Tests
24  */ /*--------------------------------------------------------------------*/
25 
26 #include "vktSubgroupsPartitionedTests.hpp"
27 #include "vktSubgroupsTestsUtils.hpp"
28 
29 #include <string>
30 #include <vector>
31 
32 using namespace tcu;
33 using namespace std;
34 using namespace vk;
35 using namespace vkt;
36 
37 namespace
38 {
// Subgroup arithmetic operations covered by the partitioned tests.
// The enumerators are laid out as three consecutive families (reduce,
// inclusive scan, exclusive scan), each listing the same seven operators in
// the same order. Range checks such as
// [OPTYPE_EXCLUSIVE_ADD, OPTYPE_EXCLUSIVE_XOR] rely on this ordering.
enum OpType
{
	// Reductions.
	OPTYPE_ADD = 0,
	OPTYPE_MUL,
	OPTYPE_MIN,
	OPTYPE_MAX,
	OPTYPE_AND,
	OPTYPE_OR,
	OPTYPE_XOR,

	// Inclusive scans.
	OPTYPE_INCLUSIVE_ADD,
	OPTYPE_INCLUSIVE_MUL,
	OPTYPE_INCLUSIVE_MIN,
	OPTYPE_INCLUSIVE_MAX,
	OPTYPE_INCLUSIVE_AND,
	OPTYPE_INCLUSIVE_OR,
	OPTYPE_INCLUSIVE_XOR,

	// Exclusive scans.
	OPTYPE_EXCLUSIVE_ADD,
	OPTYPE_EXCLUSIVE_MUL,
	OPTYPE_EXCLUSIVE_MIN,
	OPTYPE_EXCLUSIVE_MAX,
	OPTYPE_EXCLUSIVE_AND,
	OPTYPE_EXCLUSIVE_OR,
	OPTYPE_EXCLUSIVE_XOR,

	// Number of operations; not a valid operation itself.
	OPTYPE_LAST
};
64 
checkVertexPipelineStages(std::vector<const void * > datas,deUint32 width,deUint32)65 static bool checkVertexPipelineStages(std::vector<const void*> datas,
66 									  deUint32 width, deUint32)
67 {
68 	const deUint32* data =
69 		reinterpret_cast<const deUint32*>(datas[0]);
70 	for (deUint32 x = 0; x < width; ++x)
71 	{
72 		deUint32 val = data[x];
73 
74 		if (0xFFFFFF != val)
75 		{
76 			return false;
77 		}
78 	}
79 
80 	return true;
81 }
82 
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)83 static bool checkCompute(std::vector<const void*> datas,
84 						 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
85 						 deUint32)
86 {
87 	const deUint32* data =
88 		reinterpret_cast<const deUint32*>(datas[0]);
89 
90 	for (deUint32 nX = 0; nX < numWorkgroups[0]; ++nX)
91 	{
92 		for (deUint32 nY = 0; nY < numWorkgroups[1]; ++nY)
93 		{
94 			for (deUint32 nZ = 0; nZ < numWorkgroups[2]; ++nZ)
95 			{
96 				for (deUint32 lX = 0; lX < localSize[0]; ++lX)
97 				{
98 					for (deUint32 lY = 0; lY < localSize[1]; ++lY)
99 					{
100 						for (deUint32 lZ = 0; lZ < localSize[2];
101 								++lZ)
102 						{
103 							const deUint32 globalInvocationX =
104 								nX * localSize[0] + lX;
105 							const deUint32 globalInvocationY =
106 								nY * localSize[1] + lY;
107 							const deUint32 globalInvocationZ =
108 								nZ * localSize[2] + lZ;
109 
110 							const deUint32 globalSizeX =
111 								numWorkgroups[0] * localSize[0];
112 							const deUint32 globalSizeY =
113 								numWorkgroups[1] * localSize[1];
114 
115 							const deUint32 offset =
116 								globalSizeX *
117 								((globalSizeY *
118 								  globalInvocationZ) +
119 								 globalInvocationY) +
120 								globalInvocationX;
121 
122 							if (0xFFFFFF != data[offset])
123 							{
124 								return false;
125 							}
126 						}
127 					}
128 				}
129 			}
130 		}
131 	}
132 
133 	return true;
134 }
135 
getOpTypeName(int opType)136 std::string getOpTypeName(int opType)
137 {
138 	switch (opType)
139 	{
140 		default:
141 			DE_FATAL("Unsupported op type");
142 			return "";
143 		case OPTYPE_ADD:
144 			return "subgroupAdd";
145 		case OPTYPE_MUL:
146 			return "subgroupMul";
147 		case OPTYPE_MIN:
148 			return "subgroupMin";
149 		case OPTYPE_MAX:
150 			return "subgroupMax";
151 		case OPTYPE_AND:
152 			return "subgroupAnd";
153 		case OPTYPE_OR:
154 			return "subgroupOr";
155 		case OPTYPE_XOR:
156 			return "subgroupXor";
157 		case OPTYPE_INCLUSIVE_ADD:
158 			return "subgroupInclusiveAdd";
159 		case OPTYPE_INCLUSIVE_MUL:
160 			return "subgroupInclusiveMul";
161 		case OPTYPE_INCLUSIVE_MIN:
162 			return "subgroupInclusiveMin";
163 		case OPTYPE_INCLUSIVE_MAX:
164 			return "subgroupInclusiveMax";
165 		case OPTYPE_INCLUSIVE_AND:
166 			return "subgroupInclusiveAnd";
167 		case OPTYPE_INCLUSIVE_OR:
168 			return "subgroupInclusiveOr";
169 		case OPTYPE_INCLUSIVE_XOR:
170 			return "subgroupInclusiveXor";
171 		case OPTYPE_EXCLUSIVE_ADD:
172 			return "subgroupExclusiveAdd";
173 		case OPTYPE_EXCLUSIVE_MUL:
174 			return "subgroupExclusiveMul";
175 		case OPTYPE_EXCLUSIVE_MIN:
176 			return "subgroupExclusiveMin";
177 		case OPTYPE_EXCLUSIVE_MAX:
178 			return "subgroupExclusiveMax";
179 		case OPTYPE_EXCLUSIVE_AND:
180 			return "subgroupExclusiveAnd";
181 		case OPTYPE_EXCLUSIVE_OR:
182 			return "subgroupExclusiveOr";
183 		case OPTYPE_EXCLUSIVE_XOR:
184 			return "subgroupExclusiveXor";
185 	}
186 }
187 
getOpTypeNamePartitioned(int opType)188 std::string getOpTypeNamePartitioned(int opType)
189 {
190 	switch (opType)
191 	{
192 		default:
193 			DE_FATAL("Unsupported op type");
194 			return "";
195 		case OPTYPE_ADD:
196 			return "subgroupPartitionedAddNV";
197 		case OPTYPE_MUL:
198 			return "subgroupPartitionedMulNV";
199 		case OPTYPE_MIN:
200 			return "subgroupPartitionedMinNV";
201 		case OPTYPE_MAX:
202 			return "subgroupPartitionedMaxNV";
203 		case OPTYPE_AND:
204 			return "subgroupPartitionedAndNV";
205 		case OPTYPE_OR:
206 			return "subgroupPartitionedOrNV";
207 		case OPTYPE_XOR:
208 			return "subgroupPartitionedXorNV";
209 		case OPTYPE_INCLUSIVE_ADD:
210 			return "subgroupPartitionedInclusiveAddNV";
211 		case OPTYPE_INCLUSIVE_MUL:
212 			return "subgroupPartitionedInclusiveMulNV";
213 		case OPTYPE_INCLUSIVE_MIN:
214 			return "subgroupPartitionedInclusiveMinNV";
215 		case OPTYPE_INCLUSIVE_MAX:
216 			return "subgroupPartitionedInclusiveMaxNV";
217 		case OPTYPE_INCLUSIVE_AND:
218 			return "subgroupPartitionedInclusiveAndNV";
219 		case OPTYPE_INCLUSIVE_OR:
220 			return "subgroupPartitionedInclusiveOrNV";
221 		case OPTYPE_INCLUSIVE_XOR:
222 			return "subgroupPartitionedInclusiveXorNV";
223 		case OPTYPE_EXCLUSIVE_ADD:
224 			return "subgroupPartitionedExclusiveAddNV";
225 		case OPTYPE_EXCLUSIVE_MUL:
226 			return "subgroupPartitionedExclusiveMulNV";
227 		case OPTYPE_EXCLUSIVE_MIN:
228 			return "subgroupPartitionedExclusiveMinNV";
229 		case OPTYPE_EXCLUSIVE_MAX:
230 			return "subgroupPartitionedExclusiveMaxNV";
231 		case OPTYPE_EXCLUSIVE_AND:
232 			return "subgroupPartitionedExclusiveAndNV";
233 		case OPTYPE_EXCLUSIVE_OR:
234 			return "subgroupPartitionedExclusiveOrNV";
235 		case OPTYPE_EXCLUSIVE_XOR:
236 			return "subgroupPartitionedExclusiveXorNV";
237 	}
238 }
239 
getIdentity(int opType,vk::VkFormat format)240 std::string getIdentity(int opType, vk::VkFormat format)
241 {
242 	bool isFloat = false;
243 	bool isInt = false;
244 	bool isUnsigned = false;
245 
246 	switch (format)
247 	{
248 		default:
249 			DE_FATAL("Unhandled format!");
250 			return "";
251 		case VK_FORMAT_R32_SINT:
252 		case VK_FORMAT_R32G32_SINT:
253 		case VK_FORMAT_R32G32B32_SINT:
254 		case VK_FORMAT_R32G32B32A32_SINT:
255 			isInt = true;
256 			break;
257 		case VK_FORMAT_R32_UINT:
258 		case VK_FORMAT_R32G32_UINT:
259 		case VK_FORMAT_R32G32B32_UINT:
260 		case VK_FORMAT_R32G32B32A32_UINT:
261 			isUnsigned = true;
262 			break;
263 		case VK_FORMAT_R32_SFLOAT:
264 		case VK_FORMAT_R32G32_SFLOAT:
265 		case VK_FORMAT_R32G32B32_SFLOAT:
266 		case VK_FORMAT_R32G32B32A32_SFLOAT:
267 		case VK_FORMAT_R64_SFLOAT:
268 		case VK_FORMAT_R64G64_SFLOAT:
269 		case VK_FORMAT_R64G64B64_SFLOAT:
270 		case VK_FORMAT_R64G64B64A64_SFLOAT:
271 			isFloat = true;
272 			break;
273 		case VK_FORMAT_R8_USCALED:
274 		case VK_FORMAT_R8G8_USCALED:
275 		case VK_FORMAT_R8G8B8_USCALED:
276 		case VK_FORMAT_R8G8B8A8_USCALED:
277 			break; // bool types are not anything
278 	}
279 
280 	switch (opType)
281 	{
282 		default:
283 			DE_FATAL("Unsupported op type");
284 			return "";
285 		case OPTYPE_ADD:
286 		case OPTYPE_INCLUSIVE_ADD:
287 		case OPTYPE_EXCLUSIVE_ADD:
288 			return subgroups::getFormatNameForGLSL(format) + "(0)";
289 		case OPTYPE_MUL:
290 		case OPTYPE_INCLUSIVE_MUL:
291 		case OPTYPE_EXCLUSIVE_MUL:
292 			return subgroups::getFormatNameForGLSL(format) + "(1)";
293 		case OPTYPE_MIN:
294 		case OPTYPE_INCLUSIVE_MIN:
295 		case OPTYPE_EXCLUSIVE_MIN:
296 			if (isFloat)
297 			{
298 				return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
299 			}
300 			else if (isInt)
301 			{
302 				return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
303 			}
304 			else if (isUnsigned)
305 			{
306 				return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
307 			}
308 			else
309 			{
310 				DE_FATAL("Unhandled case");
311 				return "";
312 			}
313 		case OPTYPE_MAX:
314 		case OPTYPE_INCLUSIVE_MAX:
315 		case OPTYPE_EXCLUSIVE_MAX:
316 			if (isFloat)
317 			{
318 				return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
319 			}
320 			else if (isInt)
321 			{
322 				return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
323 			}
324 			else if (isUnsigned)
325 			{
326 				return subgroups::getFormatNameForGLSL(format) + "(0)";
327 			}
328 			else
329 			{
330 				DE_FATAL("Unhandled case");
331 				return "";
332 			}
333 		case OPTYPE_AND:
334 		case OPTYPE_INCLUSIVE_AND:
335 		case OPTYPE_EXCLUSIVE_AND:
336 			return subgroups::getFormatNameForGLSL(format) + "(~0)";
337 		case OPTYPE_OR:
338 		case OPTYPE_INCLUSIVE_OR:
339 		case OPTYPE_EXCLUSIVE_OR:
340 			return subgroups::getFormatNameForGLSL(format) + "(0)";
341 		case OPTYPE_XOR:
342 		case OPTYPE_INCLUSIVE_XOR:
343 		case OPTYPE_EXCLUSIVE_XOR:
344 			return subgroups::getFormatNameForGLSL(format) + "(0)";
345 	}
346 }
347 
getCompare(int opType,vk::VkFormat format,std::string lhs,std::string rhs)348 std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
349 {
350 	std::string formatName = subgroups::getFormatNameForGLSL(format);
351 	switch (format)
352 	{
353 		default:
354 			return "all(equal(" + lhs + ", " + rhs + "))";
355 		case VK_FORMAT_R8_USCALED:
356 		case VK_FORMAT_R32_UINT:
357 		case VK_FORMAT_R32_SINT:
358 			return "(" + lhs + " == " + rhs + ")";
359 		case VK_FORMAT_R32_SFLOAT:
360 		case VK_FORMAT_R64_SFLOAT:
361 			switch (opType)
362 			{
363 				default:
364 					return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
365 				case OPTYPE_MIN:
366 				case OPTYPE_INCLUSIVE_MIN:
367 				case OPTYPE_EXCLUSIVE_MIN:
368 				case OPTYPE_MAX:
369 				case OPTYPE_INCLUSIVE_MAX:
370 				case OPTYPE_EXCLUSIVE_MAX:
371 					return "(" + lhs + " == " + rhs + ")";
372 			}
373 		case VK_FORMAT_R32G32_SFLOAT:
374 		case VK_FORMAT_R32G32B32_SFLOAT:
375 		case VK_FORMAT_R32G32B32A32_SFLOAT:
376 		case VK_FORMAT_R64G64_SFLOAT:
377 		case VK_FORMAT_R64G64B64_SFLOAT:
378 		case VK_FORMAT_R64G64B64A64_SFLOAT:
379 			switch (opType)
380 			{
381 				default:
382 					return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
383 				case OPTYPE_MIN:
384 				case OPTYPE_INCLUSIVE_MIN:
385 				case OPTYPE_EXCLUSIVE_MIN:
386 				case OPTYPE_MAX:
387 				case OPTYPE_INCLUSIVE_MAX:
388 				case OPTYPE_EXCLUSIVE_MAX:
389 					return "all(equal(" + lhs + ", " + rhs + "))";
390 			}
391 	}
392 }
393 
394 struct CaseDefinition
395 {
396 	int					opType;
397 	VkShaderStageFlags	shaderStage;
398 	VkFormat			format;
399 };
400 
getTestString(const CaseDefinition & caseDef)401 string getTestString(const CaseDefinition &caseDef)
402 {
403     // NOTE: tempResult can't have anything in bits 31:24 to avoid int->float
404     // conversion overflow in framebuffer tests.
405     string fmt = subgroups::getFormatNameForGLSL(caseDef.format);
406 	string bdy =
407 		"  uint tempResult = 0;\n"
408 		"  uint id = gl_SubgroupInvocationID;\n";
409 
410     // Test the case where the partition has a single subset with all invocations in it.
411     // This should generate the same result as the non-partitioned function.
412     bdy +=
413         "  uvec4 allBallot = mask;\n"
414         "  " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
415         "  " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
416         "  if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
417         "      tempResult |= 0x1;\n"
418         "  }\n";
419 
420     // The definition of a partition doesn't forbid bits corresponding to inactive
421     // invocations being in the subset with active invocations. In other words, test that
422     // bits corresponding to inactive invocations are ignored.
423     bdy +=
424 	    "  if (0 == (gl_SubgroupInvocationID % 2)) {\n"
425         "    " + fmt + " allResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], allBallot);\n"
426         "    " + fmt + " refResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
427         "    if (" + getCompare(caseDef.opType, caseDef.format, "allResult", "refResult") + ") {\n"
428         "        tempResult |= 0x2;\n"
429         "    }\n"
430         "  } else {\n"
431         "    tempResult |= 0x2;\n"
432         "  }\n";
433 
434     // Test the case where the partition has each invocation in a unique subset. For
435     // exclusive ops, the result is identity. For reduce/inclusive, it's the original value.
436     string expectedSelfResult = "data[gl_SubgroupInvocationID]";
437     if (caseDef.opType >= OPTYPE_EXCLUSIVE_ADD &&
438         caseDef.opType <= OPTYPE_EXCLUSIVE_XOR) {
439         expectedSelfResult = getIdentity(caseDef.opType, caseDef.format);
440     }
441 
442     bdy +=
443         "  uvec4 selfBallot = subgroupPartitionNV(gl_SubgroupInvocationID);\n"
444         "  " + fmt + " selfResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], selfBallot);\n"
445         "  if (" + getCompare(caseDef.opType, caseDef.format, "selfResult", expectedSelfResult) + ") {\n"
446         "      tempResult |= 0x4;\n"
447         "  }\n";
448 
449     // Test "random" partitions based on a hash of the invocation id.
450     // This "hash" function produces interesting/randomish partitions.
451     static const char *idhash = "((id%N)+(id%(N+1))-(id%2)+(id/2))%((N+1)/2)";
452 
453     bdy +=
454 		"  for (uint N = 1; N < 16; ++N) {\n"
455 		"    " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
456 		"    uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
457 		"    " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
458 		"      for (uint i = 0; i < N; ++i) {\n"
459 		"        " + fmt + " iFmt = " + fmt + "(i);\n"
460         "        if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
461         "          " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
462         "          tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x4 << N) : 0;\n"
463         "        }\n"
464         "      }\n"
465         "  }\n"
466         // tests in flow control:
467 		"  if (1 == (gl_SubgroupInvocationID % 2)) {\n"
468         "    for (uint N = 1; N < 7; ++N) {\n"
469 		"      " + fmt + " idhashFmt = " + fmt + "(" + idhash + ");\n"
470 		"      uvec4 partitionBallot = subgroupPartitionNV(idhashFmt) & mask;\n"
471         "      " + fmt + " partitionedResult = " + getOpTypeNamePartitioned(caseDef.opType) + "(data[gl_SubgroupInvocationID], partitionBallot);\n"
472         "        for (uint i = 0; i < N; ++i) {\n"
473 		"          " + fmt + " iFmt = " + fmt + "(i);\n"
474         "          if (" + getCompare(caseDef.opType, caseDef.format, "idhashFmt", "iFmt") + ") {\n"
475         "            " + fmt + " subsetResult = " + getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID]);\n"
476         "            tempResult |= " + getCompare(caseDef.opType, caseDef.format, "partitionedResult", "subsetResult") + " ? (0x20000 << N) : 0;\n"
477         "          }\n"
478         "        }\n"
479         "    }\n"
480         "  } else {\n"
481         "    tempResult |= 0xFC0000;\n"
482         "  }\n"
483         ;
484 
485     return bdy;
486 }
487 
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)488 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
489 {
490 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
491 	std::ostringstream				bdy;
492 
493 	subgroups::setFragmentShaderFrameBuffer(programCollection);
494 
495 	if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
496 		subgroups::setVertexShaderFrameBuffer(programCollection);
497 
498 	bdy << getTestString(caseDef);
499 
500 	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
501 	{
502 		std::ostringstream vertexSrc;
503 		vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
504 			<< "#extension GL_NV_shader_subgroup_partitioned: enable\n"
505 			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
506 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
507 			<< "layout(location = 0) in highp vec4 in_position;\n"
508 			<< "layout(location = 0) out float out_color;\n"
509 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
510 			<< "{\n"
511 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
512 			<< "};\n"
513 			<< "\n"
514 			<< "void main (void)\n"
515 			<< "{\n"
516 			<< "  uvec4 mask = subgroupBallot(true);\n"
517 			<< bdy.str()
518 			<< "  out_color = float(tempResult);\n"
519 			<< "  gl_Position = in_position;\n"
520 			<< "  gl_PointSize = 1.0f;\n"
521 			<< "}\n";
522 		programCollection.glslSources.add("vert")
523 			<< glu::VertexSource(vertexSrc.str()) << buildOptions;
524 	}
525 	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
526 	{
527 		std::ostringstream geometry;
528 
529 		geometry << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
530 			<< "#extension GL_NV_shader_subgroup_partitioned: enable\n"
531 			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
532 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
533 			<< "layout(points) in;\n"
534 			<< "layout(points, max_vertices = 1) out;\n"
535 			<< "layout(location = 0) out float out_color;\n"
536 			<< "layout(set = 0, binding = 0) uniform Buffer\n"
537 			<< "{\n"
538 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
539 			<< "};\n"
540 			<< "\n"
541 			<< "void main (void)\n"
542 			<< "{\n"
543 			<< "  uvec4 mask = subgroupBallot(true);\n"
544 			<< bdy.str()
545 			<< "  out_color = float(tempResult);\n"
546 			<< "  gl_Position = gl_in[0].gl_Position;\n"
547 			<< "  EmitVertex();\n"
548 			<< "  EndPrimitive();\n"
549 			<< "}\n";
550 
551 		programCollection.glslSources.add("geometry")
552 				<< glu::GeometrySource(geometry.str()) << buildOptions;
553 	}
554 	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
555 	{
556 		std::ostringstream controlSource;
557 		controlSource  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
558 			<< "#extension GL_NV_shader_subgroup_partitioned: enable\n"
559 			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
560 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
561 			<< "layout(vertices = 2) out;\n"
562 			<< "layout(location = 0) out float out_color[];\n"
563 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
564 			<< "{\n"
565 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
566 			<< "};\n"
567 			<< "\n"
568 			<< "void main (void)\n"
569 			<< "{\n"
570 			<< "  if (gl_InvocationID == 0)\n"
571 			<<"  {\n"
572 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
573 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
574 			<< "  }\n"
575 			<< "  uvec4 mask = subgroupBallot(true);\n"
576 			<< bdy.str()
577 			<< "  out_color[gl_InvocationID] = float(tempResult);"
578 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
579 			<< "}\n";
580 
581 
582 		programCollection.glslSources.add("tesc")
583 			<< glu::TessellationControlSource(controlSource.str()) << buildOptions;
584 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
585 	}
586 	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
587 	{
588 
589 		std::ostringstream evaluationSource;
590 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
591 			<< "#extension GL_NV_shader_subgroup_partitioned: enable\n"
592 			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
593 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
594 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
595 			<< "layout(location = 0) out float out_color;\n"
596 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
597 			<< "{\n"
598 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
599 			<< "};\n"
600 			<< "\n"
601 			<< "void main (void)\n"
602 			<< "{\n"
603 			<< "  uvec4 mask = subgroupBallot(true);\n"
604 			<< bdy.str()
605 			<< "  out_color = float(tempResult);\n"
606 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
607 			<< "}\n";
608 
609 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
610 		programCollection.glslSources.add("tese") << glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
611 	}
612 	else
613 	{
614 		DE_FATAL("Unsupported shader stage");
615 	}
616 }
617 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)618 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
619 {
620 	const string bdy = getTestString(caseDef);
621 
622 	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
623 	{
624 		std::ostringstream src;
625 
626 		src << "#version 450\n"
627 			<< "#extension GL_NV_shader_subgroup_partitioned: enable\n"
628 			<< "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
629 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
630 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
631 			"local_size_z_id = 2) in;\n"
632 			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
633 			<< "{\n"
634 			<< "  uint result[];\n"
635 			<< "};\n"
636 			<< "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
637 			<< "{\n"
638 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
639 			<< "};\n"
640 			<< "\n"
641 			<< "void main (void)\n"
642 			<< "{\n"
643 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
644 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
645 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
646 			"gl_GlobalInvocationID.x;\n"
647 			<< "  uvec4 mask = subgroupBallot(true);\n"
648 			<< bdy
649 			<< "  result[offset] = tempResult;\n"
650 			<< "}\n";
651 
652 		programCollection.glslSources.add("comp")
653 				<< glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
654 	}
655 	else
656 	{
657 		{
658 			const std::string vertex =
659 				"#version 450\n"
660 				"#extension GL_NV_shader_subgroup_partitioned: enable\n"
661 			    "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
662 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
663 				"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
664 				"{\n"
665 				"  uint result[];\n"
666 				"};\n"
667 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
668 				"{\n"
669 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
670 				"};\n"
671 				"\n"
672 				"void main (void)\n"
673 				"{\n"
674 				"  uvec4 mask = subgroupBallot(true);\n"
675 				+ bdy+
676 				"  result[gl_VertexIndex] = tempResult;\n"
677 				"  float pixelSize = 2.0f/1024.0f;\n"
678 				"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
679 				"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
680 				"  gl_PointSize = 1.0f;\n"
681 				"}\n";
682 			programCollection.glslSources.add("vert")
683 					<< glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
684 		}
685 
686 		{
687 			const std::string tesc =
688 				"#version 450\n"
689 				"#extension GL_NV_shader_subgroup_partitioned: enable\n"
690 			    "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
691 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
692 				"layout(vertices=1) out;\n"
693 				"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
694 				"{\n"
695 				"  uint result[];\n"
696 				"};\n"
697 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
698 				"{\n"
699 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
700 				"};\n"
701 				"\n"
702 				"void main (void)\n"
703 				"{\n"
704 				"  uvec4 mask = subgroupBallot(true);\n"
705 				+ bdy +
706 				"  result[gl_PrimitiveID] = tempResult;\n"
707 				"  if (gl_InvocationID == 0)\n"
708 				"  {\n"
709 				"    gl_TessLevelOuter[0] = 1.0f;\n"
710 				"    gl_TessLevelOuter[1] = 1.0f;\n"
711 				"  }\n"
712 				"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
713 				"}\n";
714 			programCollection.glslSources.add("tesc")
715 				<< glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
716 		}
717 
718 		{
719 			const std::string tese =
720 				"#version 450\n"
721 				"#extension GL_NV_shader_subgroup_partitioned: enable\n"
722 			    "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
723 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
724 				"layout(isolines) in;\n"
725 				"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
726 				"{\n"
727 				"  uint result[];\n"
728 				"};\n"
729 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
730 				"{\n"
731 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
732 				"};\n"
733 				"\n"
734 				"void main (void)\n"
735 				"{\n"
736 				"  uvec4 mask = subgroupBallot(true);\n"
737 				+ bdy +
738 				"  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult;\n"
739 				"  float pixelSize = 2.0f/1024.0f;\n"
740 				"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
741 				"}\n";
742 			programCollection.glslSources.add("tese")
743 				<< glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
744 		}
745 
746 		{
747 			const std::string geometry =
748 				"#version 450\n"
749 				"#extension GL_NV_shader_subgroup_partitioned: enable\n"
750 			    "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
751 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
752 				"layout(${TOPOLOGY}) in;\n"
753 				"layout(points, max_vertices = 1) out;\n"
754 				"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
755 				"{\n"
756 				"  uint result[];\n"
757 				"};\n"
758 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
759 				"{\n"
760 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
761 				"};\n"
762 				"\n"
763 				"void main (void)\n"
764 				"{\n"
765 				"  uvec4 mask = subgroupBallot(true);\n"
766 				 + bdy +
767 				"  result[gl_PrimitiveIDIn] = tempResult;\n"
768 				"  gl_Position = gl_in[0].gl_Position;\n"
769 				"  EmitVertex();\n"
770 				"  EndPrimitive();\n"
771 				"}\n";
772 			subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u),
773 													  programCollection.glslSources);
774 		}
775 
776 		{
777 			const std::string fragment =
778 				"#version 450\n"
779 				"#extension GL_NV_shader_subgroup_partitioned: enable\n"
780 			    "#extension GL_KHR_shader_subgroup_arithmetic: enable\n"
781 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
782 				"layout(location = 0) out uint result;\n"
783 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
784 				"{\n"
785 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
786 				"};\n"
787 				"void main (void)\n"
788 				"{\n"
789 				"  uvec4 mask = subgroupBallot(true);\n"
790 				+ bdy +
791 				"  result = tempResult;\n"
792 				"}\n";
793 			programCollection.glslSources.add("fragment")
794 				<< glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
795 		}
796 		subgroups::addNoSubgroupShader(programCollection);
797 	}
798 }
799 
supportedCheck(Context & context,CaseDefinition caseDef)800 void supportedCheck (Context& context, CaseDefinition caseDef)
801 {
802 	if (!subgroups::isSubgroupSupported(context))
803 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
804 
805 	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV))
806 	{
807 		TCU_THROW(NotSupportedError, "Device does not support subgroup partitioned operations");
808 	}
809 
810 	if (subgroups::isDoubleFormat(caseDef.format) &&
811 			!subgroups::isDoubleSupportedForDevice(context))
812 	{
813 		TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
814 	}
815 }
816 
noSSBOtest(Context & context,const CaseDefinition caseDef)817 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
818 {
819 	if (!subgroups::areSubgroupOperationsSupportedForStage(
820 				context, caseDef.shaderStage))
821 	{
822 		if (subgroups::areSubgroupOperationsRequiredForStage(
823 					caseDef.shaderStage))
824 		{
825 			return tcu::TestStatus::fail(
826 					   "Shader stage " +
827 					   subgroups::getShaderStageName(caseDef.shaderStage) +
828 					   " is required to support subgroup operations!");
829 		}
830 		else
831 		{
832 			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
833 		}
834 	}
835 
836 	subgroups::SSBOData inputData;
837 	inputData.format = caseDef.format;
838 	inputData.layout = subgroups::SSBOData::LayoutStd140;
839 	inputData.numElements = subgroups::maxSupportedSubgroupSize();
840 	inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
841 
842 	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
843 		return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
844 	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
845 		return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
846 	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
847 		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
848 	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
849 		return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
850 	else
851 		TCU_THROW(InternalError, "Unhandled shader stage");
852 }
853 
checkShaderStages(Context & context,const CaseDefinition & caseDef)854 bool checkShaderStages (Context& context, const CaseDefinition& caseDef)
855 {
856 	if (!subgroups::areSubgroupOperationsSupportedForStage(
857 				context, caseDef.shaderStage))
858 	{
859 		if (subgroups::areSubgroupOperationsRequiredForStage(
860 					caseDef.shaderStage))
861 		{
862 			return false;
863 		}
864 		else
865 		{
866 			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
867 		}
868 	}
869 	return true;
870 }
871 
test(Context & context,const CaseDefinition caseDef)872 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
873 {
874 	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
875 	{
876 		if(!checkShaderStages(context,caseDef))
877 		{
878 			return tcu::TestStatus::fail(
879 							"Shader stage " +
880 							subgroups::getShaderStageName(caseDef.shaderStage) +
881 							" is required to support subgroup operations!");
882 		}
883 		subgroups::SSBOData inputData;
884 		inputData.format = caseDef.format;
885 		inputData.layout = subgroups::SSBOData::LayoutStd430;
886 		inputData.numElements = subgroups::maxSupportedSubgroupSize();
887 		inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
888 
889 		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
890 	}
891 	else
892 	{
893 		VkPhysicalDeviceSubgroupProperties subgroupProperties;
894 		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
895 		subgroupProperties.pNext = DE_NULL;
896 
897 		VkPhysicalDeviceProperties2 properties;
898 		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
899 		properties.pNext = &subgroupProperties;
900 
901 		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
902 
903 		VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
904 
905 		if ( VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
906 		{
907 			if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
908 				TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
909 			else
910 				stages = VK_SHADER_STAGE_FRAGMENT_BIT;
911 		}
912 
913 		if ((VkShaderStageFlagBits)0u == stages)
914 			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
915 
916 		subgroups::SSBOData inputData;
917 		inputData.format			= caseDef.format;
918 		inputData.layout			= subgroups::SSBOData::LayoutStd430;
919 		inputData.numElements		= subgroups::maxSupportedSubgroupSize();
920 		inputData.initializeType	= subgroups::SSBOData::InitializeNonZero;
921 		inputData.binding			= 4u;
922 		inputData.stages			= stages;
923 
924 		return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData,
925 										 1, checkVertexPipelineStages, stages);
926 	}
927 }
928 }
929 
930 namespace vkt
931 {
932 namespace subgroups
933 {
createSubgroupsPartitionedTests(tcu::TestContext & testCtx)934 tcu::TestCaseGroup* createSubgroupsPartitionedTests(tcu::TestContext& testCtx)
935 {
936 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
937 			testCtx, "partitioned", "NV_shader_subgroup_partitioned category tests"));
938 
939 	const VkShaderStageFlags stages[] =
940 	{
941 		VK_SHADER_STAGE_VERTEX_BIT,
942 		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
943 		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
944 		VK_SHADER_STAGE_GEOMETRY_BIT,
945 	};
946 
947 	const VkFormat formats[] =
948 	{
949 		VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
950 		VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
951 		VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
952 		VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
953 		VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
954 		VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
955 		VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
956 		VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
957 		VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
958 	};
959 
960 	for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
961 	{
962 		const VkFormat format = formats[formatIndex];
963 
964 		for (int opTypeIndex = 0; opTypeIndex < OPTYPE_LAST; ++opTypeIndex)
965 		{
966 			bool isBool = false;
967 			bool isFloat = false;
968 
969 			switch (format)
970 			{
971 				default:
972 					break;
973 				case VK_FORMAT_R32_SFLOAT:
974 				case VK_FORMAT_R32G32_SFLOAT:
975 				case VK_FORMAT_R32G32B32_SFLOAT:
976 				case VK_FORMAT_R32G32B32A32_SFLOAT:
977 				case VK_FORMAT_R64_SFLOAT:
978 				case VK_FORMAT_R64G64_SFLOAT:
979 				case VK_FORMAT_R64G64B64_SFLOAT:
980 				case VK_FORMAT_R64G64B64A64_SFLOAT:
981 					isFloat = true;
982 					break;
983 				case VK_FORMAT_R8_USCALED:
984 				case VK_FORMAT_R8G8_USCALED:
985 				case VK_FORMAT_R8G8B8_USCALED:
986 				case VK_FORMAT_R8G8B8A8_USCALED:
987 					isBool = true;
988 					break;
989 			}
990 
991 			bool isBitwiseOp = false;
992 
993 			switch (opTypeIndex)
994 			{
995 				default:
996 					break;
997 				case OPTYPE_AND:
998 				case OPTYPE_INCLUSIVE_AND:
999 				case OPTYPE_EXCLUSIVE_AND:
1000 				case OPTYPE_OR:
1001 				case OPTYPE_INCLUSIVE_OR:
1002 				case OPTYPE_EXCLUSIVE_OR:
1003 				case OPTYPE_XOR:
1004 				case OPTYPE_INCLUSIVE_XOR:
1005 				case OPTYPE_EXCLUSIVE_XOR:
1006 					isBitwiseOp = true;
1007 					break;
1008 			}
1009 
1010 			if (isFloat && isBitwiseOp)
1011 			{
1012 				// Skip float with bitwise category.
1013 				continue;
1014 			}
1015 
1016 			if (isBool && !isBitwiseOp)
1017 			{
1018 				// Skip bool when its not the bitwise category.
1019 				continue;
1020 			}
1021 			std::string op = getOpTypeName(opTypeIndex);
1022 
1023 			{
1024 				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
1025 				addFunctionCaseWithPrograms(group.get(),
1026 											de::toLower(op) + "_" +
1027 											subgroups::getFormatNameForGLSL(format) +
1028 											"_" + getShaderStageName(caseDef.shaderStage),
1029 											"", supportedCheck, initPrograms, test, caseDef);
1030 			}
1031 
1032 			{
1033 				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
1034 				addFunctionCaseWithPrograms(group.get(),
1035 											de::toLower(op) + "_" +
1036 											subgroups::getFormatNameForGLSL(format) +
1037 											"_graphic",
1038 											"", supportedCheck, initPrograms, test, caseDef);
1039 			}
1040 
1041 			for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
1042 			{
1043 				const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
1044 				addFunctionCaseWithPrograms(group.get(), de::toLower(op) + "_" + subgroups::getFormatNameForGLSL(format) +
1045 											"_" + getShaderStageName(caseDef.shaderStage) + "_framebuffer", "",
1046 											supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
1047 			}
1048 		}
1049 	}
1050 
1051 	return group.release();
1052 }
1053 
1054 } // subgroups
1055 } // vkt
1056 
1057