1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2017 Codeplay Software Ltd.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief Subgroups Tests
23  */ /*--------------------------------------------------------------------*/
24 
25 #include "vktSubgroupsClusteredTests.hpp"
26 #include "vktSubgroupsTestsUtils.hpp"
27 
28 #include <string>
29 #include <vector>
30 
31 using namespace tcu;
32 using namespace std;
33 using namespace vk;
34 using namespace vkt;
35 
36 namespace
37 {
// Clustered subgroup operations covered by this file. The numeric values are
// contiguous from zero because the test-creation loop iterates an int from 0
// up to (but excluding) OPTYPE_CLUSTERED_LAST.
enum OpType
{
	OPTYPE_CLUSTERED_ADD	= 0,	// subgroupClusteredAdd
	OPTYPE_CLUSTERED_MUL	= 1,	// subgroupClusteredMul
	OPTYPE_CLUSTERED_MIN	= 2,	// subgroupClusteredMin
	OPTYPE_CLUSTERED_MAX	= 3,	// subgroupClusteredMax
	OPTYPE_CLUSTERED_AND	= 4,	// subgroupClusteredAnd
	OPTYPE_CLUSTERED_OR		= 5,	// subgroupClusteredOr
	OPTYPE_CLUSTERED_XOR	= 6,	// subgroupClusteredXor
	OPTYPE_CLUSTERED_LAST	= 7		// sentinel: number of operations, not an operation itself
};
49 
checkVertexPipelineStages(std::vector<const void * > datas,deUint32 width,deUint32)50 static bool checkVertexPipelineStages(std::vector<const void*> datas,
51 									  deUint32 width, deUint32)
52 {
53 	return vkt::subgroups::check(datas, width, 1);
54 }
55 
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32)56 static bool checkCompute(std::vector<const void*> datas,
57 						 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
58 						 deUint32)
59 {
60 	return vkt::subgroups::checkCompute(datas, numWorkgroups, localSize, 1);
61 }
62 
getOpTypeName(int opType)63 std::string getOpTypeName(int opType)
64 {
65 	switch (opType)
66 	{
67 		default:
68 			DE_FATAL("Unsupported op type");
69 			return "";
70 		case OPTYPE_CLUSTERED_ADD:
71 			return "subgroupClusteredAdd";
72 		case OPTYPE_CLUSTERED_MUL:
73 			return "subgroupClusteredMul";
74 		case OPTYPE_CLUSTERED_MIN:
75 			return "subgroupClusteredMin";
76 		case OPTYPE_CLUSTERED_MAX:
77 			return "subgroupClusteredMax";
78 		case OPTYPE_CLUSTERED_AND:
79 			return "subgroupClusteredAnd";
80 		case OPTYPE_CLUSTERED_OR:
81 			return "subgroupClusteredOr";
82 		case OPTYPE_CLUSTERED_XOR:
83 			return "subgroupClusteredXor";
84 	}
85 }
86 
getOpTypeOperation(int opType,vk::VkFormat format,std::string lhs,std::string rhs)87 std::string getOpTypeOperation(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
88 {
89 	switch (opType)
90 	{
91 		default:
92 			DE_FATAL("Unsupported op type");
93 			return "";
94 		case OPTYPE_CLUSTERED_ADD:
95 			return lhs + " + " + rhs;
96 		case OPTYPE_CLUSTERED_MUL:
97 			return lhs + " * " + rhs;
98 		case OPTYPE_CLUSTERED_MIN:
99 			switch (format)
100 			{
101 				default:
102 					return "min(" + lhs + ", " + rhs + ")";
103 				case VK_FORMAT_R32_SFLOAT:
104 				case VK_FORMAT_R64_SFLOAT:
105 					return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : min(" + lhs + ", " + rhs + ")))";
106 				case VK_FORMAT_R32G32_SFLOAT:
107 				case VK_FORMAT_R32G32B32_SFLOAT:
108 				case VK_FORMAT_R32G32B32A32_SFLOAT:
109 				case VK_FORMAT_R64G64_SFLOAT:
110 				case VK_FORMAT_R64G64B64_SFLOAT:
111 				case VK_FORMAT_R64G64B64A64_SFLOAT:
112 					return "mix(mix(min(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
113 			}
114 		case OPTYPE_CLUSTERED_MAX:
115 			switch (format)
116 			{
117 				default:
118 					return "max(" + lhs + ", " + rhs + ")";
119 				case VK_FORMAT_R32_SFLOAT:
120 				case VK_FORMAT_R64_SFLOAT:
121 					return "(isnan(" + lhs + ") ? " + rhs + " : (isnan(" + rhs + ") ? " + lhs + " : max(" + lhs + ", " + rhs + ")))";
122 				case VK_FORMAT_R32G32_SFLOAT:
123 				case VK_FORMAT_R32G32B32_SFLOAT:
124 				case VK_FORMAT_R32G32B32A32_SFLOAT:
125 				case VK_FORMAT_R64G64_SFLOAT:
126 				case VK_FORMAT_R64G64B64_SFLOAT:
127 				case VK_FORMAT_R64G64B64A64_SFLOAT:
128 					return "mix(mix(max(" + lhs + ", " + rhs + "), " + lhs + ", isnan(" + rhs + ")), " + rhs + ", isnan(" + lhs + "))";
129 			}
130 		case OPTYPE_CLUSTERED_AND:
131 			switch (format)
132 			{
133 				default:
134 					return lhs + " & " + rhs;
135 				case VK_FORMAT_R8_USCALED:
136 					return lhs + " && " + rhs;
137 				case VK_FORMAT_R8G8_USCALED:
138 					return "bvec2(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y)";
139 				case VK_FORMAT_R8G8B8_USCALED:
140 					return "bvec3(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z)";
141 				case VK_FORMAT_R8G8B8A8_USCALED:
142 					return "bvec4(" + lhs + ".x && " + rhs + ".x, " + lhs + ".y && " + rhs + ".y, " + lhs + ".z && " + rhs + ".z, " + lhs + ".w && " + rhs + ".w)";
143 			}
144 		case OPTYPE_CLUSTERED_OR:
145 			switch (format)
146 			{
147 				default:
148 					return lhs + " | " + rhs;
149 				case VK_FORMAT_R8_USCALED:
150 					return lhs + " || " + rhs;
151 				case VK_FORMAT_R8G8_USCALED:
152 					return "bvec2(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y)";
153 				case VK_FORMAT_R8G8B8_USCALED:
154 					return "bvec3(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z)";
155 				case VK_FORMAT_R8G8B8A8_USCALED:
156 					return "bvec4(" + lhs + ".x || " + rhs + ".x, " + lhs + ".y || " + rhs + ".y, " + lhs + ".z || " + rhs + ".z, " + lhs + ".w || " + rhs + ".w)";
157 			}
158 		case OPTYPE_CLUSTERED_XOR:
159 			switch (format)
160 			{
161 				default:
162 					return lhs + " ^ " + rhs;
163 				case VK_FORMAT_R8_USCALED:
164 					return lhs + " ^^ " + rhs;
165 				case VK_FORMAT_R8G8_USCALED:
166 					return "bvec2(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y)";
167 				case VK_FORMAT_R8G8B8_USCALED:
168 					return "bvec3(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z)";
169 				case VK_FORMAT_R8G8B8A8_USCALED:
170 					return "bvec4(" + lhs + ".x ^^ " + rhs + ".x, " + lhs + ".y ^^ " + rhs + ".y, " + lhs + ".z ^^ " + rhs + ".z, " + lhs + ".w ^^ " + rhs + ".w)";
171 			}
172 	}
173 }
174 
getIdentity(int opType,vk::VkFormat format)175 std::string getIdentity(int opType, vk::VkFormat format)
176 {
177 	bool isFloat = false;
178 	bool isInt = false;
179 	bool isUnsigned = false;
180 
181 	switch (format)
182 	{
183 		default:
184 			DE_FATAL("Unhandled format!");
185 			break;
186 		case VK_FORMAT_R32_SINT:
187 		case VK_FORMAT_R32G32_SINT:
188 		case VK_FORMAT_R32G32B32_SINT:
189 		case VK_FORMAT_R32G32B32A32_SINT:
190 			isInt = true;
191 			break;
192 		case VK_FORMAT_R32_UINT:
193 		case VK_FORMAT_R32G32_UINT:
194 		case VK_FORMAT_R32G32B32_UINT:
195 		case VK_FORMAT_R32G32B32A32_UINT:
196 			isUnsigned = true;
197 			break;
198 		case VK_FORMAT_R32_SFLOAT:
199 		case VK_FORMAT_R32G32_SFLOAT:
200 		case VK_FORMAT_R32G32B32_SFLOAT:
201 		case VK_FORMAT_R32G32B32A32_SFLOAT:
202 		case VK_FORMAT_R64_SFLOAT:
203 		case VK_FORMAT_R64G64_SFLOAT:
204 		case VK_FORMAT_R64G64B64_SFLOAT:
205 		case VK_FORMAT_R64G64B64A64_SFLOAT:
206 			isFloat = true;
207 			break;
208 		case VK_FORMAT_R8_USCALED:
209 		case VK_FORMAT_R8G8_USCALED:
210 		case VK_FORMAT_R8G8B8_USCALED:
211 		case VK_FORMAT_R8G8B8A8_USCALED:
212 			break; // bool types are not anything
213 	}
214 
215 	switch (opType)
216 	{
217 		default:
218 			DE_FATAL("Unsupported op type");
219 			return "";
220 		case OPTYPE_CLUSTERED_ADD:
221 			return subgroups::getFormatNameForGLSL(format) + "(0)";
222 		case OPTYPE_CLUSTERED_MUL:
223 			return subgroups::getFormatNameForGLSL(format) + "(1)";
224 		case OPTYPE_CLUSTERED_MIN:
225 			if (isFloat)
226 			{
227 				return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0x7f800000))";
228 			}
229 			else if (isInt)
230 			{
231 				return subgroups::getFormatNameForGLSL(format) + "(0x7fffffff)";
232 			}
233 			else if (isUnsigned)
234 			{
235 				return subgroups::getFormatNameForGLSL(format) + "(0xffffffffu)";
236 			}
237 			else
238 			{
239 				DE_FATAL("Unhandled case");
240 				return "";
241 			}
242 		case OPTYPE_CLUSTERED_MAX:
243 			if (isFloat)
244 			{
245 				return subgroups::getFormatNameForGLSL(format) + "(intBitsToFloat(0xff800000))";
246 			}
247 			else if (isInt)
248 			{
249 				return subgroups::getFormatNameForGLSL(format) + "(0x80000000)";
250 			}
251 			else if (isUnsigned)
252 			{
253 				return subgroups::getFormatNameForGLSL(format) + "(0)";
254 			}
255 			else
256 			{
257 				DE_FATAL("Unhandled case");
258 				return "";
259 			}
260 		case OPTYPE_CLUSTERED_AND:
261 			return subgroups::getFormatNameForGLSL(format) + "(~0)";
262 		case OPTYPE_CLUSTERED_OR:
263 			return subgroups::getFormatNameForGLSL(format) + "(0)";
264 		case OPTYPE_CLUSTERED_XOR:
265 			return subgroups::getFormatNameForGLSL(format) + "(0)";
266 	}
267 }
268 
getCompare(int opType,vk::VkFormat format,std::string lhs,std::string rhs)269 std::string getCompare(int opType, vk::VkFormat format, std::string lhs, std::string rhs)
270 {
271 	std::string formatName = subgroups::getFormatNameForGLSL(format);
272 	switch (format)
273 	{
274 		default:
275 			return "all(equal(" + lhs + ", " + rhs + "))";
276 		case VK_FORMAT_R8_USCALED:
277 		case VK_FORMAT_R32_UINT:
278 		case VK_FORMAT_R32_SINT:
279 			return "(" + lhs + " == " + rhs + ")";
280 		case VK_FORMAT_R32_SFLOAT:
281 		case VK_FORMAT_R64_SFLOAT:
282 			switch (opType)
283 			{
284 				default:
285 					return "(abs(" + lhs + " - " + rhs + ") < 0.00001)";
286 				case OPTYPE_CLUSTERED_MIN:
287 				case OPTYPE_CLUSTERED_MAX:
288 					return "(" + lhs + " == " + rhs + ")";
289 			}
290 		case VK_FORMAT_R32G32_SFLOAT:
291 		case VK_FORMAT_R32G32B32_SFLOAT:
292 		case VK_FORMAT_R32G32B32A32_SFLOAT:
293 		case VK_FORMAT_R64G64_SFLOAT:
294 		case VK_FORMAT_R64G64B64_SFLOAT:
295 		case VK_FORMAT_R64G64B64A64_SFLOAT:
296 			switch (opType)
297 			{
298 				default:
299 					return "all(lessThan(abs(" + lhs + " - " + rhs + "), " + formatName + "(0.00001)))";
300 				case OPTYPE_CLUSTERED_MIN:
301 				case OPTYPE_CLUSTERED_MAX:
302 					return "all(equal(" + lhs + ", " + rhs + "))";
303 			}
304 	}
305 }
306 
307 struct CaseDefinition
308 {
309 	int					opType;
310 	VkShaderStageFlags	shaderStage;
311 	VkFormat			format;
312 };
313 
getBodySource(CaseDefinition caseDef)314 std::string getBodySource(CaseDefinition caseDef)
315 {
316 	std::ostringstream bdy;
317 	bdy << "  bool tempResult = true;\n";
318 
319 	for (deUint32 i = 1; i <= subgroups::maxSupportedSubgroupSize(); i *= 2)
320 	{
321 		bdy	<< "  {\n"
322 			<< "    const uint clusterSize = " << i << ";\n"
323 			<< "    if (clusterSize <= gl_SubgroupSize)\n"
324 			<< "    {\n"
325 			<< "      " << subgroups::getFormatNameForGLSL(caseDef.format) << " op = "
326 			<< getOpTypeName(caseDef.opType) + "(data[gl_SubgroupInvocationID], clusterSize);\n"
327 			<< "      for (uint clusterOffset = 0; clusterOffset < gl_SubgroupSize; clusterOffset += clusterSize)\n"
328 			<< "      {\n"
329 			<< "        " << subgroups::getFormatNameForGLSL(caseDef.format) << " ref = "
330 			<< getIdentity(caseDef.opType, caseDef.format) << ";\n"
331 			<< "        for (uint index = clusterOffset; index < (clusterOffset + clusterSize); index++)\n"
332 			<< "        {\n"
333 			<< "          if (subgroupBallotBitExtract(mask, index))\n"
334 			<< "          {\n"
335 			<< "            ref = " << getOpTypeOperation(caseDef.opType, caseDef.format, "ref", "data[index]") << ";\n"
336 			<< "          }\n"
337 			<< "        }\n"
338 			<< "        if ((clusterOffset <= gl_SubgroupInvocationID) && (gl_SubgroupInvocationID < (clusterOffset + clusterSize)))\n"
339 			<< "        {\n"
340 			<< "          if (!" << getCompare(caseDef.opType, caseDef.format, "ref", "op") << ")\n"
341 			<< "          {\n"
342 			<< "            tempResult = false;\n"
343 			<< "          }\n"
344 			<< "        }\n"
345 			<< "      }\n"
346 			<< "    }\n"
347 			<< "  }\n";
348 	}
349 	return bdy.str();
350 }
351 
initFrameBufferPrograms(SourceCollections & programCollection,CaseDefinition caseDef)352 void initFrameBufferPrograms (SourceCollections& programCollection, CaseDefinition caseDef)
353 {
354 	const vk::ShaderBuildOptions	buildOptions	(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
355 
356 	subgroups::setFragmentShaderFrameBuffer(programCollection);
357 
358 	if (VK_SHADER_STAGE_VERTEX_BIT != caseDef.shaderStage)
359 		subgroups::setVertexShaderFrameBuffer(programCollection);
360 
361 	std::string bdy = getBodySource(caseDef);
362 
363 	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
364 	{
365 		std::ostringstream				vertexSrc;
366 		vertexSrc << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450 )<< "\n"
367 			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
368 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
369 			<< "layout(location = 0) in highp vec4 in_position;\n"
370 			<< "layout(location = 0) out float out_color;\n"
371 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
372 			<< "{\n"
373 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
374 			<< "};\n"
375 			<< "\n"
376 			<< "void main (void)\n"
377 			<< "{\n"
378 			<< "  uvec4 mask = subgroupBallot(true);\n"
379 			<< bdy
380 			<< "  out_color = float(tempResult ? 1 : 0);\n"
381 			<< "  gl_Position = in_position;\n"
382 			<< "  gl_PointSize = 1.0f;\n"
383 			<< "}\n";
384 		programCollection.glslSources.add("vert")
385 			<< glu::VertexSource(vertexSrc.str()) <<buildOptions;
386 	}
387 	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
388 	{
389 		std::ostringstream geometry;
390 
391 		geometry  << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
392 			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
393 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
394 			<< "layout(points) in;\n"
395 			<< "layout(points, max_vertices = 1) out;\n"
396 			<< "layout(location = 0) out float out_color;\n"
397 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
398 			<< "{\n"
399 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
400 			<< "};\n"
401 			<< "\n"
402 			<< "void main (void)\n"
403 			<< "{\n"
404 			<< "  uvec4 mask = subgroupBallot(true);\n"
405 			<< bdy
406 			<< "  out_color = tempResult ? 1.0 : 0.0;\n"
407 			<< "  gl_Position = gl_in[0].gl_Position;\n"
408 			<< "  EmitVertex();\n"
409 			<< "  EndPrimitive();\n"
410 			<< "}\n";
411 
412 		programCollection.glslSources.add("geometry")
413 			<< glu::GeometrySource(geometry.str()) << buildOptions;
414 	}
415 	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
416 	{
417 		std::ostringstream controlSource;
418 
419 		controlSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
420 			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
421 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
422 			<< "layout(vertices = 2) out;\n"
423 			<< "layout(location = 0) out float out_color[];\n"
424 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
425 			<< "{\n"
426 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
427 			<< "};\n"
428 			<< "\n"
429 			<< "void main (void)\n"
430 			<< "{\n"
431 			<< "  if (gl_InvocationID == 0)\n"
432 			<<"  {\n"
433 			<< "    gl_TessLevelOuter[0] = 1.0f;\n"
434 			<< "    gl_TessLevelOuter[1] = 1.0f;\n"
435 			<< "  }\n"
436 			<< "  uvec4 mask = subgroupBallot(true);\n"
437 			<< bdy
438 			<< "  out_color[gl_InvocationID] = tempResult ? 1.0 : 0.0;\n"
439 			<< "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
440 			<< "}\n";
441 
442 		programCollection.glslSources.add("tesc")
443 			<< glu::TessellationControlSource(controlSource.str()) << buildOptions;
444 		subgroups::setTesEvalShaderFrameBuffer(programCollection);
445 	}
446 	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
447 	{
448 		std::ostringstream evaluationSource;
449 
450 		evaluationSource << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450)<<"\n"
451 			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
452 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
453 			<< "layout(isolines, equal_spacing, ccw ) in;\n"
454 			<< "layout(location = 0) out float out_color;\n"
455 			<< "layout(set = 0, binding = 0) uniform Buffer1\n"
456 			<< "{\n"
457 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[" << subgroups::maxSupportedSubgroupSize() << "];\n"
458 			<< "};\n"
459 			<< "\n"
460 			<< "void main (void)\n"
461 			<< "{\n"
462 			<< "  uvec4 mask = subgroupBallot(true);\n"
463 			<< bdy
464 			<< "  out_color = tempResult ? 1.0 : 0.0;\n"
465 			<< "  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
466 			<< "}\n";
467 
468 		subgroups::setTesCtrlShaderFrameBuffer(programCollection);
469 		programCollection.glslSources.add("tese")
470 			<< glu::TessellationEvaluationSource(evaluationSource.str()) << buildOptions;
471 	}
472 	else
473 	{
474 		DE_FATAL("Unsupported shader stage");
475 	}
476 }
477 
initPrograms(SourceCollections & programCollection,CaseDefinition caseDef)478 void initPrograms(SourceCollections& programCollection, CaseDefinition caseDef)
479 {
480 	std::string bdy = getBodySource(caseDef);
481 
482 	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
483 	{
484 		std::ostringstream src;
485 
486 		src << "#version 450\n"
487 			<< "#extension GL_KHR_shader_subgroup_clustered: enable\n"
488 			<< "#extension GL_KHR_shader_subgroup_ballot: enable\n"
489 			<< "layout (local_size_x_id = 0, local_size_y_id = 1, "
490 			"local_size_z_id = 2) in;\n"
491 			<< "layout(set = 0, binding = 0, std430) buffer Buffer1\n"
492 			<< "{\n"
493 			<< "  uint result[];\n"
494 			<< "};\n"
495 			<< "layout(set = 0, binding = 1, std430) buffer Buffer2\n"
496 			<< "{\n"
497 			<< "  " << subgroups::getFormatNameForGLSL(caseDef.format) << " data[];\n"
498 			<< "};\n"
499 			<< "\n"
500 			<< "void main (void)\n"
501 			<< "{\n"
502 			<< "  uvec3 globalSize = gl_NumWorkGroups * gl_WorkGroupSize;\n"
503 			<< "  highp uint offset = globalSize.x * ((globalSize.y * "
504 			"gl_GlobalInvocationID.z) + gl_GlobalInvocationID.y) + "
505 			"gl_GlobalInvocationID.x;\n"
506 			<< "  uvec4 mask = subgroupBallot(true);\n"
507 			<< bdy
508 			<< "  result[offset] = tempResult ? 1 : 0;\n"
509 			<< "}\n";
510 
511 		programCollection.glslSources.add("comp")
512 				<< glu::ComputeSource(src.str()) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
513 	}
514 	else
515 	{
516 		{
517 			const string vertex =
518 				"#version 450\n"
519 				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
520 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
521 				"layout(set = 0, binding = 0, std430) buffer Buffer1\n"
522 				"{\n"
523 				"  uint result[];\n"
524 				"};\n"
525 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
526 				"{\n"
527 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
528 				"};\n"
529 				"\n"
530 				"void main (void)\n"
531 				"{\n"
532 				"  uvec4 mask = subgroupBallot(true);\n"
533 				+ bdy +
534 				"  result[gl_VertexIndex] = tempResult ? 1 : 0;\n"
535 				"  float pixelSize = 2.0f/1024.0f;\n"
536 				"  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
537 				"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
538 				"  gl_PointSize = 1.0f;\n"
539 				"}\n";
540 
541 			programCollection.glslSources.add("vert")
542 				<< glu::VertexSource(vertex) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
543 		}
544 
545 		{
546 			const string tesc =
547 			"#version 450\n"
548 			"#extension GL_KHR_shader_subgroup_clustered: enable\n"
549 			"#extension GL_KHR_shader_subgroup_ballot: enable\n"
550 			"layout(vertices=1) out;\n"
551 			"layout(set = 0, binding = 1, std430) buffer Buffer1\n"
552 			"{\n"
553 			"  uint result[];\n"
554 			"};\n"
555 			"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
556 			"{\n"
557 			"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
558 			"};\n"
559 			"\n"
560 			"void main (void)\n"
561 			"{\n"
562 			"  uvec4 mask = subgroupBallot(true);\n"
563 			+ bdy +
564 			"  result[gl_PrimitiveID] = tempResult ? 1 : 0;\n"
565 			"  if (gl_InvocationID == 0)\n"
566 			"  {\n"
567 			"    gl_TessLevelOuter[0] = 1.0f;\n"
568 			"    gl_TessLevelOuter[1] = 1.0f;\n"
569 			"  }\n"
570 			"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
571 			"}\n";
572 
573 			programCollection.glslSources.add("tesc")
574 					<< glu::TessellationControlSource(tesc) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
575 		}
576 
577 		{
578 			const string tese =
579 				"#version 450\n"
580 				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
581 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
582 				"layout(isolines) in;\n"
583 				"layout(set = 0, binding = 2, std430) buffer Buffer1\n"
584 				"{\n"
585 				"  uint result[];\n"
586 				"};\n"
587 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
588 				"{\n"
589 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
590 				"};\n"
591 				"\n"
592 				"void main (void)\n"
593 				"{\n"
594 				"  uvec4 mask = subgroupBallot(true);\n"
595 				+ bdy +
596 				"  result[gl_PrimitiveID * 2 + uint(gl_TessCoord.x + 0.5)] = tempResult ? 1 : 0;\n"
597 				"  float pixelSize = 2.0f/1024.0f;\n"
598 				"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
599 				"}\n";
600 			programCollection.glslSources.add("tese")
601 					<< glu::TessellationEvaluationSource(tese) << vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
602 		}
603 
604 		{
605 			const string geometry =
606 				"#version 450\n"
607 				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
608 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
609 				"layout(${TOPOLOGY}) in;\n"
610 				"layout(points, max_vertices = 1) out;\n"
611 				"layout(set = 0, binding = 3, std430) buffer Buffer1\n"
612 				"{\n"
613 				"  uint result[];\n"
614 				"};\n"
615 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
616 				"{\n"
617 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
618 				"};\n"
619 				"\n"
620 				"void main (void)\n"
621 				"{\n"
622 				"  uvec4 mask = subgroupBallot(true);\n"
623 				+ bdy +
624 				"  result[gl_PrimitiveIDIn] = tempResult ? 1 : 0;\n"
625 				"  gl_Position = gl_in[0].gl_Position;\n"
626 				"  EmitVertex();\n"
627 				"  EndPrimitive();\n"
628 				"}\n";
629 			subgroups::addGeometryShadersFromTemplate(geometry, vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u), programCollection.glslSources);
630 		}
631 
632 		{
633 			const string fragment =
634 				"#version 450\n"
635 				"#extension GL_KHR_shader_subgroup_clustered: enable\n"
636 				"#extension GL_KHR_shader_subgroup_ballot: enable\n"
637 				"layout(location = 0) out uint result;\n"
638 				"layout(set = 0, binding = 4, std430) readonly buffer Buffer2\n"
639 				"{\n"
640 				"  " + subgroups::getFormatNameForGLSL(caseDef.format) + " data[];\n"
641 				"};\n"
642 				"void main (void)\n"
643 				"{\n"
644 				"  uvec4 mask = subgroupBallot(true);\n"
645 				+ bdy +
646 				"  result = tempResult ? 1 : 0;\n"
647 				"}\n";
648 			programCollection.glslSources.add("fragment")
649 				<< glu::FragmentSource(fragment)<< vk::ShaderBuildOptions(programCollection.usedVulkanVersion, vk::SPIRV_VERSION_1_3, 0u);
650 		}
651 
652 		subgroups::addNoSubgroupShader(programCollection);
653 	}
654 }
655 
supportedCheck(Context & context,CaseDefinition caseDef)656 void supportedCheck (Context& context, CaseDefinition caseDef)
657 {
658 	if (!subgroups::isSubgroupSupported(context))
659 		TCU_THROW(NotSupportedError, "Subgroup operations are not supported");
660 
661 	if (!subgroups::isSubgroupFeatureSupportedForDevice(context, VK_SUBGROUP_FEATURE_CLUSTERED_BIT))
662 		TCU_THROW(NotSupportedError, "Device does not support subgroup clustered operations");
663 
664 	if (subgroups::isDoubleFormat(caseDef.format) &&
665 			!subgroups::isDoubleSupportedForDevice(context))
666 	{
667 		TCU_THROW(NotSupportedError, "Device does not support subgroup double operations");
668 	}
669 }
670 
noSSBOtest(Context & context,const CaseDefinition caseDef)671 tcu::TestStatus noSSBOtest (Context& context, const CaseDefinition caseDef)
672 {
673 	if (!subgroups::areSubgroupOperationsSupportedForStage(
674 				context, caseDef.shaderStage))
675 	{
676 		if (subgroups::areSubgroupOperationsRequiredForStage(
677 					caseDef.shaderStage))
678 		{
679 			return tcu::TestStatus::fail(
680 					   "Shader stage " +
681 					   subgroups::getShaderStageName(caseDef.shaderStage) +
682 					   " is required to support subgroup operations!");
683 		}
684 		else
685 		{
686 			TCU_THROW(NotSupportedError, "Device does not support subgroup operations for this stage");
687 		}
688 	}
689 
690 	subgroups::SSBOData inputData;
691 	inputData.format = caseDef.format;
692 	inputData.layout = subgroups::SSBOData::LayoutStd140;
693 	inputData.numElements = subgroups::maxSupportedSubgroupSize();
694 	inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
695 
696 	if (VK_SHADER_STAGE_VERTEX_BIT == caseDef.shaderStage)
697 		return subgroups::makeVertexFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
698 	else if (VK_SHADER_STAGE_GEOMETRY_BIT == caseDef.shaderStage)
699 		return subgroups::makeGeometryFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages);
700 	else if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT == caseDef.shaderStage)
701 		return subgroups::makeTessellationEvaluationFrameBufferTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
702 	else if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT == caseDef.shaderStage)
703 		return subgroups::makeTessellationEvaluationFrameBufferTest(context,  VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
704 	else
705 		TCU_THROW(InternalError, "Unhandled shader stage");
706 }
707 
test(Context & context,const CaseDefinition caseDef)708 tcu::TestStatus test(Context& context, const CaseDefinition caseDef)
709 {
710 	if (VK_SHADER_STAGE_COMPUTE_BIT == caseDef.shaderStage)
711 	{
712 		if (!subgroups::areSubgroupOperationsSupportedForStage(context, caseDef.shaderStage))
713 		{
714 				return tcu::TestStatus::fail(
715 						   "Shader stage " +
716 						   subgroups::getShaderStageName(caseDef.shaderStage) +
717 						   " is required to support subgroup operations!");
718 		}
719 		subgroups::SSBOData inputData;
720 		inputData.format = caseDef.format;
721 		inputData.layout = subgroups::SSBOData::LayoutStd430;
722 		inputData.numElements = subgroups::maxSupportedSubgroupSize();
723 		inputData.initializeType = subgroups::SSBOData::InitializeNonZero;
724 
725 		return subgroups::makeComputeTest(context, VK_FORMAT_R32_UINT, &inputData, 1, checkCompute);
726 	}
727 	else
728 	{
729 		VkPhysicalDeviceSubgroupProperties subgroupProperties;
730 		subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
731 		subgroupProperties.pNext = DE_NULL;
732 
733 		VkPhysicalDeviceProperties2 properties;
734 		properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
735 		properties.pNext = &subgroupProperties;
736 
737 		context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
738 
739 		VkShaderStageFlagBits stages = (VkShaderStageFlagBits)(caseDef.shaderStage  & subgroupProperties.supportedStages);
740 
741 		if (VK_SHADER_STAGE_FRAGMENT_BIT != stages && !subgroups::isVertexSSBOSupportedForDevice(context))
742 		{
743 			if ( (stages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
744 				TCU_THROW(NotSupportedError, "Device does not support vertex stage SSBO writes");
745 			else
746 				stages = VK_SHADER_STAGE_FRAGMENT_BIT;
747 		}
748 
749 		if ((VkShaderStageFlagBits)0u == stages)
750 			TCU_THROW(NotSupportedError, "Subgroup operations are not supported for any graphic shader");
751 
752 		subgroups::SSBOData inputData;
753 		inputData.format			= caseDef.format;
754 		inputData.layout			= subgroups::SSBOData::LayoutStd430;
755 		inputData.numElements		= subgroups::maxSupportedSubgroupSize();
756 		inputData.initializeType	= subgroups::SSBOData::InitializeNonZero;
757 		inputData.binding			= 4u;
758 		inputData.stages			= stages;
759 
760 		return subgroups::allStages(context, VK_FORMAT_R32_UINT, &inputData, 1, checkVertexPipelineStages, stages);
761 	}
762 }
763 }
764 
765 namespace vkt
766 {
767 namespace subgroups
768 {
createSubgroupsClusteredTests(tcu::TestContext & testCtx)769 tcu::TestCaseGroup* createSubgroupsClusteredTests(tcu::TestContext& testCtx)
770 {
771 	de::MovePtr<tcu::TestCaseGroup> graphicGroup(new tcu::TestCaseGroup(
772 		testCtx, "graphics", "Subgroup clustered category tests: graphics"));
773 	de::MovePtr<tcu::TestCaseGroup> computeGroup(new tcu::TestCaseGroup(
774 		testCtx, "compute", "Subgroup clustered category tests: compute"));
775 	de::MovePtr<tcu::TestCaseGroup> framebufferGroup(new tcu::TestCaseGroup(
776 		testCtx, "framebuffer", "Subgroup clustered category tests: framebuffer"));
777 
778 	const VkShaderStageFlags stages[] =
779 	{
780 		VK_SHADER_STAGE_VERTEX_BIT,
781 		VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
782 		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
783 		VK_SHADER_STAGE_GEOMETRY_BIT
784 	};
785 
786 	const VkFormat formats[] =
787 	{
788 		VK_FORMAT_R32_SINT, VK_FORMAT_R32G32_SINT, VK_FORMAT_R32G32B32_SINT,
789 		VK_FORMAT_R32G32B32A32_SINT, VK_FORMAT_R32_UINT, VK_FORMAT_R32G32_UINT,
790 		VK_FORMAT_R32G32B32_UINT, VK_FORMAT_R32G32B32A32_UINT,
791 		VK_FORMAT_R32_SFLOAT, VK_FORMAT_R32G32_SFLOAT,
792 		VK_FORMAT_R32G32B32_SFLOAT, VK_FORMAT_R32G32B32A32_SFLOAT,
793 		VK_FORMAT_R64_SFLOAT, VK_FORMAT_R64G64_SFLOAT,
794 		VK_FORMAT_R64G64B64_SFLOAT, VK_FORMAT_R64G64B64A64_SFLOAT,
795 		VK_FORMAT_R8_USCALED, VK_FORMAT_R8G8_USCALED,
796 		VK_FORMAT_R8G8B8_USCALED, VK_FORMAT_R8G8B8A8_USCALED,
797 	};
798 
799 	for (int formatIndex = 0; formatIndex < DE_LENGTH_OF_ARRAY(formats); ++formatIndex)
800 	{
801 		const VkFormat format = formats[formatIndex];
802 
803 		for (int opTypeIndex = 0; opTypeIndex < OPTYPE_CLUSTERED_LAST; ++opTypeIndex)
804 		{
805 			bool isBool = false;
806 			bool isFloat = false;
807 
808 			switch (format)
809 			{
810 				default:
811 					break;
812 				case VK_FORMAT_R32_SFLOAT:
813 				case VK_FORMAT_R32G32_SFLOAT:
814 				case VK_FORMAT_R32G32B32_SFLOAT:
815 				case VK_FORMAT_R32G32B32A32_SFLOAT:
816 				case VK_FORMAT_R64_SFLOAT:
817 				case VK_FORMAT_R64G64_SFLOAT:
818 				case VK_FORMAT_R64G64B64_SFLOAT:
819 				case VK_FORMAT_R64G64B64A64_SFLOAT:
820 					isFloat = true;
821 					break;
822 				case VK_FORMAT_R8_USCALED:
823 				case VK_FORMAT_R8G8_USCALED:
824 				case VK_FORMAT_R8G8B8_USCALED:
825 				case VK_FORMAT_R8G8B8A8_USCALED:
826 					isBool = true;
827 					break;
828 			}
829 
830 			bool isBitwiseOp = false;
831 
832 			switch (opTypeIndex)
833 			{
834 				default:
835 					break;
836 				case OPTYPE_CLUSTERED_AND:
837 				case OPTYPE_CLUSTERED_OR:
838 				case OPTYPE_CLUSTERED_XOR:
839 					isBitwiseOp = true;
840 					break;
841 			}
842 
843 			if (isFloat && isBitwiseOp)
844 			{
845 				// Skip float with bitwise category.
846 				continue;
847 			}
848 
849 			if (isBool && !isBitwiseOp)
850 			{
851 				// Skip bool when its not the bitwise category.
852 				continue;
853 			}
854 
855 			const std::string name = de::toLower(getOpTypeName(opTypeIndex))
856 				+"_" + subgroups::getFormatNameForGLSL(format);
857 
858 			{
859 				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_COMPUTE_BIT, format};
860 				addFunctionCaseWithPrograms(computeGroup.get(), name, "", supportedCheck, initPrograms, test, caseDef);
861 			}
862 
863 			{
864 				const CaseDefinition caseDef = {opTypeIndex, VK_SHADER_STAGE_ALL_GRAPHICS, format};
865 				addFunctionCaseWithPrograms(graphicGroup.get(), name,
866 										"", supportedCheck, initPrograms, test, caseDef);
867 			}
868 
869 			for (int stageIndex = 0; stageIndex < DE_LENGTH_OF_ARRAY(stages); ++stageIndex)
870 			{
871 				const CaseDefinition caseDef = {opTypeIndex, stages[stageIndex], format};
872 				addFunctionCaseWithPrograms(framebufferGroup.get(), name +"_" + getShaderStageName(caseDef.shaderStage), "",
873 											supportedCheck, initFrameBufferPrograms, noSSBOtest, caseDef);
874 			}
875 		}
876 	}
877 	de::MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(
878 		testCtx, "clustered", "Subgroup clustered category tests"));
879 
880 	group->addChild(graphicGroup.release());
881 	group->addChild(computeGroup.release());
882 	group->addChild(framebufferGroup.release());
883 
884 	return group.release();
885 }
886 
887 } // subgroups
888 } // vkt
889