1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2017 The Khronos Group Inc.
6  * Copyright (c) 2017 Codeplay Software Ltd.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  */ /*!
21  * \file
22  * \brief Subgroups Tests Utils
23  */ /*--------------------------------------------------------------------*/
24 
25 #include "vktSubgroupsTestsUtils.hpp"
26 #include "deRandom.hpp"
27 #include "tcuCommandLine.hpp"
28 #include "tcuStringTemplate.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkImageUtil.hpp"
31 #include "vkTypeUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34 
35 using namespace tcu;
36 using namespace std;
37 using namespace vk;
38 using namespace vkt;
39 
40 namespace
41 {
getFormatSizeInBytes(const VkFormat format)42 deUint32 getFormatSizeInBytes(const VkFormat format)
43 {
44 	switch (format)
45 	{
46 		default:
47 			DE_FATAL("Unhandled format!");
48 			return 0;
49 		case VK_FORMAT_R32_SINT:
50 		case VK_FORMAT_R32_UINT:
51 			return sizeof(deInt32);
52 		case VK_FORMAT_R32G32_SINT:
53 		case VK_FORMAT_R32G32_UINT:
54 			return static_cast<deUint32>(sizeof(deInt32) * 2);
55 		case VK_FORMAT_R32G32B32_SINT:
56 		case VK_FORMAT_R32G32B32_UINT:
57 		case VK_FORMAT_R32G32B32A32_SINT:
58 		case VK_FORMAT_R32G32B32A32_UINT:
59 			return static_cast<deUint32>(sizeof(deInt32) * 4);
60 		case VK_FORMAT_R32_SFLOAT:
61 			return 4;
62 		case VK_FORMAT_R32G32_SFLOAT:
63 			return 8;
64 		case VK_FORMAT_R32G32B32_SFLOAT:
65 			return 16;
66 		case VK_FORMAT_R32G32B32A32_SFLOAT:
67 			return 16;
68 		case VK_FORMAT_R64_SFLOAT:
69 			return 8;
70 		case VK_FORMAT_R64G64_SFLOAT:
71 			return 16;
72 		case VK_FORMAT_R64G64B64_SFLOAT:
73 			return 32;
74 		case VK_FORMAT_R64G64B64A64_SFLOAT:
75 			return 32;
76 		// The below formats are used to represent bool and bvec* types. These
77 		// types are passed to the shader as int and ivec* types, before the
78 		// calculations are done as booleans. We need a distinct type here so
79 		// that the shader generators can switch on it and generate the correct
80 		// shader source for testing.
81 		case VK_FORMAT_R8_USCALED:
82 			return sizeof(deInt32);
83 		case VK_FORMAT_R8G8_USCALED:
84 			return static_cast<deUint32>(sizeof(deInt32) * 2);
85 		case VK_FORMAT_R8G8B8_USCALED:
86 		case VK_FORMAT_R8G8B8A8_USCALED:
87 			return static_cast<deUint32>(sizeof(deInt32) * 4);
88 	}
89 }
90 
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)91 deUint32 getElementSizeInBytes(
92 	const VkFormat format,
93 	const subgroups::SSBOData::InputDataLayoutType layout)
94 {
95 	deUint32 bytes = getFormatSizeInBytes(format);
96 	if (layout == subgroups::SSBOData::LayoutStd140)
97 		return bytes < 16 ? 16 : bytes;
98 	else
99 		return bytes;
100 }
101 
makePipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout)102 Move<VkPipelineLayout> makePipelineLayout(
103 	Context& context, const VkDescriptorSetLayout descriptorSetLayout)
104 {
105 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
106 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
107 		DE_NULL,			  // const void*            pNext;
108 		0u,					  // VkPipelineLayoutCreateFlags    flags;
109 		1u,					  // deUint32             setLayoutCount;
110 		&descriptorSetLayout, // const VkDescriptorSetLayout*   pSetLayouts;
111 		0u,					  // deUint32             pushConstantRangeCount;
112 		DE_NULL, // const VkPushConstantRange*   pPushConstantRanges;
113 	};
114 	return createPipelineLayout(context.getDeviceInterface(),
115 								context.getDevice(), &pipelineLayoutParams);
116 }
117 
makeRenderPass(Context & context,VkFormat format)118 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
119 {
120 	VkAttachmentReference colorReference = {
121 		0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
122 	};
123 
124 	const VkSubpassDescription subpassDescription = {0u,
125 													 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
126 													 DE_NULL, DE_NULL, 0, DE_NULL
127 													};
128 
129 	const VkSubpassDependency subpassDependencies[2] = {
130 		{   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
131 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
132 			VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
133 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
134 			VK_DEPENDENCY_BY_REGION_BIT
135 		},
136 		{   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
137 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
138 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
139 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
140 			VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
141 		},
142 	};
143 
144 	VkAttachmentDescription attachmentDescription = {0u, format,
145 													 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
146 													 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
147 													 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
148 													 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
149 													};
150 
151 	const VkRenderPassCreateInfo renderPassCreateInfo = {
152 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
153 		&attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
154 	};
155 
156 	return createRenderPass(context.getDeviceInterface(), context.getDevice(),
157 							&renderPassCreateInfo);
158 }
159 
makeFramebuffer(Context & context,const VkRenderPass renderPass,const VkImageView imageView,deUint32 width,deUint32 height)160 Move<VkFramebuffer> makeFramebuffer(Context& context,
161 									const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
162 									deUint32 height)
163 {
164 	const VkFramebufferCreateInfo framebufferCreateInfo = {
165 		VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
166 		&imageView, width, height, 1
167 	};
168 
169 	return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
170 							 &framebufferCreateInfo);
171 }
172 
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT)173 Move<VkPipeline> makeGraphicsPipeline(Context&									context,
174 									  const VkPipelineLayout					pipelineLayout,
175 									  const VkShaderStageFlags					stages,
176 									  const VkShaderModule						vertexShaderModule,
177 									  const VkShaderModule						fragmentShaderModule,
178 									  const VkShaderModule						geometryShaderModule,
179 									  const VkShaderModule						tessellationControlModule,
180 									  const VkShaderModule						tessellationEvaluationModule,
181 									  const VkRenderPass						renderPass,
182 									  const VkPrimitiveTopology					topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
183 									  const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
184 									  const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
185 									  const bool								frameBufferTests = false,
186 									  const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
187 {
188 	std::vector<VkViewport>	noViewports;
189 	std::vector<VkRect2D>	noScissors;
190 
191 	const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
192 	{
193 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
194 		DE_NULL,													// const void*									pNext;
195 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
196 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
197 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
198 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
199 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
200 	};
201 
202 	const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
203 	const VkColorComponentFlags colorComponent =
204 												numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
205 												numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
206 												numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
207 												VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
208 
209 	const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
210 	{
211 		VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
212 		VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
213 		colorComponent
214 	};
215 
216 	const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
217 	{
218 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
219 		VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
220 		{ 0.0f, 0.0f, 0.0f, 0.0f }
221 	};
222 
223 	const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
224 
225 	return vk::makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&                        vk
226 									context.getDevice(),			// const VkDevice                                device
227 									pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
228 									vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
229 									tessellationControlModule,		// const VkShaderModule                          tessellationControlShaderModule
230 									tessellationEvaluationModule,	// const VkShaderModule                          tessellationEvalShaderModule
231 									geometryShaderModule,			// const VkShaderModule                          geometryShaderModule
232 									fragmentShaderModule,			// const VkShaderModule                          fragmentShaderModule
233 									renderPass,						// const VkRenderPass                            renderPass
234 									noViewports,					// const std::vector<VkViewport>&                viewports
235 									noScissors,						// const std::vector<VkRect2D>&                  scissors
236 									topology,						// const VkPrimitiveTopology                     topology
237 									0u,								// const deUint32                                subpass
238 									patchControlPoints,				// const deUint32                                patchControlPoints
239 									&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
240 									DE_NULL,						// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
241 									DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
242 									DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
243 									&colorBlendStateCreateInfo);	// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
244 }
245 
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ)246 Move<VkPipeline> makeComputePipeline(Context& context,
247 									 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
248 									 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
249 {
250 	const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
251 
252 	const vk::VkSpecializationMapEntry entries[3] =
253 	{
254 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
255 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
256 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
257 	};
258 
259 	const vk::VkSpecializationInfo info =
260 	{
261 		/* mapEntryCount = */ 3,
262 		/* pMapEntries   = */ entries,
263 		/* dataSize      = */ sizeof(localSize),
264 		/* pData         = */ localSize
265 	};
266 
267 	const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
268 	{
269 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType;
270 		DE_NULL,												// const void*						pNext;
271 		0u,														// VkPipelineShaderStageCreateFlags	flags;
272 		VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits			stage;
273 		shaderModule,											// VkShaderModule					module;
274 		"main",													// const char*						pName;
275 		&info,													// const VkSpecializationInfo*		pSpecializationInfo;
276 	};
277 
278 	const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
279 	{
280 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
281 		DE_NULL,										// const void*						pNext;
282 		0u,												// VkPipelineCreateFlags			flags;
283 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
284 		pipelineLayout,									// VkPipelineLayout					layout;
285 		DE_NULL,										// VkPipeline						basePipelineHandle;
286 		0,												// deInt32							basePipelineIndex;
287 	};
288 
289 	return createComputePipeline(context.getDeviceInterface(),
290 								 context.getDevice(), DE_NULL, &pipelineCreateInfo);
291 }
292 
makeDescriptorSet(Context & context,const VkDescriptorPool descriptorPool,const VkDescriptorSetLayout setLayout)293 Move<VkDescriptorSet> makeDescriptorSet(Context& context,
294 										const VkDescriptorPool descriptorPool,
295 										const VkDescriptorSetLayout setLayout)
296 {
297 	const VkDescriptorSetAllocateInfo allocateParams =
298 	{
299 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType
300 		// sType;
301 		DE_NULL,		// const void*          pNext;
302 		descriptorPool, // VkDescriptorPool       descriptorPool;
303 		1u,				// deUint32           setLayoutCount;
304 		&setLayout,		// const VkDescriptorSetLayout* pSetLayouts;
305 	};
306 	return allocateDescriptorSet(
307 			   context.getDeviceInterface(), context.getDevice(), &allocateParams);
308 }
309 
makeCommandPool(Context & context)310 Move<VkCommandPool> makeCommandPool(Context& context)
311 {
312 	const VkCommandPoolCreateInfo commandPoolParams =
313 	{
314 		VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
315 		DE_NULL,									// const void*        pNext;
316 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags
317 		// flags;
318 		context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
319 	};
320 
321 	return createCommandPool(
322 			   context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
323 }
324 
makeCommandBuffer(Context & context,const VkCommandPool commandPool)325 Move<VkCommandBuffer> makeCommandBuffer(
326 	Context& context, const VkCommandPool commandPool)
327 {
328 	const VkCommandBufferAllocateInfo bufferAllocateParams =
329 	{
330 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
331 		DE_NULL,										// const void*			pNext;
332 		commandPool,									// VkCommandPool		commandPool;
333 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
334 		1u,												// deUint32				bufferCount;
335 	};
336 	return allocateCommandBuffer(context.getDeviceInterface(),
337 								 context.getDevice(), &bufferAllocateParams);
338 }
339 
submitCommandBuffer(Context & context,const VkCommandBuffer commandBuffer)340 Move<VkFence> submitCommandBuffer(
341 	Context& context, const VkCommandBuffer commandBuffer)
342 {
343 	const VkFenceCreateInfo fenceParams =
344 	{
345 		VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType    sType;
346 		DE_NULL,							 // const void*      pNext;
347 		0u,									 // VkFenceCreateFlags flags;
348 	};
349 
350 	Move<VkFence> fence(createFence(
351 							context.getDeviceInterface(), context.getDevice(), &fenceParams));
352 
353 	const VkSubmitInfo submitInfo =
354 	{
355 		VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType      sType;
356 		DE_NULL,					   // const void*        pNext;
357 		0u,							   // deUint32         waitSemaphoreCount;
358 		DE_NULL,					   // const VkSemaphore*   pWaitSemaphores;
359 		(const VkPipelineStageFlags*)DE_NULL,
360 		1u,				// deUint32         commandBufferCount;
361 		&commandBuffer, // const VkCommandBuffer* pCommandBuffers;
362 		0u,				// deUint32         signalSemaphoreCount;
363 		DE_NULL,		// const VkSemaphore*   pSignalSemaphores;
364 	};
365 
366 	vk::VkResult result = (context.getDeviceInterface().queueSubmit(
367 							   context.getUniversalQueue(), 1u, &submitInfo, *fence));
368 	VK_CHECK(result);
369 
370 	return Move<VkFence>(fence);
371 }
372 
waitFence(Context & context,Move<VkFence> fence)373 void waitFence(Context& context, Move<VkFence> fence)
374 {
375 	VK_CHECK(context.getDeviceInterface().waitForFences(
376 				 context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
377 }
378 
379 struct Buffer;
380 struct Image;
381 
382 struct BufferOrImage
383 {
isImage__anon8b2961bb0111::BufferOrImage384 	bool isImage() const
385 	{
386 		return m_isImage;
387 	}
388 
getAsBuffer__anon8b2961bb0111::BufferOrImage389 	Buffer* getAsBuffer()
390 	{
391 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
392 		return reinterpret_cast<Buffer* >(this);
393 	}
394 
getAsImage__anon8b2961bb0111::BufferOrImage395 	Image* getAsImage()
396 	{
397 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
398 		return reinterpret_cast<Image*>(this);
399 	}
400 
getType__anon8b2961bb0111::BufferOrImage401 	virtual VkDescriptorType getType() const
402 	{
403 		if (m_isImage)
404 		{
405 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
406 		}
407 		else
408 		{
409 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
410 		}
411 	}
412 
getAllocation__anon8b2961bb0111::BufferOrImage413 	Allocation& getAllocation() const
414 	{
415 		return *m_allocation;
416 	}
417 
~BufferOrImage__anon8b2961bb0111::BufferOrImage418 	virtual ~BufferOrImage() {}
419 
420 protected:
BufferOrImage__anon8b2961bb0111::BufferOrImage421 	explicit BufferOrImage(bool image) : m_isImage(image) {}
422 
423 	bool m_isImage;
424 	de::details::MovePtr<Allocation> m_allocation;
425 };
426 
427 struct Buffer : public BufferOrImage
428 {
Buffer__anon8b2961bb0111::Buffer429 	explicit Buffer(
430 		Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
431 		: BufferOrImage		(false)
432 		, m_sizeInBytes		(sizeInBytes)
433 		, m_usage			(usage)
434 	{
435 		const vk::VkBufferCreateInfo bufferCreateInfo =
436 		{
437 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
438 			DE_NULL,
439 			0u,
440 			m_sizeInBytes,
441 			m_usage,
442 			VK_SHARING_MODE_EXCLUSIVE,
443 			0u,
444 			DE_NULL,
445 		};
446 		m_buffer = createBuffer(context.getDeviceInterface(),
447 								context.getDevice(), &bufferCreateInfo);
448 		vk::VkMemoryRequirements req = getBufferMemoryRequirements(
449 										   context.getDeviceInterface(), context.getDevice(), *m_buffer);
450 		m_allocation = context.getDefaultAllocator().allocate(
451 						   req, MemoryRequirement::HostVisible);
452 		VK_CHECK(context.getDeviceInterface().bindBufferMemory(
453 					 context.getDevice(), *m_buffer, m_allocation->getMemory(),
454 					 m_allocation->getOffset()));
455 	}
456 
getType__anon8b2961bb0111::Buffer457 	virtual VkDescriptorType getType() const
458 	{
459 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
460 		{
461 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
462 		}
463 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
464 	}
465 
getBuffer__anon8b2961bb0111::Buffer466 	VkBuffer getBuffer() const {
467 		return *m_buffer;
468 	}
469 
getBufferPtr__anon8b2961bb0111::Buffer470 	const VkBuffer* getBufferPtr() const {
471 		return &(*m_buffer);
472 	}
473 
getSize__anon8b2961bb0111::Buffer474 	VkDeviceSize getSize() const {
475 		return m_sizeInBytes;
476 	}
477 
478 private:
479 	Move<VkBuffer>				m_buffer;
480 	VkDeviceSize				m_sizeInBytes;
481 	const VkBufferUsageFlags	m_usage;
482 };
483 
484 struct Image : public BufferOrImage
485 {
Image__anon8b2961bb0111::Image486 	explicit Image(Context& context, deUint32 width, deUint32 height,
487 				   VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
488 		: BufferOrImage(true)
489 	{
490 		const VkImageCreateInfo imageCreateInfo =
491 		{
492 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
493 			format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
494 			VK_IMAGE_TILING_OPTIMAL, usage,
495 			VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
496 			VK_IMAGE_LAYOUT_UNDEFINED
497 		};
498 		m_image = createImage(context.getDeviceInterface(), context.getDevice(),
499 							  &imageCreateInfo);
500 		vk::VkMemoryRequirements req = getImageMemoryRequirements(
501 										   context.getDeviceInterface(), context.getDevice(), *m_image);
502 		req.size *= 2;
503 		m_allocation =
504 			context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
505 		VK_CHECK(context.getDeviceInterface().bindImageMemory(
506 					 context.getDevice(), *m_image, m_allocation->getMemory(),
507 					 m_allocation->getOffset()));
508 
509 		const VkComponentMapping componentMapping =
510 		{
511 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
512 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
513 		};
514 
515 		const VkImageViewCreateInfo imageViewCreateInfo =
516 		{
517 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
518 			VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
519 			{
520 				VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
521 			}
522 		};
523 
524 		m_imageView = createImageView(context.getDeviceInterface(),
525 									  context.getDevice(), &imageViewCreateInfo);
526 
527 		const struct VkSamplerCreateInfo samplerCreateInfo =
528 		{
529 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
530 			DE_NULL,
531 			0u,
532 			VK_FILTER_NEAREST,
533 			VK_FILTER_NEAREST,
534 			VK_SAMPLER_MIPMAP_MODE_NEAREST,
535 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
536 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
537 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
538 			0.0f,
539 			VK_FALSE,
540 			1.0f,
541 			DE_FALSE,
542 			VK_COMPARE_OP_ALWAYS,
543 			0.0f,
544 			0.0f,
545 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
546 			VK_FALSE,
547 		};
548 
549 		m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
550 	}
551 
getImage__anon8b2961bb0111::Image552 	VkImage getImage() const {
553 		return *m_image;
554 	}
555 
getImageView__anon8b2961bb0111::Image556 	VkImageView getImageView() const {
557 		return *m_imageView;
558 	}
559 
getSampler__anon8b2961bb0111::Image560 	VkSampler getSampler() const {
561 		return *m_sampler;
562 	}
563 
564 private:
565 	Move<VkImage> m_image;
566 	Move<VkImageView> m_imageView;
567 	Move<VkSampler> m_sampler;
568 };
569 }
570 
getSharedMemoryBallotHelper()571 std::string vkt::subgroups::getSharedMemoryBallotHelper()
572 {
573 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
574 			"uvec4 sharedMemoryBallot(bool vote)\n"
575 			"{\n"
576 			"  uint groupOffset = gl_SubgroupID;\n"
577 			"  // One invocation in the group 0's the whole group's data\n"
578 			"  if (subgroupElect())\n"
579 			"  {\n"
580 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
581 			"  }\n"
582 			"  subgroupMemoryBarrierShared();\n"
583 			"  if (vote)\n"
584 			"  {\n"
585 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
586 			"    const highp uint bitToSet = 1u << invocationId;\n"
587 			"    switch (gl_SubgroupInvocationID / 32)\n"
588 			"    {\n"
589 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
590 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
591 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
592 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
593 			"    }\n"
594 			"  }\n"
595 			"  subgroupMemoryBarrierShared();\n"
596 			"  return superSecretComputeShaderHelper[groupOffset];\n"
597 			"}\n";
598 }
599 
getSubgroupSize(Context & context)600 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
601 {
602 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
603 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
604 	subgroupProperties.pNext = DE_NULL;
605 
606 	VkPhysicalDeviceProperties2 properties;
607 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
608 	properties.pNext = &subgroupProperties;
609 
610 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
611 
612 	return subgroupProperties.subgroupSize;
613 }
614 
maxSupportedSubgroupSize()615 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
616 	return 128u;
617 }
618 
getShaderStageName(VkShaderStageFlags stage)619 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
620 {
621 	switch (stage)
622 	{
623 		default:
624 			DE_FATAL("Unhandled stage!");
625 			return "";
626 		case VK_SHADER_STAGE_COMPUTE_BIT:
627 			return "compute";
628 		case VK_SHADER_STAGE_FRAGMENT_BIT:
629 			return "fragment";
630 		case VK_SHADER_STAGE_VERTEX_BIT:
631 			return "vertex";
632 		case VK_SHADER_STAGE_GEOMETRY_BIT:
633 			return "geometry";
634 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
635 			return "tess_control";
636 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
637 			return "tess_eval";
638 	}
639 }
640 
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)641 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
642 {
643 	switch (bit)
644 	{
645 		default:
646 			DE_FATAL("Unknown subgroup feature category!");
647 			return "";
648 		case VK_SUBGROUP_FEATURE_BASIC_BIT:
649 			return "VK_SUBGROUP_FEATURE_BASIC_BIT";
650 		case VK_SUBGROUP_FEATURE_VOTE_BIT:
651 			return "VK_SUBGROUP_FEATURE_VOTE_BIT";
652 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
653 			return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
654 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:
655 			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
656 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
657 			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
658 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
659 			return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
660 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
661 			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
662 		case VK_SUBGROUP_FEATURE_QUAD_BIT:
663 			return "VK_SUBGROUP_FEATURE_QUAD_BIT";
664 	}
665 }
666 
addNoSubgroupShader(SourceCollections & programCollection)667 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
668 {
669 	{
670 	/*
671 		"#version 450\n"
672 		"void main (void)\n"
673 		"{\n"
674 		"  float pixelSize = 2.0f/1024.0f;\n"
675 		"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
676 		"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
677 		"  gl_PointSize = 1.0f;\n"
678 		"}\n"
679 	*/
680 		const std::string vertNoSubgroup =
681 			"; SPIR-V\n"
682 			"; Version: 1.3\n"
683 			"; Generator: Khronos Glslang Reference Front End; 1\n"
684 			"; Bound: 37\n"
685 			"; Schema: 0\n"
686 			"OpCapability Shader\n"
687 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
688 			"OpMemoryModel Logical GLSL450\n"
689 			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
690 			"OpMemberDecorate %20 0 BuiltIn Position\n"
691 			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
692 			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
693 			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
694 			"OpDecorate %20 Block\n"
695 			"OpDecorate %26 BuiltIn VertexIndex\n"
696 			"%2 = OpTypeVoid\n"
697 			"%3 = OpTypeFunction %2\n"
698 			"%6 = OpTypeFloat 32\n"
699 			"%7 = OpTypePointer Function %6\n"
700 			"%9 = OpConstant %6 0.00195313\n"
701 			"%12 = OpConstant %6 2\n"
702 			"%14 = OpConstant %6 1\n"
703 			"%16 = OpTypeVector %6 4\n"
704 			"%17 = OpTypeInt 32 0\n"
705 			"%18 = OpConstant %17 1\n"
706 			"%19 = OpTypeArray %6 %18\n"
707 			"%20 = OpTypeStruct %16 %6 %19 %19\n"
708 			"%21 = OpTypePointer Output %20\n"
709 			"%22 = OpVariable %21 Output\n"
710 			"%23 = OpTypeInt 32 1\n"
711 			"%24 = OpConstant %23 0\n"
712 			"%25 = OpTypePointer Input %23\n"
713 			"%26 = OpVariable %25 Input\n"
714 			"%33 = OpConstant %6 0\n"
715 			"%35 = OpTypePointer Output %16\n"
716 			"%37 = OpConstant %23 1\n"
717 			"%38 = OpTypePointer Output %6\n"
718 			"%4 = OpFunction %2 None %3\n"
719 			"%5 = OpLabel\n"
720 			"%8 = OpVariable %7 Function\n"
721 			"%10 = OpVariable %7 Function\n"
722 			"OpStore %8 %9\n"
723 			"%11 = OpLoad %6 %8\n"
724 			"%13 = OpFDiv %6 %11 %12\n"
725 			"%15 = OpFSub %6 %13 %14\n"
726 			"OpStore %10 %15\n"
727 			"%27 = OpLoad %23 %26\n"
728 			"%28 = OpConvertSToF %6 %27\n"
729 			"%29 = OpLoad %6 %8\n"
730 			"%30 = OpFMul %6 %28 %29\n"
731 			"%31 = OpLoad %6 %10\n"
732 			"%32 = OpFAdd %6 %30 %31\n"
733 			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
734 			"%36 = OpAccessChain %35 %22 %24\n"
735 			"OpStore %36 %34\n"
736 			"%39 = OpAccessChain %38 %22 %37\n"
737 			"OpStore %39 %14\n"
738 			"OpReturn\n"
739 			"OpFunctionEnd\n";
740 		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
741 	}
742 
743 	{
744 	/*
745 		"#version 450\n"
746 		"layout(vertices=1) out;\n"
747 		"\n"
748 		"void main (void)\n"
749 		"{\n"
750 		"  if (gl_InvocationID == 0)\n"
751 		"  {\n"
752 		"    gl_TessLevelOuter[0] = 1.0f;\n"
753 		"    gl_TessLevelOuter[1] = 1.0f;\n"
754 		"  }\n"
755 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
756 		"}\n"
757 	*/
758 		const std::string tescNoSubgroup =
759 			"; SPIR-V\n"
760 			"; Version: 1.3\n"
761 			"; Generator: Khronos Glslang Reference Front End; 1\n"
762 			"; Bound: 45\n"
763 			"; Schema: 0\n"
764 			"OpCapability Tessellation\n"
765 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
766 			"OpMemoryModel Logical GLSL450\n"
767 			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
768 			"OpExecutionMode %4 OutputVertices 1\n"
769 			"OpDecorate %8 BuiltIn InvocationId\n"
770 			"OpDecorate %20 Patch\n"
771 			"OpDecorate %20 BuiltIn TessLevelOuter\n"
772 			"OpMemberDecorate %29 0 BuiltIn Position\n"
773 			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
774 			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
775 			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
776 			"OpDecorate %29 Block\n"
777 			"OpMemberDecorate %34 0 BuiltIn Position\n"
778 			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
779 			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
780 			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
781 			"OpDecorate %34 Block\n"
782 			"%2 = OpTypeVoid\n"
783 			"%3 = OpTypeFunction %2\n"
784 			"%6 = OpTypeInt 32 1\n"
785 			"%7 = OpTypePointer Input %6\n"
786 			"%8 = OpVariable %7 Input\n"
787 			"%10 = OpConstant %6 0\n"
788 			"%11 = OpTypeBool\n"
789 			"%15 = OpTypeFloat 32\n"
790 			"%16 = OpTypeInt 32 0\n"
791 			"%17 = OpConstant %16 4\n"
792 			"%18 = OpTypeArray %15 %17\n"
793 			"%19 = OpTypePointer Output %18\n"
794 			"%20 = OpVariable %19 Output\n"
795 			"%21 = OpConstant %15 1\n"
796 			"%22 = OpTypePointer Output %15\n"
797 			"%24 = OpConstant %6 1\n"
798 			"%26 = OpTypeVector %15 4\n"
799 			"%27 = OpConstant %16 1\n"
800 			"%28 = OpTypeArray %15 %27\n"
801 			"%29 = OpTypeStruct %26 %15 %28 %28\n"
802 			"%30 = OpTypeArray %29 %27\n"
803 			"%31 = OpTypePointer Output %30\n"
804 			"%32 = OpVariable %31 Output\n"
805 			"%34 = OpTypeStruct %26 %15 %28 %28\n"
806 			"%35 = OpConstant %16 32\n"
807 			"%36 = OpTypeArray %34 %35\n"
808 			"%37 = OpTypePointer Input %36\n"
809 			"%38 = OpVariable %37 Input\n"
810 			"%40 = OpTypePointer Input %26\n"
811 			"%43 = OpTypePointer Output %26\n"
812 			"%4 = OpFunction %2 None %3\n"
813 			"%5 = OpLabel\n"
814 			"%9 = OpLoad %6 %8\n"
815 			"%12 = OpIEqual %11 %9 %10\n"
816 			"OpSelectionMerge %14 None\n"
817 			"OpBranchConditional %12 %13 %14\n"
818 			"%13 = OpLabel\n"
819 			"%23 = OpAccessChain %22 %20 %10\n"
820 			"OpStore %23 %21\n"
821 			"%25 = OpAccessChain %22 %20 %24\n"
822 			"OpStore %25 %21\n"
823 			"OpBranch %14\n"
824 			"%14 = OpLabel\n"
825 			"%33 = OpLoad %6 %8\n"
826 			"%39 = OpLoad %6 %8\n"
827 			"%41 = OpAccessChain %40 %38 %39 %10\n"
828 			"%42 = OpLoad %26 %41\n"
829 			"%44 = OpAccessChain %43 %32 %33 %10\n"
830 			"OpStore %44 %42\n"
831 			"OpReturn\n"
832 			"OpFunctionEnd\n";
833 		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
834 	}
835 
836 	{
837 	/*
838 		"#version 450\n"
839 		"layout(isolines) in;\n"
840 		"\n"
841 		"void main (void)\n"
842 		"{\n"
843 		"  float pixelSize = 2.0f/1024.0f;\n"
844 		"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
845 		"}\n";
846 	*/
847 		const std::string teseNoSubgroup =
848 			"; SPIR-V\n"
849 			"; Version: 1.3\n"
850 			"; Generator: Khronos Glslang Reference Front End; 2\n"
851 			"; Bound: 42\n"
852 			"; Schema: 0\n"
853 			"OpCapability Tessellation\n"
854 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
855 			"OpMemoryModel Logical GLSL450\n"
856 			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
857 			"OpExecutionMode %4 Isolines\n"
858 			"OpExecutionMode %4 SpacingEqual\n"
859 			"OpExecutionMode %4 VertexOrderCcw\n"
860 			"OpMemberDecorate %14 0 BuiltIn Position\n"
861 			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
862 			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
863 			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
864 			"OpDecorate %14 Block\n"
865 			"OpMemberDecorate %19 0 BuiltIn Position\n"
866 			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
867 			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
868 			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
869 			"OpDecorate %19 Block\n"
870 			"OpDecorate %29 BuiltIn TessCoord\n"
871 			"%2 = OpTypeVoid\n"
872 			"%3 = OpTypeFunction %2\n"
873 			"%6 = OpTypeFloat 32\n"
874 			"%7 = OpTypePointer Function %6\n"
875 			"%9 = OpConstant %6 0.00195313\n"
876 			"%10 = OpTypeVector %6 4\n"
877 			"%11 = OpTypeInt 32 0\n"
878 			"%12 = OpConstant %11 1\n"
879 			"%13 = OpTypeArray %6 %12\n"
880 			"%14 = OpTypeStruct %10 %6 %13 %13\n"
881 			"%15 = OpTypePointer Output %14\n"
882 			"%16 = OpVariable %15 Output\n"
883 			"%17 = OpTypeInt 32 1\n"
884 			"%18 = OpConstant %17 0\n"
885 			"%19 = OpTypeStruct %10 %6 %13 %13\n"
886 			"%20 = OpConstant %11 32\n"
887 			"%21 = OpTypeArray %19 %20\n"
888 			"%22 = OpTypePointer Input %21\n"
889 			"%23 = OpVariable %22 Input\n"
890 			"%24 = OpTypePointer Input %10\n"
891 			"%27 = OpTypeVector %6 3\n"
892 			"%28 = OpTypePointer Input %27\n"
893 			"%29 = OpVariable %28 Input\n"
894 			"%30 = OpConstant %11 0\n"
895 			"%31 = OpTypePointer Input %6\n"
896 			"%36 = OpConstant %6 2\n"
897 			"%40 = OpTypePointer Output %10\n"
898 			"%4 = OpFunction %2 None %3\n"
899 			"%5 = OpLabel\n"
900 			"%8 = OpVariable %7 Function\n"
901 			"OpStore %8 %9\n"
902 			"%25 = OpAccessChain %24 %23 %18 %18\n"
903 			"%26 = OpLoad %10 %25\n"
904 			"%32 = OpAccessChain %31 %29 %30\n"
905 			"%33 = OpLoad %6 %32\n"
906 			"%34 = OpLoad %6 %8\n"
907 			"%35 = OpFMul %6 %33 %34\n"
908 			"%37 = OpFDiv %6 %35 %36\n"
909 			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
910 			"%39 = OpFAdd %10 %26 %38\n"
911 			"%41 = OpAccessChain %40 %16 %18\n"
912 			"OpStore %41 %39\n"
913 			"OpReturn\n"
914 			"OpFunctionEnd\n";
915 		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
916 	}
917 
918 }
919 
920 
getVertShaderForStage(vk::VkShaderStageFlags stage)921 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
922 {
923 	switch (stage)
924 	{
925 		default:
926 			DE_FATAL("Unhandled stage!");
927 			return "";
928 		case VK_SHADER_STAGE_FRAGMENT_BIT:
929 			return
930 				"#version 450\n"
931 				"void main (void)\n"
932 				"{\n"
933 				"  float pixelSize = 2.0f/1024.0f;\n"
934 				"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
935 				"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
936 				"}\n";
937 		case VK_SHADER_STAGE_GEOMETRY_BIT:
938 			return
939 				"#version 450\n"
940 				"void main (void)\n"
941 				"{\n"
942 				"}\n";
943 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
944 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
945 			return
946 				"#version 450\n"
947 				"void main (void)\n"
948 				"{\n"
949 				"}\n";
950 	}
951 }
952 
isSubgroupSupported(Context & context)953 bool vkt::subgroups::isSubgroupSupported(Context& context)
954 {
955 	return context.contextSupports(vk::ApiVersion(1, 1, 0));
956 }
957 
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)958 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
959 	Context& context, const VkShaderStageFlags stage)
960 {
961 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
962 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
963 	subgroupProperties.pNext = DE_NULL;
964 
965 	VkPhysicalDeviceProperties2 properties;
966 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
967 	properties.pNext = &subgroupProperties;
968 
969 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
970 
971 	return (stage & subgroupProperties.supportedStages) ? true : false;
972 }
973 
areSubgroupOperationsRequiredForStage(VkShaderStageFlags stage)974 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
975 	VkShaderStageFlags stage)
976 {
977 	switch (stage)
978 	{
979 		default:
980 			return false;
981 		case VK_SHADER_STAGE_COMPUTE_BIT:
982 			return true;
983 	}
984 }
985 
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)986 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
987 	Context& context,
988 	VkSubgroupFeatureFlagBits bit) {
989 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
990 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
991 	subgroupProperties.pNext = DE_NULL;
992 
993 	VkPhysicalDeviceProperties2 properties;
994 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
995 	properties.pNext = &subgroupProperties;
996 
997 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
998 
999 	return (bit & subgroupProperties.supportedOperations) ? true : false;
1000 }
1001 
isFragmentSSBOSupportedForDevice(Context & context)1002 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1003 {
1004 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1005 				context.getInstanceInterface(), context.getPhysicalDevice());
1006 	return features.fragmentStoresAndAtomics ? true : false;
1007 }
1008 
isVertexSSBOSupportedForDevice(Context & context)1009 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1010 {
1011 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1012 				context.getInstanceInterface(), context.getPhysicalDevice());
1013 	return features.vertexPipelineStoresAndAtomics ? true : false;
1014 }
1015 
isDoubleSupportedForDevice(Context & context)1016 bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
1017 {
1018 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1019 				context.getInstanceInterface(), context.getPhysicalDevice());
1020 	return features.shaderFloat64 ? true : false;
1021 }
1022 
isDoubleFormat(VkFormat format)1023 bool vkt::subgroups::isDoubleFormat(VkFormat format)
1024 {
1025 	switch (format)
1026 	{
1027 		default:
1028 			return false;
1029 		case VK_FORMAT_R64_SFLOAT:
1030 		case VK_FORMAT_R64G64_SFLOAT:
1031 		case VK_FORMAT_R64G64B64_SFLOAT:
1032 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1033 			return true;
1034 	}
1035 }
1036 
getFormatNameForGLSL(VkFormat format)1037 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1038 {
1039 	switch (format)
1040 	{
1041 		default:
1042 			DE_FATAL("Unhandled format!");
1043 			return "";
1044 		case VK_FORMAT_R32_SINT:
1045 			return "int";
1046 		case VK_FORMAT_R32G32_SINT:
1047 			return "ivec2";
1048 		case VK_FORMAT_R32G32B32_SINT:
1049 			return "ivec3";
1050 		case VK_FORMAT_R32G32B32A32_SINT:
1051 			return "ivec4";
1052 		case VK_FORMAT_R32_UINT:
1053 			return "uint";
1054 		case VK_FORMAT_R32G32_UINT:
1055 			return "uvec2";
1056 		case VK_FORMAT_R32G32B32_UINT:
1057 			return "uvec3";
1058 		case VK_FORMAT_R32G32B32A32_UINT:
1059 			return "uvec4";
1060 		case VK_FORMAT_R32_SFLOAT:
1061 			return "float";
1062 		case VK_FORMAT_R32G32_SFLOAT:
1063 			return "vec2";
1064 		case VK_FORMAT_R32G32B32_SFLOAT:
1065 			return "vec3";
1066 		case VK_FORMAT_R32G32B32A32_SFLOAT:
1067 			return "vec4";
1068 		case VK_FORMAT_R64_SFLOAT:
1069 			return "double";
1070 		case VK_FORMAT_R64G64_SFLOAT:
1071 			return "dvec2";
1072 		case VK_FORMAT_R64G64B64_SFLOAT:
1073 			return "dvec3";
1074 		case VK_FORMAT_R64G64B64A64_SFLOAT:
1075 			return "dvec4";
1076 		case VK_FORMAT_R8_USCALED:
1077 			return "bool";
1078 		case VK_FORMAT_R8G8_USCALED:
1079 			return "bvec2";
1080 		case VK_FORMAT_R8G8B8_USCALED:
1081 			return "bvec3";
1082 		case VK_FORMAT_R8G8B8A8_USCALED:
1083 			return "bvec4";
1084 	}
1085 }
1086 
setVertexShaderFrameBuffer(SourceCollections & programCollection)1087 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1088 {
1089 	/*
1090 		"layout(location = 0) in highp vec4 in_position;\n"
1091 		"void main (void)\n"
1092 		"{\n"
1093 		"  gl_Position = in_position;\n"
1094 		"}\n";
1095 	*/
1096 	programCollection.spirvAsmSources.add("vert") <<
1097 		"; SPIR-V\n"
1098 		"; Version: 1.3\n"
1099 		"; Generator: Khronos Glslang Reference Front End; 2\n"
1100 		"; Bound: 21\n"
1101 		"; Schema: 0\n"
1102 		"OpCapability Shader\n"
1103 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1104 		"OpMemoryModel Logical GLSL450\n"
1105 		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1106 		"OpMemberDecorate %11 0 BuiltIn Position\n"
1107 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
1108 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1109 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1110 		"OpDecorate %11 Block\n"
1111 		"OpDecorate %17 Location 0\n"
1112 		"%2 = OpTypeVoid\n"
1113 		"%3 = OpTypeFunction %2\n"
1114 		"%6 = OpTypeFloat 32\n"
1115 		"%7 = OpTypeVector %6 4\n"
1116 		"%8 = OpTypeInt 32 0\n"
1117 		"%9 = OpConstant %8 1\n"
1118 		"%10 = OpTypeArray %6 %9\n"
1119 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
1120 		"%12 = OpTypePointer Output %11\n"
1121 		"%13 = OpVariable %12 Output\n"
1122 		"%14 = OpTypeInt 32 1\n"
1123 		"%15 = OpConstant %14 0\n"
1124 		"%16 = OpTypePointer Input %7\n"
1125 		"%17 = OpVariable %16 Input\n"
1126 		"%19 = OpTypePointer Output %7\n"
1127 		"%4 = OpFunction %2 None %3\n"
1128 		"%5 = OpLabel\n"
1129 		"%18 = OpLoad %7 %17\n"
1130 		"%20 = OpAccessChain %19 %13 %15\n"
1131 		"OpStore %20 %18\n"
1132 		"OpReturn\n"
1133 		"OpFunctionEnd\n";
1134 }
1135 
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)1136 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
1137 {
1138 	/*
1139 		"layout(location = 0) in float in_color;\n"
1140 		"layout(location = 0) out uint out_color;\n"
1141 		"void main()\n"
1142 		{\n"
1143 		"	out_color = uint(in_color);\n"
1144 		"}\n";
1145 	*/
1146 	programCollection.spirvAsmSources.add("fragment") <<
1147 		"; SPIR-V\n"
1148 		"; Version: 1.3\n"
1149 		"; Generator: Khronos Glslang Reference Front End; 2\n"
1150 		"; Bound: 14\n"
1151 		"; Schema: 0\n"
1152 		"OpCapability Shader\n"
1153 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1154 		"OpMemoryModel Logical GLSL450\n"
1155 		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
1156 		"OpExecutionMode %4 OriginUpperLeft\n"
1157 		"OpDecorate %8 Location 0\n"
1158 		"OpDecorate %11 Location 0\n"
1159 		"%2 = OpTypeVoid\n"
1160 		"%3 = OpTypeFunction %2\n"
1161 		"%6 = OpTypeInt 32 0\n"
1162 		"%7 = OpTypePointer Output %6\n"
1163 		"%8 = OpVariable %7 Output\n"
1164 		"%9 = OpTypeFloat 32\n"
1165 		"%10 = OpTypePointer Input %9\n"
1166 		"%11 = OpVariable %10 Input\n"
1167 		"%4 = OpFunction %2 None %3\n"
1168 		"%5 = OpLabel\n"
1169 		"%12 = OpLoad %9 %11\n"
1170 		"%13 = OpConvertFToU %6 %12\n"
1171 		"OpStore %8 %13\n"
1172 		"OpReturn\n"
1173 		"OpFunctionEnd\n";
1174 }
1175 
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)1176 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
1177 {
1178 	/*
1179 		"#extension GL_KHR_shader_subgroup_basic: enable\n"
1180 		"#extension GL_EXT_tessellation_shader : require\n"
1181 		"layout(vertices = 2) out;\n"
1182 		"void main (void)\n"
1183 		"{\n"
1184 		"  if (gl_InvocationID == 0)\n"
1185 		  {\n"
1186 		"    gl_TessLevelOuter[0] = 1.0f;\n"
1187 		"    gl_TessLevelOuter[1] = 1.0f;\n"
1188 		"  }\n"
1189 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1190 		"}\n";
1191 	*/
1192 	programCollection.spirvAsmSources.add("tesc") <<
1193 		"; SPIR-V\n"
1194 		"; Version: 1.3\n"
1195 		"; Generator: Khronos Glslang Reference Front End; 2\n"
1196 		"; Bound: 46\n"
1197 		"; Schema: 0\n"
1198 		"OpCapability Tessellation\n"
1199 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1200 		"OpMemoryModel Logical GLSL450\n"
1201 		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
1202 		"OpExecutionMode %4 OutputVertices 2\n"
1203 		"OpDecorate %8 BuiltIn InvocationId\n"
1204 		"OpDecorate %20 Patch\n"
1205 		"OpDecorate %20 BuiltIn TessLevelOuter\n"
1206 		"OpMemberDecorate %29 0 BuiltIn Position\n"
1207 		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
1208 		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1209 		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1210 		"OpDecorate %29 Block\n"
1211 		"OpMemberDecorate %35 0 BuiltIn Position\n"
1212 		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
1213 		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
1214 		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
1215 		"OpDecorate %35 Block\n"
1216 		"%2 = OpTypeVoid\n"
1217 		"%3 = OpTypeFunction %2\n"
1218 		"%6 = OpTypeInt 32 1\n"
1219 		"%7 = OpTypePointer Input %6\n"
1220 		"%8 = OpVariable %7 Input\n"
1221 		"%10 = OpConstant %6 0\n"
1222 		"%11 = OpTypeBool\n"
1223 		"%15 = OpTypeFloat 32\n"
1224 		"%16 = OpTypeInt 32 0\n"
1225 		"%17 = OpConstant %16 4\n"
1226 		"%18 = OpTypeArray %15 %17\n"
1227 		"%19 = OpTypePointer Output %18\n"
1228 		"%20 = OpVariable %19 Output\n"
1229 		"%21 = OpConstant %15 1\n"
1230 		"%22 = OpTypePointer Output %15\n"
1231 		"%24 = OpConstant %6 1\n"
1232 		"%26 = OpTypeVector %15 4\n"
1233 		"%27 = OpConstant %16 1\n"
1234 		"%28 = OpTypeArray %15 %27\n"
1235 		"%29 = OpTypeStruct %26 %15 %28 %28\n"
1236 		"%30 = OpConstant %16 2\n"
1237 		"%31 = OpTypeArray %29 %30\n"
1238 		"%32 = OpTypePointer Output %31\n"
1239 		"%33 = OpVariable %32 Output\n"
1240 		"%35 = OpTypeStruct %26 %15 %28 %28\n"
1241 		"%36 = OpConstant %16 32\n"
1242 		"%37 = OpTypeArray %35 %36\n"
1243 		"%38 = OpTypePointer Input %37\n"
1244 		"%39 = OpVariable %38 Input\n"
1245 		"%41 = OpTypePointer Input %26\n"
1246 		"%44 = OpTypePointer Output %26\n"
1247 		"%4 = OpFunction %2 None %3\n"
1248 		"%5 = OpLabel\n"
1249 		"%9 = OpLoad %6 %8\n"
1250 		"%12 = OpIEqual %11 %9 %10\n"
1251 		"OpSelectionMerge %14 None\n"
1252 		"OpBranchConditional %12 %13 %14\n"
1253 		"%13 = OpLabel\n"
1254 		"%23 = OpAccessChain %22 %20 %10\n"
1255 		"OpStore %23 %21\n"
1256 		"%25 = OpAccessChain %22 %20 %24\n"
1257 		"OpStore %25 %21\n"
1258 		"OpBranch %14\n"
1259 		"%14 = OpLabel\n"
1260 		"%34 = OpLoad %6 %8\n"
1261 		"%40 = OpLoad %6 %8\n"
1262 		"%42 = OpAccessChain %41 %39 %40 %10\n"
1263 		"%43 = OpLoad %26 %42\n"
1264 		"%45 = OpAccessChain %44 %33 %34 %10\n"
1265 		"OpStore %45 %43\n"
1266 		"OpReturn\n"
1267 		"OpFunctionEnd\n";
1268 }
1269 
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)1270 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
1271 {
1272 	/*
1273 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
1274 		"#extension GL_EXT_tessellation_shader : require\n"
1275 		"layout(isolines, equal_spacing, ccw ) in;\n"
1276 		"layout(location = 0) in float in_color[];\n"
1277 		"layout(location = 0) out float out_color;\n"
1278 		"\n"
1279 		"void main (void)\n"
1280 		"{\n"
1281 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1282 		"  out_color = in_color[0];\n"
1283 		"}\n";
1284 	*/
1285 	programCollection.spirvAsmSources.add("tese") <<
1286 		"; SPIR-V\n"
1287 		"; Version: 1.3\n"
1288 		"; Generator: Khronos Glslang Reference Front End; 2\n"
1289 		"; Bound: 45\n"
1290 		"; Schema: 0\n"
1291 		"OpCapability Tessellation\n"
1292 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
1293 		"OpMemoryModel Logical GLSL450\n"
1294 		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
1295 		"OpExecutionMode %4 Isolines\n"
1296 		"OpExecutionMode %4 SpacingEqual\n"
1297 		"OpExecutionMode %4 VertexOrderCcw\n"
1298 		"OpMemberDecorate %11 0 BuiltIn Position\n"
1299 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
1300 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1301 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1302 		"OpDecorate %11 Block\n"
1303 		"OpMemberDecorate %16 0 BuiltIn Position\n"
1304 		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
1305 		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1306 		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1307 		"OpDecorate %16 Block\n"
1308 		"OpDecorate %29 BuiltIn TessCoord\n"
1309 		"OpDecorate %39 Location 0\n"
1310 		"OpDecorate %42 Location 0\n"
1311 		"%2 = OpTypeVoid\n"
1312 		"%3 = OpTypeFunction %2\n"
1313 		"%6 = OpTypeFloat 32\n"
1314 		"%7 = OpTypeVector %6 4\n"
1315 		"%8 = OpTypeInt 32 0\n"
1316 		"%9 = OpConstant %8 1\n"
1317 		"%10 = OpTypeArray %6 %9\n"
1318 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
1319 		"%12 = OpTypePointer Output %11\n"
1320 		"%13 = OpVariable %12 Output\n"
1321 		"%14 = OpTypeInt 32 1\n"
1322 		"%15 = OpConstant %14 0\n"
1323 		"%16 = OpTypeStruct %7 %6 %10 %10\n"
1324 		"%17 = OpConstant %8 32\n"
1325 		"%18 = OpTypeArray %16 %17\n"
1326 		"%19 = OpTypePointer Input %18\n"
1327 		"%20 = OpVariable %19 Input\n"
1328 		"%21 = OpTypePointer Input %7\n"
1329 		"%24 = OpConstant %14 1\n"
1330 		"%27 = OpTypeVector %6 3\n"
1331 		"%28 = OpTypePointer Input %27\n"
1332 		"%29 = OpVariable %28 Input\n"
1333 		"%30 = OpConstant %8 0\n"
1334 		"%31 = OpTypePointer Input %6\n"
1335 		"%36 = OpTypePointer Output %7\n"
1336 		"%38 = OpTypePointer Output %6\n"
1337 		"%39 = OpVariable %38 Output\n"
1338 		"%40 = OpTypeArray %6 %17\n"
1339 		"%41 = OpTypePointer Input %40\n"
1340 		"%42 = OpVariable %41 Input\n"
1341 		"%4 = OpFunction %2 None %3\n"
1342 		"%5 = OpLabel\n"
1343 		"%22 = OpAccessChain %21 %20 %15 %15\n"
1344 		"%23 = OpLoad %7 %22\n"
1345 		"%25 = OpAccessChain %21 %20 %24 %15\n"
1346 		"%26 = OpLoad %7 %25\n"
1347 		"%32 = OpAccessChain %31 %29 %30\n"
1348 		"%33 = OpLoad %6 %32\n"
1349 		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1350 		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1351 		"%37 = OpAccessChain %36 %13 %15\n"
1352 		"OpStore %37 %35\n"
1353 		"%43 = OpAccessChain %31 %42 %15\n"
1354 		"%44 = OpLoad %6 %43\n"
1355 		"OpStore %39 %44\n"
1356 		"OpReturn\n"
1357 		"OpFunctionEnd\n";
1358 }
1359 
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)1360 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
1361 {
1362 	tcu::StringTemplate geometryTemplate(glslTemplate);
1363 
1364 	map<string, string>		linesParams;
1365 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
1366 
1367 	map<string, string>		pointsParams;
1368 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
1369 
1370 	collection.add("geometry_lines")	<< glu::GeometrySource(geometryTemplate.specialize(linesParams))	<< options;
1371 	collection.add("geometry_points")	<< glu::GeometrySource(geometryTemplate.specialize(pointsParams))	<< options;
1372 }
1373 
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)1374 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
1375 {
1376 	tcu::StringTemplate geometryTemplate(spirvTemplate);
1377 
1378 	map<string, string>		linesParams;
1379 	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
1380 
1381 	map<string, string>		pointsParams;
1382 	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
1383 
1384 	collection.add("geometry_lines")	<< geometryTemplate.specialize(linesParams)		<< options;
1385 	collection.add("geometry_points")	<< geometryTemplate.specialize(pointsParams)	<< options;
1386 }
1387 
initializeMemory(Context & context,const Allocation & alloc,subgroups::SSBOData & data)1388 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
1389 {
1390 	const vk::VkFormat format = data.format;
1391 	const vk::VkDeviceSize size = data.numElements *
1392 		(data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
1393 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
1394 	{
1395 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
1396 
1397 		switch (format)
1398 		{
1399 			default:
1400 				DE_FATAL("Illegal buffer format");
1401 				break;
1402 			case VK_FORMAT_R8_USCALED:
1403 			case VK_FORMAT_R8G8_USCALED:
1404 			case VK_FORMAT_R8G8B8_USCALED:
1405 			case VK_FORMAT_R8G8B8A8_USCALED:
1406 			case VK_FORMAT_R32_SINT:
1407 			case VK_FORMAT_R32G32_SINT:
1408 			case VK_FORMAT_R32G32B32_SINT:
1409 			case VK_FORMAT_R32G32B32A32_SINT:
1410 			case VK_FORMAT_R32_UINT:
1411 			case VK_FORMAT_R32G32_UINT:
1412 			case VK_FORMAT_R32G32B32_UINT:
1413 			case VK_FORMAT_R32G32B32A32_UINT:
1414 			{
1415 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1416 
1417 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
1418 				{
1419 					ptr[k] = rnd.getUint32();
1420 				}
1421 			}
1422 			break;
1423 			case VK_FORMAT_R32_SFLOAT:
1424 			case VK_FORMAT_R32G32_SFLOAT:
1425 			case VK_FORMAT_R32G32B32_SFLOAT:
1426 			case VK_FORMAT_R32G32B32A32_SFLOAT:
1427 			{
1428 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
1429 
1430 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
1431 				{
1432 					ptr[k] = rnd.getFloat();
1433 				}
1434 			}
1435 			break;
1436 			case VK_FORMAT_R64_SFLOAT:
1437 			case VK_FORMAT_R64G64_SFLOAT:
1438 			case VK_FORMAT_R64G64B64_SFLOAT:
1439 			case VK_FORMAT_R64G64B64A64_SFLOAT:
1440 			{
1441 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
1442 
1443 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
1444 				{
1445 					ptr[k] = rnd.getDouble();
1446 				}
1447 			}
1448 			break;
1449 		}
1450 	}
1451 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
1452 	{
1453 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1454 
1455 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
1456 		{
1457 			ptr[k] = 0;
1458 		}
1459 	}
1460 
1461 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
1462 	{
1463 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1464 	}
1465 }
1466 
getResultBinding(const VkShaderStageFlagBits shaderStage)1467 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
1468 {
1469 	switch(shaderStage)
1470 	{
1471 		case VK_SHADER_STAGE_VERTEX_BIT:
1472 			return 0u;
1473 			break;
1474 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1475 			return 1u;
1476 			break;
1477 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1478 			return 2u;
1479 			break;
1480 		case VK_SHADER_STAGE_GEOMETRY_BIT:
1481 			return 3u;
1482 			break;
1483 		default:
1484 			DE_ASSERT(0);
1485 			return -1;
1486 	}
1487 	DE_ASSERT(0);
1488 	return -1;
1489 }
1490 
makeTessellationEvaluationFrameBufferTest(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const VkShaderStageFlags shaderStage)1491 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
1492 	Context& context, VkFormat format, SSBOData* extraData,
1493 	deUint32 extraDataCount,
1494 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1495 	const VkShaderStageFlags shaderStage)
1496 {
1497 	const deUint32							maxWidth				= 1024u;
1498 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
1499 	DescriptorSetLayoutBuilder				layoutBuilder;
1500 	DescriptorPoolBuilder					poolBuilder;
1501 	DescriptorSetUpdateBuilder				updateBuilder;
1502 	Move <VkDescriptorPool>					descriptorPool;
1503 	Move <VkDescriptorSet>					descriptorSet;
1504 
1505 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
1506 																		context.getBinaryCollection().get("vert"), 0u));
1507 	const Unique<VkShaderModule>			teCtrlShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
1508 																		context.getBinaryCollection().get("tesc"), 0u));
1509 	const Unique<VkShaderModule>			teEvalShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
1510 																		context.getBinaryCollection().get("tese"), 0u));
1511 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(context.getDeviceInterface(), context.getDevice(),
1512 																	context.getBinaryCollection().get("fragment"), 0u));
1513 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
1514 
1515 	const VkVertexInputBindingDescription	vertexInputBinding		=
1516 	{
1517 		0u,											// binding;
1518 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
1519 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
1520 	};
1521 
1522 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
1523 	{
1524 		0u,
1525 		0u,
1526 		VK_FORMAT_R32G32B32A32_SFLOAT,
1527 		0u
1528 	};
1529 
1530 	for (deUint32 i = 0u; i < extraDataCount; i++)
1531 	{
1532 		if (extraData[i].isImage)
1533 		{
1534 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1535 		}
1536 		else
1537 		{
1538 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
1539 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1540 		}
1541 		const Allocation& alloc = inputBuffers[i]->getAllocation();
1542 		initializeMemory(context, alloc, extraData[i]);
1543 	}
1544 
1545 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1546 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
1547 
1548 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
1549 
1550 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(context, *descriptorSetLayout));
1551 
1552 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
1553 																	VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
1554 																	VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
1555 																	*vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
1556 																	*renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1557 
1558 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1559 		poolBuilder.addType(inputBuffers[ndx]->getType());
1560 
1561 	if (extraDataCount > 0)
1562 	{
1563 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
1564 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1565 		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
1566 	}
1567 
1568 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1569 	{
1570 		if (inputBuffers[buffersNdx]->isImage())
1571 		{
1572 			VkDescriptorImageInfo info =
1573 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1574 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1575 
1576 			updateBuilder.writeSingle(*descriptorSet,
1577 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1578 										inputBuffers[buffersNdx]->getType(), &info);
1579 		}
1580 		else
1581 		{
1582 			VkDescriptorBufferInfo info =
1583 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1584 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1585 
1586 			updateBuilder.writeSingle(*descriptorSet,
1587 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1588 										inputBuffers[buffersNdx]->getType(), &info);
1589 		}
1590 	}
1591 
1592 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
1593 
1594 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(context));
1595 	const deUint32							subgroupSize			= getSubgroupSize(context);
1596 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
1597 	const vk::VkDeviceSize					vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
1598 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1599 	unsigned								totalIterations			= 0u;
1600 	unsigned								failedIterations		= 0u;
1601 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1602 
1603 	{
1604 		const Allocation&		alloc				= vertexBuffer.getAllocation();
1605 		std::vector<tcu::Vec4>	data				(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
1606 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
1607 		float					leftHandPosition	= -1.0f;
1608 
1609 		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
1610 		{
1611 			data[ndx][0] = leftHandPosition;
1612 			leftHandPosition += pixelSize;
1613 			data[ndx+1][0] = leftHandPosition;
1614 		}
1615 
1616 		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
1617 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1618 	}
1619 
1620 	for (deUint32 width = 1u; width < maxWidth; ++width)
1621 	{
1622 		const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
1623 		const VkViewport			viewport			= makeViewport(maxWidth, 1u);
1624 		const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
1625 		const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
1626 		Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1627 		const VkDeviceSize			vertexBufferOffset	= 0u;
1628 
1629 		totalIterations++;
1630 
1631 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1632 		{
1633 
1634 			context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
1635 			context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
1636 
1637 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
1638 
1639 			context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
1640 
1641 			if (extraDataCount > 0)
1642 			{
1643 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
1644 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
1645 					&descriptorSet.get(), 0u, DE_NULL);
1646 			}
1647 
1648 			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
1649 			context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
1650 
1651 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
1652 
1653 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1654 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1655 
1656 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
1657 			waitFence(context, fence);
1658 		}
1659 
1660 		{
1661 			const Allocation& allocResult = imageBufferResult.getAllocation();
1662 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
1663 
1664 			std::vector<const void*> datas;
1665 			datas.push_back(allocResult.getHostPtr());
1666 			if (!checkResult(datas, width/2u, subgroupSize))
1667 				failedIterations++;
1668 		}
1669 	}
1670 
1671 	if (0 < failedIterations)
1672 	{
1673 		context.getTestContext().getLog()
1674 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
1675 				<< totalIterations << " values passed" << TestLog::EndMessage;
1676 		return tcu::TestStatus::fail("Failed!");
1677 	}
1678 
1679 	return tcu::TestStatus::pass("OK");
1680 }
1681 
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)1682 bool vkt::subgroups::check(std::vector<const void*> datas,
1683 	deUint32 width, deUint32 ref)
1684 {
1685 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
1686 
1687 	for (deUint32 n = 0; n < width; ++n)
1688 	{
1689 		if (data[n] != ref)
1690 		{
1691 			return false;
1692 		}
1693 	}
1694 
1695 	return true;
1696 }
1697 
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)1698 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
1699 	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
1700 	deUint32 ref)
1701 {
1702 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
1703 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
1704 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
1705 
1706 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
1707 }
1708 
makeGeometryFrameBufferTest(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))1709 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
1710 	Context& context, VkFormat format, SSBOData* extraData,
1711 	deUint32 extraDataCount,
1712 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
1713 {
1714 	const deUint32							maxWidth				= 1024u;
1715 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
1716 	DescriptorSetLayoutBuilder				layoutBuilder;
1717 	DescriptorPoolBuilder					poolBuilder;
1718 	DescriptorSetUpdateBuilder				updateBuilder;
1719 	Move <VkDescriptorPool>					descriptorPool;
1720 	Move <VkDescriptorSet>					descriptorSet;
1721 
1722 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
1723 																		context.getBinaryCollection().get("vert"), 0u));
1724 	const Unique<VkShaderModule>			geometryShaderModule	(createShaderModule(context.getDeviceInterface(), context.getDevice(),
1725 																		context.getBinaryCollection().get("geometry"), 0u));
1726 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(context.getDeviceInterface(), context.getDevice(),
1727 																	context.getBinaryCollection().get("fragment"), 0u));
1728 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
1729 	const VkVertexInputBindingDescription	vertexInputBinding		=
1730 	{
1731 		0u,											// binding;
1732 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
1733 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
1734 	};
1735 
1736 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
1737 	{
1738 		0u,
1739 		0u,
1740 		VK_FORMAT_R32G32B32A32_SFLOAT,
1741 		0u
1742 	};
1743 
1744 	for (deUint32 i = 0u; i < extraDataCount; i++)
1745 	{
1746 		if (extraData[i].isImage)
1747 		{
1748 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1749 		}
1750 		else
1751 		{
1752 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
1753 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1754 		}
1755 		const Allocation& alloc = inputBuffers[i]->getAllocation();
1756 		initializeMemory(context, alloc, extraData[i]);
1757 	}
1758 
1759 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1760 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
1761 
1762 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
1763 
1764 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(context, *descriptorSetLayout));
1765 
1766 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
1767 																	VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
1768 																	*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
1769 																	*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1770 
1771 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1772 		poolBuilder.addType(inputBuffers[ndx]->getType());
1773 
1774 	if (extraDataCount > 0)
1775 	{
1776 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
1777 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1778 		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
1779 	}
1780 
1781 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1782 	{
1783 		if (inputBuffers[buffersNdx]->isImage())
1784 		{
1785 			VkDescriptorImageInfo info =
1786 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1787 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1788 
1789 			updateBuilder.writeSingle(*descriptorSet,
1790 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1791 										inputBuffers[buffersNdx]->getType(), &info);
1792 		}
1793 		else
1794 		{
1795 			VkDescriptorBufferInfo info =
1796 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1797 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1798 
1799 			updateBuilder.writeSingle(*descriptorSet,
1800 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1801 										inputBuffers[buffersNdx]->getType(), &info);
1802 		}
1803 	}
1804 
1805 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
1806 
1807 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(context));
1808 	const deUint32							subgroupSize			= getSubgroupSize(context);
1809 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
1810 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
1811 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1812 	unsigned								totalIterations			= 0u;
1813 	unsigned								failedIterations		= 0u;
1814 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1815 
1816 	{
1817 		const Allocation&		alloc				= vertexBuffer.getAllocation();
1818 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
1819 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
1820 		float					leftHandPosition	= -1.0f;
1821 
1822 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
1823 		{
1824 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
1825 			leftHandPosition += pixelSize;
1826 		}
1827 
1828 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
1829 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1830 	}
1831 
1832 	for (deUint32 width = 1u; width < maxWidth; width++)
1833 	{
1834 		totalIterations++;
1835 		const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
1836 		const VkViewport			viewport			= makeViewport(maxWidth, 1u);
1837 		const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
1838 		const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
1839 		Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1840 		const VkDeviceSize			vertexBufferOffset	= 0u;
1841 
1842 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
1843 		{
1844 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
1845 			initializeMemory(context, alloc, extraData[ndx]);
1846 		}
1847 
1848 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1849 		{
1850 			context.getDeviceInterface().cmdSetViewport(
1851 				*cmdBuffer, 0, 1, &viewport);
1852 
1853 			context.getDeviceInterface().cmdSetScissor(
1854 				*cmdBuffer, 0, 1, &scissor);
1855 
1856 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
1857 
1858 			context.getDeviceInterface().cmdBindPipeline(
1859 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
1860 
1861 			if (extraDataCount > 0)
1862 			{
1863 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
1864 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
1865 					&descriptorSet.get(), 0u, DE_NULL);
1866 			}
1867 
1868 			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
1869 
1870 			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
1871 
1872 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
1873 
1874 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1875 
1876 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1877 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
1878 			waitFence(context, fence);
1879 		}
1880 
1881 		{
1882 			const Allocation& allocResult = imageBufferResult.getAllocation();
1883 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
1884 
1885 			std::vector<const void*> datas;
1886 			datas.push_back(allocResult.getHostPtr());
1887 			if (!checkResult(datas, width, subgroupSize))
1888 				failedIterations++;
1889 		}
1890 	}
1891 
1892 	if (0 < failedIterations)
1893 	{
1894 		context.getTestContext().getLog()
1895 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
1896 				<< totalIterations << " values passed" << TestLog::EndMessage;
1897 		return tcu::TestStatus::fail("Failed!");
1898 	}
1899 
1900 	return tcu::TestStatus::pass("OK");
1901 }
1902 
1903 
allStages(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const VkShaderStageFlags shaderStageTested)1904 tcu::TestStatus vkt::subgroups::allStages(
1905 	Context& context, VkFormat format, SSBOData* extraDatas,
1906 	deUint32 extraDatasCount,
1907 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1908 	const VkShaderStageFlags shaderStageTested)
1909 {
1910 	const deUint32					maxWidth			= 1024u;
1911 	vector<VkShaderStageFlagBits>	stagesVector;
1912 	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
1913 
1914 	Move<VkShaderModule>			vertexShaderModule;
1915 	Move<VkShaderModule>			teCtrlShaderModule;
1916 	Move<VkShaderModule>			teEvalShaderModule;
1917 	Move<VkShaderModule>			geometryShaderModule;
1918 	Move<VkShaderModule>			fragmentShaderModule;
1919 
1920 	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
1921 	{
1922 		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
1923 	}
1924 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1925 	{
1926 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
1927 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1928 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1929 	}
1930 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1931 	{
1932 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
1933 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1934 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1935 	}
1936 	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
1937 	{
1938 		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
1939 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1940 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1941 	}
1942 	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
1943 	{
1944 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1945 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1946 	}
1947 
1948 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
1949 	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
1950 	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
1951 	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
1952 
1953 	shaderStageRequired = shaderStageTested | shaderStageRequired;
1954 
1955 	vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u);
1956 	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1957 	{
1958 		teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u);
1959 		teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u);
1960 	}
1961 	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
1962 	{
1963 		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1964 		{
1965 			// tessellation shaders output line primitives
1966 			geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u);
1967 		}
1968 		else
1969 		{
1970 			// otherwise points are processed by geometry shader
1971 			geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u);
1972 		}
1973 	}
1974 	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
1975 		fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u);
1976 
1977 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
1978 
1979 	DescriptorSetLayoutBuilder layoutBuilder;
1980 	// The implicit result SSBO we use to store our outputs from the shader
1981 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
1982 	{
1983 		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
1984 		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
1985 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1986 
1987 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
1988 	}
1989 
1990 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
1991 	{
1992 		const deUint32 datasNdx = ndx - stagesCount;
1993 		if (extraDatas[datasNdx].isImage)
1994 		{
1995 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
1996 		}
1997 		else
1998 		{
1999 			const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
2000 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2001 		}
2002 
2003 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2004 		initializeMemory(context, alloc, extraDatas[datasNdx]);
2005 
2006 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
2007 								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
2008 	}
2009 
2010 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2011 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2012 
2013 	const Unique<VkPipelineLayout> pipelineLayout(
2014 		makePipelineLayout(context, *descriptorSetLayout));
2015 
2016 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2017 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2018 										shaderStageRequired,
2019 										*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
2020 										*renderPass,
2021 										(shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
2022 
2023 	DescriptorPoolBuilder poolBuilder;
2024 
2025 	for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
2026 	{
2027 		poolBuilder.addType(inputBuffers[ndx]->getType());
2028 	}
2029 
2030 	const Unique<VkDescriptorPool> descriptorPool(
2031 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2032 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2033 
2034 	// Create descriptor set
2035 	const Unique<VkDescriptorSet> descriptorSet(
2036 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2037 
2038 	DescriptorSetUpdateBuilder updateBuilder;
2039 
2040 	for (deUint32 ndx = 0u; ndx < stagesCount; ndx++)
2041 	{
2042 		if (inputBuffers[ndx]->isImage())
2043 		{
2044 			VkDescriptorImageInfo info =
2045 				makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2046 										inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2047 
2048 			updateBuilder.writeSingle(*descriptorSet,
2049 									  DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2050 									  inputBuffers[ndx]->getType(), &info);
2051 		}
2052 		else
2053 		{
2054 			VkDescriptorBufferInfo info =
2055 				makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2056 										 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2057 
2058 			updateBuilder.writeSingle(*descriptorSet,
2059 									  DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2060 									  inputBuffers[ndx]->getType(), &info);
2061 		}
2062 	}
2063 
2064 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ndx++)
2065 	{
2066 		if (inputBuffers[ndx]->isImage())
2067 		{
2068 			VkDescriptorImageInfo info =
2069 				makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2070 										inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2071 
2072 			updateBuilder.writeSingle(*descriptorSet,
2073 									  DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx -stagesCount].binding),
2074 									  inputBuffers[ndx]->getType(), &info);
2075 		}
2076 		else
2077 		{
2078 			VkDescriptorBufferInfo info =
2079 				makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2080 										 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2081 
2082 			updateBuilder.writeSingle(*descriptorSet,
2083 									  DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx - stagesCount].binding),
2084 									  inputBuffers[ndx]->getType(), &info);
2085 		}
2086 	}
2087 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2088 
2089 	{
2090 		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(context));
2091 		const deUint32					subgroupSize			= getSubgroupSize(context);
2092 		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2093 		unsigned						totalIterations			= 0u;
2094 		unsigned						failedIterations		= 0u;
2095 		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2096 		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1));
2097 		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
2098 		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
2099 		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2100 		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2101 		const VkImageSubresourceRange	subresourceRange		=
2102 		{
2103 			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
2104 			0u,																	//deUint32				baseMipLevel
2105 			1u,																	//deUint32				levelCount
2106 			0u,																	//deUint32				baseArrayLayer
2107 			1u																	//deUint32				layerCount
2108 		};
2109 
2110 		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
2111 			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2112 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2113 			resultImage.getImage(), subresourceRange);
2114 
2115 		for (deUint32 width = 1u; width < maxWidth; width++)
2116 		{
2117 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2118 			{
2119 				// re-init the data
2120 				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2121 				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
2122 			}
2123 
2124 			totalIterations++;
2125 
2126 			beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2127 
2128 			context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
2129 
2130 			context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2131 
2132 			context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2133 
2134 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2135 
2136 			context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2137 
2138 			context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2139 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2140 					&descriptorSet.get(), 0u, DE_NULL);
2141 
2142 			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
2143 
2144 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2145 
2146 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2147 
2148 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2149 
2150 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2151 			waitFence(context, fence);
2152 
2153 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2154 			{
2155 				std::vector<const void*> datas;
2156 				if (!inputBuffers[ndx]->isImage())
2157 				{
2158 					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
2159 					invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2160 					// we always have our result data first
2161 					datas.push_back(resultAlloc.getHostPtr());
2162 				}
2163 
2164 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2165 				{
2166 					const deUint32 datasNdx = index - stagesCount;
2167 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
2168 					{
2169 						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
2170 						invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2171 						// we always have our result data first
2172 						datas.push_back(resultAlloc.getHostPtr());
2173 					}
2174 				}
2175 
2176 				if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
2177 					failedIterations++;
2178 			}
2179 			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2180 			{
2181 				std::vector<const void*> datas;
2182 				const Allocation& resultAlloc = imageBufferResult.getAllocation();
2183 				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2184 
2185 				// we always have our result data first
2186 				datas.push_back(resultAlloc.getHostPtr());
2187 
2188 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2189 				{
2190 					const deUint32 datasNdx = index - stagesCount;
2191 					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
2192 					{
2193 						const Allocation& alloc = inputBuffers[index]->getAllocation();
2194 						invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2195 						// we always have our result data first
2196 						datas.push_back(alloc.getHostPtr());
2197 					}
2198 				}
2199 
2200 				if (!checkResult(datas, width , subgroupSize))
2201 					failedIterations++;
2202 			}
2203 
2204 			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2205 		}
2206 
2207 		if (0 < failedIterations)
2208 		{
2209 			context.getTestContext().getLog()
2210 					<< TestLog::Message << (totalIterations - failedIterations) << " / "
2211 					<< totalIterations << " values passed" << TestLog::EndMessage;
2212 			return tcu::TestStatus::fail("Failed!");
2213 		}
2214 	}
2215 
2216 	return tcu::TestStatus::pass("OK");
2217 }
2218 
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,SSBOData * extraData,deUint32 extraDataCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))2219 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
2220 	SSBOData* extraData, deUint32 extraDataCount,
2221 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2222 {
2223 	const deUint32							maxWidth				= 1024u;
2224 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
2225 	DescriptorSetLayoutBuilder				layoutBuilder;
2226 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule
2227 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2228 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule
2229 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2230 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
2231 
2232 	const VkVertexInputBindingDescription	vertexInputBinding		=
2233 	{
2234 		0u,											// binding;
2235 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
2236 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
2237 	};
2238 
2239 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
2240 	{
2241 		0u,
2242 		0u,
2243 		VK_FORMAT_R32G32B32A32_SFLOAT,
2244 		0u
2245 	};
2246 
2247 	for (deUint32 i = 0u; i < extraDataCount; i++)
2248 	{
2249 		if (extraData[i].isImage)
2250 		{
2251 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2252 		}
2253 		else
2254 		{
2255 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2256 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2257 		}
2258 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2259 		initializeMemory(context, alloc, extraData[i]);
2260 	}
2261 
2262 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2263 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
2264 
2265 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2266 
2267 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(context, *descriptorSetLayout));
2268 
2269 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
2270 																		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2271 																		*vertexShaderModule, *fragmentShaderModule,
2272 																		DE_NULL, DE_NULL, DE_NULL,
2273 																		*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2274 																		&vertexInputBinding, &vertexInputAttribute, true, format));
2275 	DescriptorPoolBuilder					poolBuilder;
2276 	DescriptorSetUpdateBuilder				updateBuilder;
2277 
2278 
2279 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2280 		poolBuilder.addType(inputBuffers[ndx]->getType());
2281 
2282 	Move <VkDescriptorPool>					descriptorPool;
2283 	Move <VkDescriptorSet>					descriptorSet;
2284 
2285 	if (extraDataCount > 0)
2286 	{
2287 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2288 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2289 		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
2290 	}
2291 
2292 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2293 	{
2294 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2295 		initializeMemory(context, alloc, extraData[ndx]);
2296 	}
2297 
2298 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2299 	{
2300 		if (inputBuffers[buffersNdx]->isImage())
2301 		{
2302 			VkDescriptorImageInfo info =
2303 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2304 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2305 
2306 			updateBuilder.writeSingle(*descriptorSet,
2307 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2308 										inputBuffers[buffersNdx]->getType(), &info);
2309 		}
2310 		else
2311 		{
2312 			VkDescriptorBufferInfo info =
2313 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2314 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2315 
2316 			updateBuilder.writeSingle(*descriptorSet,
2317 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2318 										inputBuffers[buffersNdx]->getType(), &info);
2319 		}
2320 	}
2321 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2322 
2323 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(context));
2324 
2325 	const deUint32							subgroupSize			= getSubgroupSize(context);
2326 
2327 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
2328 
2329 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
2330 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2331 
2332 	unsigned								totalIterations			= 0u;
2333 	unsigned								failedIterations		= 0u;
2334 
2335 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2336 
2337 	{
2338 		const Allocation&		alloc				= vertexBuffer.getAllocation();
2339 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2340 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
2341 		float					leftHandPosition	= -1.0f;
2342 
2343 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2344 		{
2345 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2346 			leftHandPosition += pixelSize;
2347 		}
2348 
2349 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2350 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2351 	}
2352 
2353 	for (deUint32 width = 1u; width < maxWidth; width++)
2354 	{
2355 		totalIterations++;
2356 		const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
2357 		const VkViewport			viewport			= makeViewport(maxWidth, 1u);
2358 		const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
2359 		const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2360 		Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2361 		const VkDeviceSize			vertexBufferOffset	= 0u;
2362 
2363 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2364 		{
2365 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2366 			initializeMemory(context, alloc, extraData[ndx]);
2367 		}
2368 
2369 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2370 		{
2371 			context.getDeviceInterface().cmdSetViewport(
2372 				*cmdBuffer, 0, 1, &viewport);
2373 
2374 			context.getDeviceInterface().cmdSetScissor(
2375 				*cmdBuffer, 0, 1, &scissor);
2376 
2377 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2378 
2379 			context.getDeviceInterface().cmdBindPipeline(
2380 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2381 
2382 			if (extraDataCount > 0)
2383 			{
2384 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2385 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2386 					&descriptorSet.get(), 0u, DE_NULL);
2387 			}
2388 
2389 			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2390 
2391 			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2392 
2393 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2394 
2395 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2396 
2397 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2398 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2399 			waitFence(context, fence);
2400 		}
2401 
2402 		{
2403 			const Allocation& allocResult = imageBufferResult.getAllocation();
2404 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
2405 
2406 			std::vector<const void*> datas;
2407 			datas.push_back(allocResult.getHostPtr());
2408 			if (!checkResult(datas, width, subgroupSize))
2409 				failedIterations++;
2410 		}
2411 	}
2412 
2413 	if (0 < failedIterations)
2414 	{
2415 		context.getTestContext().getLog()
2416 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
2417 				<< totalIterations << " values passed" << TestLog::EndMessage;
2418 		return tcu::TestStatus::fail("Failed!");
2419 	}
2420 
2421 	return tcu::TestStatus::pass("OK");
2422 }
2423 
2424 
makeFragmentFrameBufferTest(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 height,deUint32 subgroupSize))2425 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest	(Context& context, VkFormat format, SSBOData* extraDatas,
2426 	deUint32 extraDatasCount,
2427 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
2428 						deUint32 height, deUint32 subgroupSize))
2429 {
2430 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule
2431 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2432 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule
2433 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2434 
2435 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
2436 
2437 	for (deUint32 i = 0; i < extraDatasCount; i++)
2438 	{
2439 		if (extraDatas[i].isImage)
2440 		{
2441 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2442 										static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
2443 		}
2444 		else
2445 		{
2446 			vk::VkDeviceSize size =
2447 				getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
2448 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2449 		}
2450 
2451 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2452 		initializeMemory(context, alloc, extraDatas[i]);
2453 	}
2454 
2455 	DescriptorSetLayoutBuilder layoutBuilder;
2456 
2457 	for (deUint32 i = 0; i < extraDatasCount; i++)
2458 	{
2459 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
2460 								 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
2461 	}
2462 
2463 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2464 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2465 
2466 	const Unique<VkPipelineLayout> pipelineLayout(
2467 		makePipelineLayout(context, *descriptorSetLayout));
2468 
2469 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2470 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2471 									  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2472 									  *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
2473 									  DE_NULL, DE_NULL, true));
2474 
2475 	DescriptorPoolBuilder poolBuilder;
2476 
2477 	// To stop validation complaining, always add at least one type to pool.
2478 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2479 	for (deUint32 i = 0; i < extraDatasCount; i++)
2480 	{
2481 		poolBuilder.addType(inputBuffers[i]->getType());
2482 	}
2483 
2484 	Move<VkDescriptorPool> descriptorPool;
2485 	// Create descriptor set
2486 	Move<VkDescriptorSet> descriptorSet;
2487 
2488 	if (extraDatasCount > 0)
2489 	{
2490 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2491 													VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2492 
2493 		descriptorSet	= makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
2494 	}
2495 
2496 	DescriptorSetUpdateBuilder updateBuilder;
2497 
2498 	for (deUint32 i = 0; i < extraDatasCount; i++)
2499 	{
2500 		if (inputBuffers[i]->isImage())
2501 		{
2502 			VkDescriptorImageInfo info =
2503 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2504 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2505 
2506 			updateBuilder.writeSingle(*descriptorSet,
2507 									  DescriptorSetUpdateBuilder::Location::binding(i),
2508 									  inputBuffers[i]->getType(), &info);
2509 		}
2510 		else
2511 		{
2512 			VkDescriptorBufferInfo info =
2513 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
2514 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
2515 
2516 			updateBuilder.writeSingle(*descriptorSet,
2517 									  DescriptorSetUpdateBuilder::Location::binding(i),
2518 									  inputBuffers[i]->getType(), &info);
2519 		}
2520 	}
2521 
2522 	if (extraDatasCount > 0)
2523 		updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2524 
2525 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2526 
2527 	const deUint32 subgroupSize = getSubgroupSize(context);
2528 
2529 	const Unique<VkCommandBuffer> cmdBuffer(
2530 		makeCommandBuffer(context, *cmdPool));
2531 
2532 	unsigned totalIterations = 0;
2533 	unsigned failedIterations = 0;
2534 
2535 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
2536 	{
2537 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
2538 		{
2539 			totalIterations++;
2540 
2541 			// re-init the data
2542 			for (deUint32 i = 0; i < extraDatasCount; i++)
2543 			{
2544 				const Allocation& alloc = inputBuffers[i]->getAllocation();
2545 				initializeMemory(context, alloc, extraDatas[i]);
2546 			}
2547 
2548 			VkDeviceSize formatSize = getFormatSizeInBytes(format);
2549 			const VkDeviceSize resultImageSizeInBytes =
2550 				width * height * formatSize;
2551 
2552 			Image resultImage(context, width, height, format,
2553 							  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
2554 							  VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2555 
2556 			Buffer resultBuffer(context, resultImageSizeInBytes,
2557 								VK_IMAGE_USAGE_TRANSFER_DST_BIT);
2558 
2559 			const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
2560 													*renderPass, resultImage.getImageView(), width, height));
2561 
2562 			beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2563 
2564 			VkViewport viewport = makeViewport(width, height);
2565 
2566 			context.getDeviceInterface().cmdSetViewport(
2567 				*cmdBuffer, 0, 1, &viewport);
2568 
2569 			VkRect2D scissor = {{0, 0}, {width, height}};
2570 
2571 			context.getDeviceInterface().cmdSetScissor(
2572 				*cmdBuffer, 0, 1, &scissor);
2573 
2574 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
2575 
2576 			context.getDeviceInterface().cmdBindPipeline(
2577 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2578 
2579 			if (extraDatasCount > 0)
2580 			{
2581 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2582 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2583 						&descriptorSet.get(), 0u, DE_NULL);
2584 			}
2585 
2586 			context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);
2587 
2588 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2589 
2590 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2591 
2592 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2593 
2594 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2595 
2596 			waitFence(context, fence);
2597 
2598 			std::vector<const void*> datas;
2599 			{
2600 				const Allocation& resultAlloc = resultBuffer.getAllocation();
2601 				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2602 
2603 				// we always have our result data first
2604 				datas.push_back(resultAlloc.getHostPtr());
2605 			}
2606 
2607 			if (!checkResult(datas, width, height, subgroupSize))
2608 			{
2609 				failedIterations++;
2610 			}
2611 
2612 			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2613 		}
2614 	}
2615 
2616 	if (0 < failedIterations)
2617 	{
2618 		context.getTestContext().getLog()
2619 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
2620 				<< totalIterations << " values passed" << TestLog::EndMessage;
2621 		return tcu::TestStatus::fail("Failed!");
2622 	}
2623 
2624 	return tcu::TestStatus::pass("OK");
2625 }
2626 
makeComputeTest(Context & context,VkFormat format,SSBOData * inputs,deUint32 inputsCount,bool (* checkResult)(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 subgroupSize))2627 tcu::TestStatus vkt::subgroups::makeComputeTest(
2628 	Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
2629 	bool (*checkResult)(std::vector<const void*> datas,
2630 						const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2631 						deUint32 subgroupSize))
2632 {
2633 	VkDeviceSize elementSize = getFormatSizeInBytes(format);
2634 
2635 	const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
2636 										  maxSupportedSubgroupSize() *
2637 										  maxSupportedSubgroupSize();
2638 	const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
2639 
2640 	Buffer resultBuffer(
2641 		context, resultBufferSizeInBytes);
2642 
2643 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
2644 
2645 	for (deUint32 i = 0; i < inputsCount; i++)
2646 	{
2647 		if (inputs[i].isImage)
2648 		{
2649 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2650 										static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
2651 		}
2652 		else
2653 		{
2654 			vk::VkDeviceSize size =
2655 				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
2656 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2657 		}
2658 
2659 		const Allocation& alloc = inputBuffers[i]->getAllocation();
2660 		initializeMemory(context, alloc, inputs[i]);
2661 	}
2662 
2663 	DescriptorSetLayoutBuilder layoutBuilder;
2664 	layoutBuilder.addBinding(
2665 		resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2666 
2667 	for (deUint32 i = 0; i < inputsCount; i++)
2668 	{
2669 		layoutBuilder.addBinding(
2670 			inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2671 	}
2672 
2673 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2674 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2675 
2676 	const Unique<VkShaderModule> shaderModule(
2677 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
2678 						   context.getBinaryCollection().get("comp"), 0u));
2679 	const Unique<VkPipelineLayout> pipelineLayout(
2680 		makePipelineLayout(context, *descriptorSetLayout));
2681 
2682 	DescriptorPoolBuilder poolBuilder;
2683 
2684 	poolBuilder.addType(resultBuffer.getType());
2685 
2686 	for (deUint32 i = 0; i < inputsCount; i++)
2687 	{
2688 		poolBuilder.addType(inputBuffers[i]->getType());
2689 	}
2690 
2691 	const Unique<VkDescriptorPool> descriptorPool(
2692 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2693 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2694 
2695 	// Create descriptor set
2696 	const Unique<VkDescriptorSet> descriptorSet(
2697 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2698 
2699 	DescriptorSetUpdateBuilder updateBuilder;
2700 
2701 	const VkDescriptorBufferInfo resultDescriptorInfo =
2702 		makeDescriptorBufferInfo(
2703 			resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
2704 
2705 	updateBuilder.writeSingle(*descriptorSet,
2706 							  DescriptorSetUpdateBuilder::Location::binding(0u),
2707 							  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
2708 
2709 	for (deUint32 i = 0; i < inputsCount; i++)
2710 	{
2711 		if (inputBuffers[i]->isImage())
2712 		{
2713 			VkDescriptorImageInfo info =
2714 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2715 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2716 
2717 			updateBuilder.writeSingle(*descriptorSet,
2718 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
2719 									  inputBuffers[i]->getType(), &info);
2720 		}
2721 		else
2722 		{
2723 			vk::VkDeviceSize size =
2724 				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
2725 			VkDescriptorBufferInfo info =
2726 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
2727 
2728 			updateBuilder.writeSingle(*descriptorSet,
2729 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
2730 									  inputBuffers[i]->getType(), &info);
2731 		}
2732 	}
2733 
2734 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2735 
2736 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2737 
2738 	unsigned totalIterations = 0;
2739 	unsigned failedIterations = 0;
2740 
2741 	const deUint32 subgroupSize = getSubgroupSize(context);
2742 
2743 	const Unique<VkCommandBuffer> cmdBuffer(
2744 		makeCommandBuffer(context, *cmdPool));
2745 
2746 	const deUint32 numWorkgroups[3] = {4, 2, 2};
2747 
2748 	const deUint32 localSizesToTestCount = 15;
2749 	deUint32 localSizesToTest[localSizesToTestCount][3] =
2750 	{
2751 		{1, 1, 1},
2752 		{32, 4, 1},
2753 		{32, 1, 4},
2754 		{1, 32, 4},
2755 		{1, 4, 32},
2756 		{4, 1, 32},
2757 		{4, 32, 1},
2758 		{subgroupSize, 1, 1},
2759 		{1, subgroupSize, 1},
2760 		{1, 1, subgroupSize},
2761 		{3, 5, 7},
2762 		{128, 1, 1},
2763 		{1, 128, 1},
2764 		{1, 1, 64},
2765 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
2766 	};
2767 
2768 	Move<VkPipeline> lastPipeline(
2769 		makeComputePipeline(context, *pipelineLayout, *shaderModule,
2770 							localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
2771 
2772 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
2773 	{
2774 		const deUint32 nextX = localSizesToTest[index + 1][0];
2775 		const deUint32 nextY = localSizesToTest[index + 1][1];
2776 		const deUint32 nextZ = localSizesToTest[index + 1][2];
2777 
2778 		// we are running one test
2779 		totalIterations++;
2780 
2781 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2782 
2783 		context.getDeviceInterface().cmdBindPipeline(
2784 			*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
2785 
2786 		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2787 				VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
2788 				&descriptorSet.get(), 0u, DE_NULL);
2789 
2790 		context.getDeviceInterface().cmdDispatch(*cmdBuffer,
2791 				numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
2792 
2793 		endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2794 
2795 		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2796 
2797 		Move<VkPipeline> nextPipeline(
2798 			makeComputePipeline(context, *pipelineLayout, *shaderModule,
2799 								nextX, nextY, nextZ));
2800 
2801 		waitFence(context, fence);
2802 
2803 		std::vector<const void*> datas;
2804 
2805 		{
2806 			const Allocation& resultAlloc = resultBuffer.getAllocation();
2807 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2808 
2809 			// we always have our result data first
2810 			datas.push_back(resultAlloc.getHostPtr());
2811 		}
2812 
2813 		for (deUint32 i = 0; i < inputsCount; i++)
2814 		{
2815 			if (!inputBuffers[i]->isImage())
2816 			{
2817 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
2818 				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2819 
2820 				// we always have our result data first
2821 				datas.push_back(resultAlloc.getHostPtr());
2822 			}
2823 		}
2824 
2825 		if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
2826 		{
2827 			failedIterations++;
2828 		}
2829 
2830 		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2831 
2832 		lastPipeline = nextPipeline;
2833 	}
2834 
2835 	if (0 < failedIterations)
2836 	{
2837 		context.getTestContext().getLog()
2838 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
2839 				<< totalIterations << " values passed" << TestLog::EndMessage;
2840 		return tcu::TestStatus::fail("Failed!");
2841 	}
2842 
2843 	return tcu::TestStatus::pass("OK");
2844 }
2845