1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2017 The Khronos Group Inc.
6 * Copyright (c) 2017 Codeplay Software Ltd.
7 *
8 * Licensed under the Apache License, Version 2.0 (the "License");
9 * you may not use this file except in compliance with the License.
10 * You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 *
20 */ /*!
21 * \file
22 * \brief Subgroups Tests Utils
23 */ /*--------------------------------------------------------------------*/
24
25 #include "vktSubgroupsTestsUtils.hpp"
26 #include "deRandom.hpp"
27 #include "tcuCommandLine.hpp"
28 #include "tcuStringTemplate.hpp"
29 #include "vkBarrierUtil.hpp"
30 #include "vkImageUtil.hpp"
31 #include "vkTypeUtil.hpp"
32 #include "vkCmdUtil.hpp"
33 #include "vkObjUtil.hpp"
34
35 using namespace tcu;
36 using namespace std;
37 using namespace vk;
38 using namespace vkt;
39
40 namespace
41 {
getFormatSizeInBytes(const VkFormat format)42 deUint32 getFormatSizeInBytes(const VkFormat format)
43 {
44 switch (format)
45 {
46 default:
47 DE_FATAL("Unhandled format!");
48 return 0;
49 case VK_FORMAT_R32_SINT:
50 case VK_FORMAT_R32_UINT:
51 return sizeof(deInt32);
52 case VK_FORMAT_R32G32_SINT:
53 case VK_FORMAT_R32G32_UINT:
54 return static_cast<deUint32>(sizeof(deInt32) * 2);
55 case VK_FORMAT_R32G32B32_SINT:
56 case VK_FORMAT_R32G32B32_UINT:
57 case VK_FORMAT_R32G32B32A32_SINT:
58 case VK_FORMAT_R32G32B32A32_UINT:
59 return static_cast<deUint32>(sizeof(deInt32) * 4);
60 case VK_FORMAT_R32_SFLOAT:
61 return 4;
62 case VK_FORMAT_R32G32_SFLOAT:
63 return 8;
64 case VK_FORMAT_R32G32B32_SFLOAT:
65 return 16;
66 case VK_FORMAT_R32G32B32A32_SFLOAT:
67 return 16;
68 case VK_FORMAT_R64_SFLOAT:
69 return 8;
70 case VK_FORMAT_R64G64_SFLOAT:
71 return 16;
72 case VK_FORMAT_R64G64B64_SFLOAT:
73 return 32;
74 case VK_FORMAT_R64G64B64A64_SFLOAT:
75 return 32;
76 // The below formats are used to represent bool and bvec* types. These
77 // types are passed to the shader as int and ivec* types, before the
78 // calculations are done as booleans. We need a distinct type here so
79 // that the shader generators can switch on it and generate the correct
80 // shader source for testing.
81 case VK_FORMAT_R8_USCALED:
82 return sizeof(deInt32);
83 case VK_FORMAT_R8G8_USCALED:
84 return static_cast<deUint32>(sizeof(deInt32) * 2);
85 case VK_FORMAT_R8G8B8_USCALED:
86 case VK_FORMAT_R8G8B8A8_USCALED:
87 return static_cast<deUint32>(sizeof(deInt32) * 4);
88 }
89 }
90
getElementSizeInBytes(const VkFormat format,const subgroups::SSBOData::InputDataLayoutType layout)91 deUint32 getElementSizeInBytes(
92 const VkFormat format,
93 const subgroups::SSBOData::InputDataLayoutType layout)
94 {
95 deUint32 bytes = getFormatSizeInBytes(format);
96 if (layout == subgroups::SSBOData::LayoutStd140)
97 return bytes < 16 ? 16 : bytes;
98 else
99 return bytes;
100 }
101
makePipelineLayout(Context & context,const VkDescriptorSetLayout descriptorSetLayout)102 Move<VkPipelineLayout> makePipelineLayout(
103 Context& context, const VkDescriptorSetLayout descriptorSetLayout)
104 {
105 const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
106 VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
107 DE_NULL, // const void* pNext;
108 0u, // VkPipelineLayoutCreateFlags flags;
109 1u, // deUint32 setLayoutCount;
110 &descriptorSetLayout, // const VkDescriptorSetLayout* pSetLayouts;
111 0u, // deUint32 pushConstantRangeCount;
112 DE_NULL, // const VkPushConstantRange* pPushConstantRanges;
113 };
114 return createPipelineLayout(context.getDeviceInterface(),
115 context.getDevice(), &pipelineLayoutParams);
116 }
117
makeRenderPass(Context & context,VkFormat format)118 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
119 {
120 VkAttachmentReference colorReference = {
121 0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
122 };
123
124 const VkSubpassDescription subpassDescription = {0u,
125 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
126 DE_NULL, DE_NULL, 0, DE_NULL
127 };
128
129 const VkSubpassDependency subpassDependencies[2] = {
130 { VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
131 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
132 VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
133 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
134 VK_DEPENDENCY_BY_REGION_BIT
135 },
136 { 0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
137 VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
138 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
139 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
140 VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
141 },
142 };
143
144 VkAttachmentDescription attachmentDescription = {0u, format,
145 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
146 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
147 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
148 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
149 };
150
151 const VkRenderPassCreateInfo renderPassCreateInfo = {
152 VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
153 &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
154 };
155
156 return createRenderPass(context.getDeviceInterface(), context.getDevice(),
157 &renderPassCreateInfo);
158 }
159
makeFramebuffer(Context & context,const VkRenderPass renderPass,const VkImageView imageView,deUint32 width,deUint32 height)160 Move<VkFramebuffer> makeFramebuffer(Context& context,
161 const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
162 deUint32 height)
163 {
164 const VkFramebufferCreateInfo framebufferCreateInfo = {
165 VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
166 &imageView, width, height, 1
167 };
168
169 return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
170 &framebufferCreateInfo);
171 }
172
makeGraphicsPipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderStageFlags stages,const VkShaderModule vertexShaderModule,const VkShaderModule fragmentShaderModule,const VkShaderModule geometryShaderModule,const VkShaderModule tessellationControlModule,const VkShaderModule tessellationEvaluationModule,const VkRenderPass renderPass,const VkPrimitiveTopology topology=VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,const VkVertexInputBindingDescription * vertexInputBindingDescription=DE_NULL,const VkVertexInputAttributeDescription * vertexInputAttributeDescriptions=DE_NULL,const bool frameBufferTests=false,const vk::VkFormat attachmentFormat=VK_FORMAT_R32G32B32A32_SFLOAT)173 Move<VkPipeline> makeGraphicsPipeline(Context& context,
174 const VkPipelineLayout pipelineLayout,
175 const VkShaderStageFlags stages,
176 const VkShaderModule vertexShaderModule,
177 const VkShaderModule fragmentShaderModule,
178 const VkShaderModule geometryShaderModule,
179 const VkShaderModule tessellationControlModule,
180 const VkShaderModule tessellationEvaluationModule,
181 const VkRenderPass renderPass,
182 const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
183 const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
184 const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
185 const bool frameBufferTests = false,
186 const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
187 {
188 std::vector<VkViewport> noViewports;
189 std::vector<VkRect2D> noScissors;
190
191 const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
192 {
193 VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
194 DE_NULL, // const void* pNext;
195 0u, // VkPipelineVertexInputStateCreateFlags flags;
196 vertexInputBindingDescription == DE_NULL ? 0u : 1u, // deUint32 vertexBindingDescriptionCount;
197 vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
198 vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
199 vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
200 };
201
202 const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
203 const VkColorComponentFlags colorComponent =
204 numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
205 numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
206 numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
207 VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
208
209 const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
210 {
211 VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
212 VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
213 colorComponent
214 };
215
216 const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
217 {
218 VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
219 VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
220 { 0.0f, 0.0f, 0.0f, 0.0f }
221 };
222
223 const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
224
225 return vk::makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
226 context.getDevice(), // const VkDevice device
227 pipelineLayout, // const VkPipelineLayout pipelineLayout
228 vertexShaderModule, // const VkShaderModule vertexShaderModule
229 tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
230 tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
231 geometryShaderModule, // const VkShaderModule geometryShaderModule
232 fragmentShaderModule, // const VkShaderModule fragmentShaderModule
233 renderPass, // const VkRenderPass renderPass
234 noViewports, // const std::vector<VkViewport>& viewports
235 noScissors, // const std::vector<VkRect2D>& scissors
236 topology, // const VkPrimitiveTopology topology
237 0u, // const deUint32 subpass
238 patchControlPoints, // const deUint32 patchControlPoints
239 &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
240 DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
241 DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
242 DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
243 &colorBlendStateCreateInfo); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
244 }
245
makeComputePipeline(Context & context,const VkPipelineLayout pipelineLayout,const VkShaderModule shaderModule,deUint32 localSizeX,deUint32 localSizeY,deUint32 localSizeZ)246 Move<VkPipeline> makeComputePipeline(Context& context,
247 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
248 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
249 {
250 const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
251
252 const vk::VkSpecializationMapEntry entries[3] =
253 {
254 {0, sizeof(deUint32) * 0, sizeof(deUint32)},
255 {1, sizeof(deUint32) * 1, sizeof(deUint32)},
256 {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
257 };
258
259 const vk::VkSpecializationInfo info =
260 {
261 /* mapEntryCount = */ 3,
262 /* pMapEntries = */ entries,
263 /* dataSize = */ sizeof(localSize),
264 /* pData = */ localSize
265 };
266
267 const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
268 {
269 VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
270 DE_NULL, // const void* pNext;
271 0u, // VkPipelineShaderStageCreateFlags flags;
272 VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
273 shaderModule, // VkShaderModule module;
274 "main", // const char* pName;
275 &info, // const VkSpecializationInfo* pSpecializationInfo;
276 };
277
278 const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
279 {
280 VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
281 DE_NULL, // const void* pNext;
282 0u, // VkPipelineCreateFlags flags;
283 pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
284 pipelineLayout, // VkPipelineLayout layout;
285 DE_NULL, // VkPipeline basePipelineHandle;
286 0, // deInt32 basePipelineIndex;
287 };
288
289 return createComputePipeline(context.getDeviceInterface(),
290 context.getDevice(), DE_NULL, &pipelineCreateInfo);
291 }
292
makeDescriptorSet(Context & context,const VkDescriptorPool descriptorPool,const VkDescriptorSetLayout setLayout)293 Move<VkDescriptorSet> makeDescriptorSet(Context& context,
294 const VkDescriptorPool descriptorPool,
295 const VkDescriptorSetLayout setLayout)
296 {
297 const VkDescriptorSetAllocateInfo allocateParams =
298 {
299 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType
300 // sType;
301 DE_NULL, // const void* pNext;
302 descriptorPool, // VkDescriptorPool descriptorPool;
303 1u, // deUint32 setLayoutCount;
304 &setLayout, // const VkDescriptorSetLayout* pSetLayouts;
305 };
306 return allocateDescriptorSet(
307 context.getDeviceInterface(), context.getDevice(), &allocateParams);
308 }
309
makeCommandPool(Context & context)310 Move<VkCommandPool> makeCommandPool(Context& context)
311 {
312 const VkCommandPoolCreateInfo commandPoolParams =
313 {
314 VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
315 DE_NULL, // const void* pNext;
316 VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags
317 // flags;
318 context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
319 };
320
321 return createCommandPool(
322 context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
323 }
324
makeCommandBuffer(Context & context,const VkCommandPool commandPool)325 Move<VkCommandBuffer> makeCommandBuffer(
326 Context& context, const VkCommandPool commandPool)
327 {
328 const VkCommandBufferAllocateInfo bufferAllocateParams =
329 {
330 VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
331 DE_NULL, // const void* pNext;
332 commandPool, // VkCommandPool commandPool;
333 VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
334 1u, // deUint32 bufferCount;
335 };
336 return allocateCommandBuffer(context.getDeviceInterface(),
337 context.getDevice(), &bufferAllocateParams);
338 }
339
submitCommandBuffer(Context & context,const VkCommandBuffer commandBuffer)340 Move<VkFence> submitCommandBuffer(
341 Context& context, const VkCommandBuffer commandBuffer)
342 {
343 const VkFenceCreateInfo fenceParams =
344 {
345 VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType sType;
346 DE_NULL, // const void* pNext;
347 0u, // VkFenceCreateFlags flags;
348 };
349
350 Move<VkFence> fence(createFence(
351 context.getDeviceInterface(), context.getDevice(), &fenceParams));
352
353 const VkSubmitInfo submitInfo =
354 {
355 VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
356 DE_NULL, // const void* pNext;
357 0u, // deUint32 waitSemaphoreCount;
358 DE_NULL, // const VkSemaphore* pWaitSemaphores;
359 (const VkPipelineStageFlags*)DE_NULL,
360 1u, // deUint32 commandBufferCount;
361 &commandBuffer, // const VkCommandBuffer* pCommandBuffers;
362 0u, // deUint32 signalSemaphoreCount;
363 DE_NULL, // const VkSemaphore* pSignalSemaphores;
364 };
365
366 vk::VkResult result = (context.getDeviceInterface().queueSubmit(
367 context.getUniversalQueue(), 1u, &submitInfo, *fence));
368 VK_CHECK(result);
369
370 return Move<VkFence>(fence);
371 }
372
// Waits on the given fence with waitAll set to DE_TRUE and a timeout of
// ~0ull. The result of the wait is checked with VK_CHECK.
//
// The fence is taken by value, so this function holds the Move<VkFence>
// wrapper for the duration of the call.
// NOTE(review): presumably the wrapper releases the fence when it goes out of
// scope on return - confirm against the Move<> semantics.
void waitFence(Context& context, Move<VkFence> fence)
{
    VK_CHECK(context.getDeviceInterface().waitForFences(
        context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
}
378
379 struct Buffer;
380 struct Image;
381
382 struct BufferOrImage
383 {
isImage__anon8b2961bb0111::BufferOrImage384 bool isImage() const
385 {
386 return m_isImage;
387 }
388
getAsBuffer__anon8b2961bb0111::BufferOrImage389 Buffer* getAsBuffer()
390 {
391 if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
392 return reinterpret_cast<Buffer* >(this);
393 }
394
getAsImage__anon8b2961bb0111::BufferOrImage395 Image* getAsImage()
396 {
397 if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
398 return reinterpret_cast<Image*>(this);
399 }
400
getType__anon8b2961bb0111::BufferOrImage401 virtual VkDescriptorType getType() const
402 {
403 if (m_isImage)
404 {
405 return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
406 }
407 else
408 {
409 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
410 }
411 }
412
getAllocation__anon8b2961bb0111::BufferOrImage413 Allocation& getAllocation() const
414 {
415 return *m_allocation;
416 }
417
~BufferOrImage__anon8b2961bb0111::BufferOrImage418 virtual ~BufferOrImage() {}
419
420 protected:
BufferOrImage__anon8b2961bb0111::BufferOrImage421 explicit BufferOrImage(bool image) : m_isImage(image) {}
422
423 bool m_isImage;
424 de::details::MovePtr<Allocation> m_allocation;
425 };
426
427 struct Buffer : public BufferOrImage
428 {
Buffer__anon8b2961bb0111::Buffer429 explicit Buffer(
430 Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
431 : BufferOrImage (false)
432 , m_sizeInBytes (sizeInBytes)
433 , m_usage (usage)
434 {
435 const vk::VkBufferCreateInfo bufferCreateInfo =
436 {
437 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
438 DE_NULL,
439 0u,
440 m_sizeInBytes,
441 m_usage,
442 VK_SHARING_MODE_EXCLUSIVE,
443 0u,
444 DE_NULL,
445 };
446 m_buffer = createBuffer(context.getDeviceInterface(),
447 context.getDevice(), &bufferCreateInfo);
448 vk::VkMemoryRequirements req = getBufferMemoryRequirements(
449 context.getDeviceInterface(), context.getDevice(), *m_buffer);
450 m_allocation = context.getDefaultAllocator().allocate(
451 req, MemoryRequirement::HostVisible);
452 VK_CHECK(context.getDeviceInterface().bindBufferMemory(
453 context.getDevice(), *m_buffer, m_allocation->getMemory(),
454 m_allocation->getOffset()));
455 }
456
getType__anon8b2961bb0111::Buffer457 virtual VkDescriptorType getType() const
458 {
459 if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
460 {
461 return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
462 }
463 return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
464 }
465
getBuffer__anon8b2961bb0111::Buffer466 VkBuffer getBuffer() const {
467 return *m_buffer;
468 }
469
getBufferPtr__anon8b2961bb0111::Buffer470 const VkBuffer* getBufferPtr() const {
471 return &(*m_buffer);
472 }
473
getSize__anon8b2961bb0111::Buffer474 VkDeviceSize getSize() const {
475 return m_sizeInBytes;
476 }
477
478 private:
479 Move<VkBuffer> m_buffer;
480 VkDeviceSize m_sizeInBytes;
481 const VkBufferUsageFlags m_usage;
482 };
483
484 struct Image : public BufferOrImage
485 {
Image__anon8b2961bb0111::Image486 explicit Image(Context& context, deUint32 width, deUint32 height,
487 VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
488 : BufferOrImage(true)
489 {
490 const VkImageCreateInfo imageCreateInfo =
491 {
492 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
493 format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
494 VK_IMAGE_TILING_OPTIMAL, usage,
495 VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
496 VK_IMAGE_LAYOUT_UNDEFINED
497 };
498 m_image = createImage(context.getDeviceInterface(), context.getDevice(),
499 &imageCreateInfo);
500 vk::VkMemoryRequirements req = getImageMemoryRequirements(
501 context.getDeviceInterface(), context.getDevice(), *m_image);
502 req.size *= 2;
503 m_allocation =
504 context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
505 VK_CHECK(context.getDeviceInterface().bindImageMemory(
506 context.getDevice(), *m_image, m_allocation->getMemory(),
507 m_allocation->getOffset()));
508
509 const VkComponentMapping componentMapping =
510 {
511 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
512 VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
513 };
514
515 const VkImageViewCreateInfo imageViewCreateInfo =
516 {
517 VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
518 VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
519 {
520 VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
521 }
522 };
523
524 m_imageView = createImageView(context.getDeviceInterface(),
525 context.getDevice(), &imageViewCreateInfo);
526
527 const struct VkSamplerCreateInfo samplerCreateInfo =
528 {
529 VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
530 DE_NULL,
531 0u,
532 VK_FILTER_NEAREST,
533 VK_FILTER_NEAREST,
534 VK_SAMPLER_MIPMAP_MODE_NEAREST,
535 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
536 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
537 VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
538 0.0f,
539 VK_FALSE,
540 1.0f,
541 DE_FALSE,
542 VK_COMPARE_OP_ALWAYS,
543 0.0f,
544 0.0f,
545 VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
546 VK_FALSE,
547 };
548
549 m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
550 }
551
getImage__anon8b2961bb0111::Image552 VkImage getImage() const {
553 return *m_image;
554 }
555
getImageView__anon8b2961bb0111::Image556 VkImageView getImageView() const {
557 return *m_imageView;
558 }
559
getSampler__anon8b2961bb0111::Image560 VkSampler getSampler() const {
561 return *m_sampler;
562 }
563
564 private:
565 Move<VkImage> m_image;
566 Move<VkImageView> m_imageView;
567 Move<VkSampler> m_sampler;
568 };
569 }
570
getSharedMemoryBallotHelper()571 std::string vkt::subgroups::getSharedMemoryBallotHelper()
572 {
573 return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
574 "uvec4 sharedMemoryBallot(bool vote)\n"
575 "{\n"
576 " uint groupOffset = gl_SubgroupID;\n"
577 " // One invocation in the group 0's the whole group's data\n"
578 " if (subgroupElect())\n"
579 " {\n"
580 " superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
581 " }\n"
582 " subgroupMemoryBarrierShared();\n"
583 " if (vote)\n"
584 " {\n"
585 " const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
586 " const highp uint bitToSet = 1u << invocationId;\n"
587 " switch (gl_SubgroupInvocationID / 32)\n"
588 " {\n"
589 " case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
590 " case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
591 " case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
592 " case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
593 " }\n"
594 " }\n"
595 " subgroupMemoryBarrierShared();\n"
596 " return superSecretComputeShaderHelper[groupOffset];\n"
597 "}\n";
598 }
599
getSubgroupSize(Context & context)600 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
601 {
602 VkPhysicalDeviceSubgroupProperties subgroupProperties;
603 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
604 subgroupProperties.pNext = DE_NULL;
605
606 VkPhysicalDeviceProperties2 properties;
607 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
608 properties.pNext = &subgroupProperties;
609
610 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
611
612 return subgroupProperties.subgroupSize;
613 }
614
maxSupportedSubgroupSize()615 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
616 return 128u;
617 }
618
getShaderStageName(VkShaderStageFlags stage)619 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
620 {
621 switch (stage)
622 {
623 default:
624 DE_FATAL("Unhandled stage!");
625 return "";
626 case VK_SHADER_STAGE_COMPUTE_BIT:
627 return "compute";
628 case VK_SHADER_STAGE_FRAGMENT_BIT:
629 return "fragment";
630 case VK_SHADER_STAGE_VERTEX_BIT:
631 return "vertex";
632 case VK_SHADER_STAGE_GEOMETRY_BIT:
633 return "geometry";
634 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
635 return "tess_control";
636 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
637 return "tess_eval";
638 }
639 }
640
getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)641 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
642 {
643 switch (bit)
644 {
645 default:
646 DE_FATAL("Unknown subgroup feature category!");
647 return "";
648 case VK_SUBGROUP_FEATURE_BASIC_BIT:
649 return "VK_SUBGROUP_FEATURE_BASIC_BIT";
650 case VK_SUBGROUP_FEATURE_VOTE_BIT:
651 return "VK_SUBGROUP_FEATURE_VOTE_BIT";
652 case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
653 return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
654 case VK_SUBGROUP_FEATURE_BALLOT_BIT:
655 return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
656 case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
657 return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
658 case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
659 return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
660 case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
661 return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
662 case VK_SUBGROUP_FEATURE_QUAD_BIT:
663 return "VK_SUBGROUP_FEATURE_QUAD_BIT";
664 }
665 }
666
addNoSubgroupShader(SourceCollections & programCollection)667 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
668 {
669 {
670 /*
671 "#version 450\n"
672 "void main (void)\n"
673 "{\n"
674 " float pixelSize = 2.0f/1024.0f;\n"
675 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
676 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
677 " gl_PointSize = 1.0f;\n"
678 "}\n"
679 */
680 const std::string vertNoSubgroup =
681 "; SPIR-V\n"
682 "; Version: 1.3\n"
683 "; Generator: Khronos Glslang Reference Front End; 1\n"
684 "; Bound: 37\n"
685 "; Schema: 0\n"
686 "OpCapability Shader\n"
687 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
688 "OpMemoryModel Logical GLSL450\n"
689 "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
690 "OpMemberDecorate %20 0 BuiltIn Position\n"
691 "OpMemberDecorate %20 1 BuiltIn PointSize\n"
692 "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
693 "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
694 "OpDecorate %20 Block\n"
695 "OpDecorate %26 BuiltIn VertexIndex\n"
696 "%2 = OpTypeVoid\n"
697 "%3 = OpTypeFunction %2\n"
698 "%6 = OpTypeFloat 32\n"
699 "%7 = OpTypePointer Function %6\n"
700 "%9 = OpConstant %6 0.00195313\n"
701 "%12 = OpConstant %6 2\n"
702 "%14 = OpConstant %6 1\n"
703 "%16 = OpTypeVector %6 4\n"
704 "%17 = OpTypeInt 32 0\n"
705 "%18 = OpConstant %17 1\n"
706 "%19 = OpTypeArray %6 %18\n"
707 "%20 = OpTypeStruct %16 %6 %19 %19\n"
708 "%21 = OpTypePointer Output %20\n"
709 "%22 = OpVariable %21 Output\n"
710 "%23 = OpTypeInt 32 1\n"
711 "%24 = OpConstant %23 0\n"
712 "%25 = OpTypePointer Input %23\n"
713 "%26 = OpVariable %25 Input\n"
714 "%33 = OpConstant %6 0\n"
715 "%35 = OpTypePointer Output %16\n"
716 "%37 = OpConstant %23 1\n"
717 "%38 = OpTypePointer Output %6\n"
718 "%4 = OpFunction %2 None %3\n"
719 "%5 = OpLabel\n"
720 "%8 = OpVariable %7 Function\n"
721 "%10 = OpVariable %7 Function\n"
722 "OpStore %8 %9\n"
723 "%11 = OpLoad %6 %8\n"
724 "%13 = OpFDiv %6 %11 %12\n"
725 "%15 = OpFSub %6 %13 %14\n"
726 "OpStore %10 %15\n"
727 "%27 = OpLoad %23 %26\n"
728 "%28 = OpConvertSToF %6 %27\n"
729 "%29 = OpLoad %6 %8\n"
730 "%30 = OpFMul %6 %28 %29\n"
731 "%31 = OpLoad %6 %10\n"
732 "%32 = OpFAdd %6 %30 %31\n"
733 "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
734 "%36 = OpAccessChain %35 %22 %24\n"
735 "OpStore %36 %34\n"
736 "%39 = OpAccessChain %38 %22 %37\n"
737 "OpStore %39 %14\n"
738 "OpReturn\n"
739 "OpFunctionEnd\n";
740 programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
741 }
742
743 {
744 /*
745 "#version 450\n"
746 "layout(vertices=1) out;\n"
747 "\n"
748 "void main (void)\n"
749 "{\n"
750 " if (gl_InvocationID == 0)\n"
751 " {\n"
752 " gl_TessLevelOuter[0] = 1.0f;\n"
753 " gl_TessLevelOuter[1] = 1.0f;\n"
754 " }\n"
755 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
756 "}\n"
757 */
758 const std::string tescNoSubgroup =
759 "; SPIR-V\n"
760 "; Version: 1.3\n"
761 "; Generator: Khronos Glslang Reference Front End; 1\n"
762 "; Bound: 45\n"
763 "; Schema: 0\n"
764 "OpCapability Tessellation\n"
765 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
766 "OpMemoryModel Logical GLSL450\n"
767 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
768 "OpExecutionMode %4 OutputVertices 1\n"
769 "OpDecorate %8 BuiltIn InvocationId\n"
770 "OpDecorate %20 Patch\n"
771 "OpDecorate %20 BuiltIn TessLevelOuter\n"
772 "OpMemberDecorate %29 0 BuiltIn Position\n"
773 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
774 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
775 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
776 "OpDecorate %29 Block\n"
777 "OpMemberDecorate %34 0 BuiltIn Position\n"
778 "OpMemberDecorate %34 1 BuiltIn PointSize\n"
779 "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
780 "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
781 "OpDecorate %34 Block\n"
782 "%2 = OpTypeVoid\n"
783 "%3 = OpTypeFunction %2\n"
784 "%6 = OpTypeInt 32 1\n"
785 "%7 = OpTypePointer Input %6\n"
786 "%8 = OpVariable %7 Input\n"
787 "%10 = OpConstant %6 0\n"
788 "%11 = OpTypeBool\n"
789 "%15 = OpTypeFloat 32\n"
790 "%16 = OpTypeInt 32 0\n"
791 "%17 = OpConstant %16 4\n"
792 "%18 = OpTypeArray %15 %17\n"
793 "%19 = OpTypePointer Output %18\n"
794 "%20 = OpVariable %19 Output\n"
795 "%21 = OpConstant %15 1\n"
796 "%22 = OpTypePointer Output %15\n"
797 "%24 = OpConstant %6 1\n"
798 "%26 = OpTypeVector %15 4\n"
799 "%27 = OpConstant %16 1\n"
800 "%28 = OpTypeArray %15 %27\n"
801 "%29 = OpTypeStruct %26 %15 %28 %28\n"
802 "%30 = OpTypeArray %29 %27\n"
803 "%31 = OpTypePointer Output %30\n"
804 "%32 = OpVariable %31 Output\n"
805 "%34 = OpTypeStruct %26 %15 %28 %28\n"
806 "%35 = OpConstant %16 32\n"
807 "%36 = OpTypeArray %34 %35\n"
808 "%37 = OpTypePointer Input %36\n"
809 "%38 = OpVariable %37 Input\n"
810 "%40 = OpTypePointer Input %26\n"
811 "%43 = OpTypePointer Output %26\n"
812 "%4 = OpFunction %2 None %3\n"
813 "%5 = OpLabel\n"
814 "%9 = OpLoad %6 %8\n"
815 "%12 = OpIEqual %11 %9 %10\n"
816 "OpSelectionMerge %14 None\n"
817 "OpBranchConditional %12 %13 %14\n"
818 "%13 = OpLabel\n"
819 "%23 = OpAccessChain %22 %20 %10\n"
820 "OpStore %23 %21\n"
821 "%25 = OpAccessChain %22 %20 %24\n"
822 "OpStore %25 %21\n"
823 "OpBranch %14\n"
824 "%14 = OpLabel\n"
825 "%33 = OpLoad %6 %8\n"
826 "%39 = OpLoad %6 %8\n"
827 "%41 = OpAccessChain %40 %38 %39 %10\n"
828 "%42 = OpLoad %26 %41\n"
829 "%44 = OpAccessChain %43 %32 %33 %10\n"
830 "OpStore %44 %42\n"
831 "OpReturn\n"
832 "OpFunctionEnd\n";
833 programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
834 }
835
836 {
837 /*
838 "#version 450\n"
839 "layout(isolines) in;\n"
840 "\n"
841 "void main (void)\n"
842 "{\n"
843 " float pixelSize = 2.0f/1024.0f;\n"
844 " gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
845 "}\n";
846 */
847 const std::string teseNoSubgroup =
848 "; SPIR-V\n"
849 "; Version: 1.3\n"
850 "; Generator: Khronos Glslang Reference Front End; 2\n"
851 "; Bound: 42\n"
852 "; Schema: 0\n"
853 "OpCapability Tessellation\n"
854 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
855 "OpMemoryModel Logical GLSL450\n"
856 "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
857 "OpExecutionMode %4 Isolines\n"
858 "OpExecutionMode %4 SpacingEqual\n"
859 "OpExecutionMode %4 VertexOrderCcw\n"
860 "OpMemberDecorate %14 0 BuiltIn Position\n"
861 "OpMemberDecorate %14 1 BuiltIn PointSize\n"
862 "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
863 "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
864 "OpDecorate %14 Block\n"
865 "OpMemberDecorate %19 0 BuiltIn Position\n"
866 "OpMemberDecorate %19 1 BuiltIn PointSize\n"
867 "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
868 "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
869 "OpDecorate %19 Block\n"
870 "OpDecorate %29 BuiltIn TessCoord\n"
871 "%2 = OpTypeVoid\n"
872 "%3 = OpTypeFunction %2\n"
873 "%6 = OpTypeFloat 32\n"
874 "%7 = OpTypePointer Function %6\n"
875 "%9 = OpConstant %6 0.00195313\n"
876 "%10 = OpTypeVector %6 4\n"
877 "%11 = OpTypeInt 32 0\n"
878 "%12 = OpConstant %11 1\n"
879 "%13 = OpTypeArray %6 %12\n"
880 "%14 = OpTypeStruct %10 %6 %13 %13\n"
881 "%15 = OpTypePointer Output %14\n"
882 "%16 = OpVariable %15 Output\n"
883 "%17 = OpTypeInt 32 1\n"
884 "%18 = OpConstant %17 0\n"
885 "%19 = OpTypeStruct %10 %6 %13 %13\n"
886 "%20 = OpConstant %11 32\n"
887 "%21 = OpTypeArray %19 %20\n"
888 "%22 = OpTypePointer Input %21\n"
889 "%23 = OpVariable %22 Input\n"
890 "%24 = OpTypePointer Input %10\n"
891 "%27 = OpTypeVector %6 3\n"
892 "%28 = OpTypePointer Input %27\n"
893 "%29 = OpVariable %28 Input\n"
894 "%30 = OpConstant %11 0\n"
895 "%31 = OpTypePointer Input %6\n"
896 "%36 = OpConstant %6 2\n"
897 "%40 = OpTypePointer Output %10\n"
898 "%4 = OpFunction %2 None %3\n"
899 "%5 = OpLabel\n"
900 "%8 = OpVariable %7 Function\n"
901 "OpStore %8 %9\n"
902 "%25 = OpAccessChain %24 %23 %18 %18\n"
903 "%26 = OpLoad %10 %25\n"
904 "%32 = OpAccessChain %31 %29 %30\n"
905 "%33 = OpLoad %6 %32\n"
906 "%34 = OpLoad %6 %8\n"
907 "%35 = OpFMul %6 %33 %34\n"
908 "%37 = OpFDiv %6 %35 %36\n"
909 "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
910 "%39 = OpFAdd %10 %26 %38\n"
911 "%41 = OpAccessChain %40 %16 %18\n"
912 "OpStore %41 %39\n"
913 "OpReturn\n"
914 "OpFunctionEnd\n";
915 programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
916 }
917
918 }
919
920
getVertShaderForStage(vk::VkShaderStageFlags stage)921 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
922 {
923 switch (stage)
924 {
925 default:
926 DE_FATAL("Unhandled stage!");
927 return "";
928 case VK_SHADER_STAGE_FRAGMENT_BIT:
929 return
930 "#version 450\n"
931 "void main (void)\n"
932 "{\n"
933 " float pixelSize = 2.0f/1024.0f;\n"
934 " float pixelPosition = pixelSize/2.0f - 1.0f;\n"
935 " gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
936 "}\n";
937 case VK_SHADER_STAGE_GEOMETRY_BIT:
938 return
939 "#version 450\n"
940 "void main (void)\n"
941 "{\n"
942 "}\n";
943 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
944 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
945 return
946 "#version 450\n"
947 "void main (void)\n"
948 "{\n"
949 "}\n";
950 }
951 }
952
isSubgroupSupported(Context & context)953 bool vkt::subgroups::isSubgroupSupported(Context& context)
954 {
955 return context.contextSupports(vk::ApiVersion(1, 1, 0));
956 }
957
areSubgroupOperationsSupportedForStage(Context & context,const VkShaderStageFlags stage)958 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
959 Context& context, const VkShaderStageFlags stage)
960 {
961 VkPhysicalDeviceSubgroupProperties subgroupProperties;
962 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
963 subgroupProperties.pNext = DE_NULL;
964
965 VkPhysicalDeviceProperties2 properties;
966 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
967 properties.pNext = &subgroupProperties;
968
969 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
970
971 return (stage & subgroupProperties.supportedStages) ? true : false;
972 }
973
areSubgroupOperationsRequiredForStage(VkShaderStageFlags stage)974 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
975 VkShaderStageFlags stage)
976 {
977 switch (stage)
978 {
979 default:
980 return false;
981 case VK_SHADER_STAGE_COMPUTE_BIT:
982 return true;
983 }
984 }
985
isSubgroupFeatureSupportedForDevice(Context & context,VkSubgroupFeatureFlagBits bit)986 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
987 Context& context,
988 VkSubgroupFeatureFlagBits bit) {
989 VkPhysicalDeviceSubgroupProperties subgroupProperties;
990 subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
991 subgroupProperties.pNext = DE_NULL;
992
993 VkPhysicalDeviceProperties2 properties;
994 properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
995 properties.pNext = &subgroupProperties;
996
997 context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
998
999 return (bit & subgroupProperties.supportedOperations) ? true : false;
1000 }
1001
isFragmentSSBOSupportedForDevice(Context & context)1002 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
1003 {
1004 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1005 context.getInstanceInterface(), context.getPhysicalDevice());
1006 return features.fragmentStoresAndAtomics ? true : false;
1007 }
1008
isVertexSSBOSupportedForDevice(Context & context)1009 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
1010 {
1011 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1012 context.getInstanceInterface(), context.getPhysicalDevice());
1013 return features.vertexPipelineStoresAndAtomics ? true : false;
1014 }
1015
isDoubleSupportedForDevice(Context & context)1016 bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
1017 {
1018 const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
1019 context.getInstanceInterface(), context.getPhysicalDevice());
1020 return features.shaderFloat64 ? true : false;
1021 }
1022
isDoubleFormat(VkFormat format)1023 bool vkt::subgroups::isDoubleFormat(VkFormat format)
1024 {
1025 switch (format)
1026 {
1027 default:
1028 return false;
1029 case VK_FORMAT_R64_SFLOAT:
1030 case VK_FORMAT_R64G64_SFLOAT:
1031 case VK_FORMAT_R64G64B64_SFLOAT:
1032 case VK_FORMAT_R64G64B64A64_SFLOAT:
1033 return true;
1034 }
1035 }
1036
getFormatNameForGLSL(VkFormat format)1037 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
1038 {
1039 switch (format)
1040 {
1041 default:
1042 DE_FATAL("Unhandled format!");
1043 return "";
1044 case VK_FORMAT_R32_SINT:
1045 return "int";
1046 case VK_FORMAT_R32G32_SINT:
1047 return "ivec2";
1048 case VK_FORMAT_R32G32B32_SINT:
1049 return "ivec3";
1050 case VK_FORMAT_R32G32B32A32_SINT:
1051 return "ivec4";
1052 case VK_FORMAT_R32_UINT:
1053 return "uint";
1054 case VK_FORMAT_R32G32_UINT:
1055 return "uvec2";
1056 case VK_FORMAT_R32G32B32_UINT:
1057 return "uvec3";
1058 case VK_FORMAT_R32G32B32A32_UINT:
1059 return "uvec4";
1060 case VK_FORMAT_R32_SFLOAT:
1061 return "float";
1062 case VK_FORMAT_R32G32_SFLOAT:
1063 return "vec2";
1064 case VK_FORMAT_R32G32B32_SFLOAT:
1065 return "vec3";
1066 case VK_FORMAT_R32G32B32A32_SFLOAT:
1067 return "vec4";
1068 case VK_FORMAT_R64_SFLOAT:
1069 return "double";
1070 case VK_FORMAT_R64G64_SFLOAT:
1071 return "dvec2";
1072 case VK_FORMAT_R64G64B64_SFLOAT:
1073 return "dvec3";
1074 case VK_FORMAT_R64G64B64A64_SFLOAT:
1075 return "dvec4";
1076 case VK_FORMAT_R8_USCALED:
1077 return "bool";
1078 case VK_FORMAT_R8G8_USCALED:
1079 return "bvec2";
1080 case VK_FORMAT_R8G8B8_USCALED:
1081 return "bvec3";
1082 case VK_FORMAT_R8G8B8A8_USCALED:
1083 return "bvec4";
1084 }
1085 }
1086
setVertexShaderFrameBuffer(SourceCollections & programCollection)1087 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
1088 {
1089 /*
1090 "layout(location = 0) in highp vec4 in_position;\n"
1091 "void main (void)\n"
1092 "{\n"
1093 " gl_Position = in_position;\n"
1094 "}\n";
1095 */
1096 programCollection.spirvAsmSources.add("vert") <<
1097 "; SPIR-V\n"
1098 "; Version: 1.3\n"
1099 "; Generator: Khronos Glslang Reference Front End; 2\n"
1100 "; Bound: 21\n"
1101 "; Schema: 0\n"
1102 "OpCapability Shader\n"
1103 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1104 "OpMemoryModel Logical GLSL450\n"
1105 "OpEntryPoint Vertex %4 \"main\" %13 %17\n"
1106 "OpMemberDecorate %11 0 BuiltIn Position\n"
1107 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1108 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1109 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1110 "OpDecorate %11 Block\n"
1111 "OpDecorate %17 Location 0\n"
1112 "%2 = OpTypeVoid\n"
1113 "%3 = OpTypeFunction %2\n"
1114 "%6 = OpTypeFloat 32\n"
1115 "%7 = OpTypeVector %6 4\n"
1116 "%8 = OpTypeInt 32 0\n"
1117 "%9 = OpConstant %8 1\n"
1118 "%10 = OpTypeArray %6 %9\n"
1119 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1120 "%12 = OpTypePointer Output %11\n"
1121 "%13 = OpVariable %12 Output\n"
1122 "%14 = OpTypeInt 32 1\n"
1123 "%15 = OpConstant %14 0\n"
1124 "%16 = OpTypePointer Input %7\n"
1125 "%17 = OpVariable %16 Input\n"
1126 "%19 = OpTypePointer Output %7\n"
1127 "%4 = OpFunction %2 None %3\n"
1128 "%5 = OpLabel\n"
1129 "%18 = OpLoad %7 %17\n"
1130 "%20 = OpAccessChain %19 %13 %15\n"
1131 "OpStore %20 %18\n"
1132 "OpReturn\n"
1133 "OpFunctionEnd\n";
1134 }
1135
setFragmentShaderFrameBuffer(vk::SourceCollections & programCollection)1136 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
1137 {
1138 /*
1139 "layout(location = 0) in float in_color;\n"
1140 "layout(location = 0) out uint out_color;\n"
1141 "void main()\n"
1142 {\n"
1143 " out_color = uint(in_color);\n"
1144 "}\n";
1145 */
1146 programCollection.spirvAsmSources.add("fragment") <<
1147 "; SPIR-V\n"
1148 "; Version: 1.3\n"
1149 "; Generator: Khronos Glslang Reference Front End; 2\n"
1150 "; Bound: 14\n"
1151 "; Schema: 0\n"
1152 "OpCapability Shader\n"
1153 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1154 "OpMemoryModel Logical GLSL450\n"
1155 "OpEntryPoint Fragment %4 \"main\" %8 %11\n"
1156 "OpExecutionMode %4 OriginUpperLeft\n"
1157 "OpDecorate %8 Location 0\n"
1158 "OpDecorate %11 Location 0\n"
1159 "%2 = OpTypeVoid\n"
1160 "%3 = OpTypeFunction %2\n"
1161 "%6 = OpTypeInt 32 0\n"
1162 "%7 = OpTypePointer Output %6\n"
1163 "%8 = OpVariable %7 Output\n"
1164 "%9 = OpTypeFloat 32\n"
1165 "%10 = OpTypePointer Input %9\n"
1166 "%11 = OpVariable %10 Input\n"
1167 "%4 = OpFunction %2 None %3\n"
1168 "%5 = OpLabel\n"
1169 "%12 = OpLoad %9 %11\n"
1170 "%13 = OpConvertFToU %6 %12\n"
1171 "OpStore %8 %13\n"
1172 "OpReturn\n"
1173 "OpFunctionEnd\n";
1174 }
1175
setTesCtrlShaderFrameBuffer(vk::SourceCollections & programCollection)1176 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
1177 {
1178 /*
1179 "#extension GL_KHR_shader_subgroup_basic: enable\n"
1180 "#extension GL_EXT_tessellation_shader : require\n"
1181 "layout(vertices = 2) out;\n"
1182 "void main (void)\n"
1183 "{\n"
1184 " if (gl_InvocationID == 0)\n"
1185 {\n"
1186 " gl_TessLevelOuter[0] = 1.0f;\n"
1187 " gl_TessLevelOuter[1] = 1.0f;\n"
1188 " }\n"
1189 " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
1190 "}\n";
1191 */
1192 programCollection.spirvAsmSources.add("tesc") <<
1193 "; SPIR-V\n"
1194 "; Version: 1.3\n"
1195 "; Generator: Khronos Glslang Reference Front End; 2\n"
1196 "; Bound: 46\n"
1197 "; Schema: 0\n"
1198 "OpCapability Tessellation\n"
1199 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1200 "OpMemoryModel Logical GLSL450\n"
1201 "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
1202 "OpExecutionMode %4 OutputVertices 2\n"
1203 "OpDecorate %8 BuiltIn InvocationId\n"
1204 "OpDecorate %20 Patch\n"
1205 "OpDecorate %20 BuiltIn TessLevelOuter\n"
1206 "OpMemberDecorate %29 0 BuiltIn Position\n"
1207 "OpMemberDecorate %29 1 BuiltIn PointSize\n"
1208 "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
1209 "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
1210 "OpDecorate %29 Block\n"
1211 "OpMemberDecorate %35 0 BuiltIn Position\n"
1212 "OpMemberDecorate %35 1 BuiltIn PointSize\n"
1213 "OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
1214 "OpMemberDecorate %35 3 BuiltIn CullDistance\n"
1215 "OpDecorate %35 Block\n"
1216 "%2 = OpTypeVoid\n"
1217 "%3 = OpTypeFunction %2\n"
1218 "%6 = OpTypeInt 32 1\n"
1219 "%7 = OpTypePointer Input %6\n"
1220 "%8 = OpVariable %7 Input\n"
1221 "%10 = OpConstant %6 0\n"
1222 "%11 = OpTypeBool\n"
1223 "%15 = OpTypeFloat 32\n"
1224 "%16 = OpTypeInt 32 0\n"
1225 "%17 = OpConstant %16 4\n"
1226 "%18 = OpTypeArray %15 %17\n"
1227 "%19 = OpTypePointer Output %18\n"
1228 "%20 = OpVariable %19 Output\n"
1229 "%21 = OpConstant %15 1\n"
1230 "%22 = OpTypePointer Output %15\n"
1231 "%24 = OpConstant %6 1\n"
1232 "%26 = OpTypeVector %15 4\n"
1233 "%27 = OpConstant %16 1\n"
1234 "%28 = OpTypeArray %15 %27\n"
1235 "%29 = OpTypeStruct %26 %15 %28 %28\n"
1236 "%30 = OpConstant %16 2\n"
1237 "%31 = OpTypeArray %29 %30\n"
1238 "%32 = OpTypePointer Output %31\n"
1239 "%33 = OpVariable %32 Output\n"
1240 "%35 = OpTypeStruct %26 %15 %28 %28\n"
1241 "%36 = OpConstant %16 32\n"
1242 "%37 = OpTypeArray %35 %36\n"
1243 "%38 = OpTypePointer Input %37\n"
1244 "%39 = OpVariable %38 Input\n"
1245 "%41 = OpTypePointer Input %26\n"
1246 "%44 = OpTypePointer Output %26\n"
1247 "%4 = OpFunction %2 None %3\n"
1248 "%5 = OpLabel\n"
1249 "%9 = OpLoad %6 %8\n"
1250 "%12 = OpIEqual %11 %9 %10\n"
1251 "OpSelectionMerge %14 None\n"
1252 "OpBranchConditional %12 %13 %14\n"
1253 "%13 = OpLabel\n"
1254 "%23 = OpAccessChain %22 %20 %10\n"
1255 "OpStore %23 %21\n"
1256 "%25 = OpAccessChain %22 %20 %24\n"
1257 "OpStore %25 %21\n"
1258 "OpBranch %14\n"
1259 "%14 = OpLabel\n"
1260 "%34 = OpLoad %6 %8\n"
1261 "%40 = OpLoad %6 %8\n"
1262 "%42 = OpAccessChain %41 %39 %40 %10\n"
1263 "%43 = OpLoad %26 %42\n"
1264 "%45 = OpAccessChain %44 %33 %34 %10\n"
1265 "OpStore %45 %43\n"
1266 "OpReturn\n"
1267 "OpFunctionEnd\n";
1268 }
1269
setTesEvalShaderFrameBuffer(vk::SourceCollections & programCollection)1270 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
1271 {
1272 /*
1273 "#extension GL_KHR_shader_subgroup_ballot: enable\n"
1274 "#extension GL_EXT_tessellation_shader : require\n"
1275 "layout(isolines, equal_spacing, ccw ) in;\n"
1276 "layout(location = 0) in float in_color[];\n"
1277 "layout(location = 0) out float out_color;\n"
1278 "\n"
1279 "void main (void)\n"
1280 "{\n"
1281 " gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
1282 " out_color = in_color[0];\n"
1283 "}\n";
1284 */
1285 programCollection.spirvAsmSources.add("tese") <<
1286 "; SPIR-V\n"
1287 "; Version: 1.3\n"
1288 "; Generator: Khronos Glslang Reference Front End; 2\n"
1289 "; Bound: 45\n"
1290 "; Schema: 0\n"
1291 "OpCapability Tessellation\n"
1292 "%1 = OpExtInstImport \"GLSL.std.450\"\n"
1293 "OpMemoryModel Logical GLSL450\n"
1294 "OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
1295 "OpExecutionMode %4 Isolines\n"
1296 "OpExecutionMode %4 SpacingEqual\n"
1297 "OpExecutionMode %4 VertexOrderCcw\n"
1298 "OpMemberDecorate %11 0 BuiltIn Position\n"
1299 "OpMemberDecorate %11 1 BuiltIn PointSize\n"
1300 "OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
1301 "OpMemberDecorate %11 3 BuiltIn CullDistance\n"
1302 "OpDecorate %11 Block\n"
1303 "OpMemberDecorate %16 0 BuiltIn Position\n"
1304 "OpMemberDecorate %16 1 BuiltIn PointSize\n"
1305 "OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
1306 "OpMemberDecorate %16 3 BuiltIn CullDistance\n"
1307 "OpDecorate %16 Block\n"
1308 "OpDecorate %29 BuiltIn TessCoord\n"
1309 "OpDecorate %39 Location 0\n"
1310 "OpDecorate %42 Location 0\n"
1311 "%2 = OpTypeVoid\n"
1312 "%3 = OpTypeFunction %2\n"
1313 "%6 = OpTypeFloat 32\n"
1314 "%7 = OpTypeVector %6 4\n"
1315 "%8 = OpTypeInt 32 0\n"
1316 "%9 = OpConstant %8 1\n"
1317 "%10 = OpTypeArray %6 %9\n"
1318 "%11 = OpTypeStruct %7 %6 %10 %10\n"
1319 "%12 = OpTypePointer Output %11\n"
1320 "%13 = OpVariable %12 Output\n"
1321 "%14 = OpTypeInt 32 1\n"
1322 "%15 = OpConstant %14 0\n"
1323 "%16 = OpTypeStruct %7 %6 %10 %10\n"
1324 "%17 = OpConstant %8 32\n"
1325 "%18 = OpTypeArray %16 %17\n"
1326 "%19 = OpTypePointer Input %18\n"
1327 "%20 = OpVariable %19 Input\n"
1328 "%21 = OpTypePointer Input %7\n"
1329 "%24 = OpConstant %14 1\n"
1330 "%27 = OpTypeVector %6 3\n"
1331 "%28 = OpTypePointer Input %27\n"
1332 "%29 = OpVariable %28 Input\n"
1333 "%30 = OpConstant %8 0\n"
1334 "%31 = OpTypePointer Input %6\n"
1335 "%36 = OpTypePointer Output %7\n"
1336 "%38 = OpTypePointer Output %6\n"
1337 "%39 = OpVariable %38 Output\n"
1338 "%40 = OpTypeArray %6 %17\n"
1339 "%41 = OpTypePointer Input %40\n"
1340 "%42 = OpVariable %41 Input\n"
1341 "%4 = OpFunction %2 None %3\n"
1342 "%5 = OpLabel\n"
1343 "%22 = OpAccessChain %21 %20 %15 %15\n"
1344 "%23 = OpLoad %7 %22\n"
1345 "%25 = OpAccessChain %21 %20 %24 %15\n"
1346 "%26 = OpLoad %7 %25\n"
1347 "%32 = OpAccessChain %31 %29 %30\n"
1348 "%33 = OpLoad %6 %32\n"
1349 "%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
1350 "%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
1351 "%37 = OpAccessChain %36 %13 %15\n"
1352 "OpStore %37 %35\n"
1353 "%43 = OpAccessChain %31 %42 %15\n"
1354 "%44 = OpLoad %6 %43\n"
1355 "OpStore %39 %44\n"
1356 "OpReturn\n"
1357 "OpFunctionEnd\n";
1358 }
1359
addGeometryShadersFromTemplate(const std::string & glslTemplate,const vk::ShaderBuildOptions & options,vk::GlslSourceCollection & collection)1360 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
1361 {
1362 tcu::StringTemplate geometryTemplate(glslTemplate);
1363
1364 map<string, string> linesParams;
1365 linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
1366
1367 map<string, string> pointsParams;
1368 pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
1369
1370 collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
1371 collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
1372 }
1373
addGeometryShadersFromTemplate(const std::string & spirvTemplate,const vk::SpirVAsmBuildOptions & options,vk::SpirVAsmCollection & collection)1374 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
1375 {
1376 tcu::StringTemplate geometryTemplate(spirvTemplate);
1377
1378 map<string, string> linesParams;
1379 linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
1380
1381 map<string, string> pointsParams;
1382 pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
1383
1384 collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
1385 collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
1386 }
1387
initializeMemory(Context & context,const Allocation & alloc,subgroups::SSBOData & data)1388 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
1389 {
1390 const vk::VkFormat format = data.format;
1391 const vk::VkDeviceSize size = data.numElements *
1392 (data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
1393 if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
1394 {
1395 de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
1396
1397 switch (format)
1398 {
1399 default:
1400 DE_FATAL("Illegal buffer format");
1401 break;
1402 case VK_FORMAT_R8_USCALED:
1403 case VK_FORMAT_R8G8_USCALED:
1404 case VK_FORMAT_R8G8B8_USCALED:
1405 case VK_FORMAT_R8G8B8A8_USCALED:
1406 case VK_FORMAT_R32_SINT:
1407 case VK_FORMAT_R32G32_SINT:
1408 case VK_FORMAT_R32G32B32_SINT:
1409 case VK_FORMAT_R32G32B32A32_SINT:
1410 case VK_FORMAT_R32_UINT:
1411 case VK_FORMAT_R32G32_UINT:
1412 case VK_FORMAT_R32G32B32_UINT:
1413 case VK_FORMAT_R32G32B32A32_UINT:
1414 {
1415 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1416
1417 for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
1418 {
1419 ptr[k] = rnd.getUint32();
1420 }
1421 }
1422 break;
1423 case VK_FORMAT_R32_SFLOAT:
1424 case VK_FORMAT_R32G32_SFLOAT:
1425 case VK_FORMAT_R32G32B32_SFLOAT:
1426 case VK_FORMAT_R32G32B32A32_SFLOAT:
1427 {
1428 float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
1429
1430 for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
1431 {
1432 ptr[k] = rnd.getFloat();
1433 }
1434 }
1435 break;
1436 case VK_FORMAT_R64_SFLOAT:
1437 case VK_FORMAT_R64G64_SFLOAT:
1438 case VK_FORMAT_R64G64B64_SFLOAT:
1439 case VK_FORMAT_R64G64B64A64_SFLOAT:
1440 {
1441 double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
1442
1443 for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
1444 {
1445 ptr[k] = rnd.getDouble();
1446 }
1447 }
1448 break;
1449 }
1450 }
1451 else if (subgroups::SSBOData::InitializeZero == data.initializeType)
1452 {
1453 deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
1454
1455 for (vk::VkDeviceSize k = 0; k < size / 4; k++)
1456 {
1457 ptr[k] = 0;
1458 }
1459 }
1460
1461 if (subgroups::SSBOData::InitializeNone != data.initializeType)
1462 {
1463 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1464 }
1465 }
1466
getResultBinding(const VkShaderStageFlagBits shaderStage)1467 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
1468 {
1469 switch(shaderStage)
1470 {
1471 case VK_SHADER_STAGE_VERTEX_BIT:
1472 return 0u;
1473 break;
1474 case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
1475 return 1u;
1476 break;
1477 case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
1478 return 2u;
1479 break;
1480 case VK_SHADER_STAGE_GEOMETRY_BIT:
1481 return 3u;
1482 break;
1483 default:
1484 DE_ASSERT(0);
1485 return -1;
1486 }
1487 DE_ASSERT(0);
1488 return -1;
1489 }
1490
/*!
 * \brief Runs a framebuffer-based test that renders through the tessellation stages.
 *
 * A graphics pipeline is built from the "vert", "tesc", "tese" and "fragment"
 * binaries in the context's binary collection and used to render a patch list
 * into a maxWidth x 1 color image of the given format. For each width in
 * [1, maxWidth) the function draws 2 * width vertices, copies the image into a
 * buffer and passes that buffer to checkResult.
 *
 * \param context        Test context providing device, interfaces and binaries.
 * \param format         Format of the color attachment (also passed to the render pass and pipeline helpers).
 * \param extraData      Array of extraDataCount input descriptions; each becomes an image or a uniform buffer.
 * \param extraDataCount Number of entries in extraData.
 * \param checkResult    Callback receiving the read-back data, width / 2 and the subgroup size;
 *                       returns false when an iteration failed.
 * \param shaderStage    Stage flags used for every descriptor set layout binding.
 * \return pass("OK") when every iteration passed, otherwise fail("Failed!") after logging the pass count.
 */
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
	Context& context, VkFormat format, SSBOData* extraData,
	deUint32 extraDataCount,
	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
	const VkShaderStageFlags shaderStage)
{
	const deUint32 maxWidth = 1024u;
	vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder layoutBuilder;
	DescriptorPoolBuilder poolBuilder;
	DescriptorSetUpdateBuilder updateBuilder;
	Move <VkDescriptorPool> descriptorPool;
	Move <VkDescriptorSet> descriptorSet;

	const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("tesc"), 0u));
	const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("tese"), 0u));
	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));

	// One tightly packed vec4 attribute per vertex.
	const VkVertexInputBindingDescription vertexInputBinding =
	{
		0u, // binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
		VK_VERTEX_INPUT_RATE_VERTEX // inputRate
	};

	const VkVertexInputAttributeDescription vertexInputAttribute =
	{
		0u,
		0u,
		VK_FORMAT_R32G32B32A32_SFLOAT,
		0u
	};

	// Create and initialize one image or uniform buffer per extra input.
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	// Bindings are added in array order, one per extra input.
	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

	const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));

	const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));

	const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
		*vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
		*renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// The pool and set are only created when there is something to bind.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
			VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
	}

	// Write each input to the binding whose index equals its position in inputBuffers.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
					inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
					0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(context.getDeviceInterface(), context.getDevice());

	const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
	const deUint32 subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
	const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
	Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned totalIterations = 0u;
	unsigned failedIterations = 0u;
	Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	// Fill the vertex buffer with maxWidth vertex pairs. Each pair spans one
	// pixel-wide step (2 / maxWidth) in X, starting at -1.
	{
		const Allocation& alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
		const float pixelSize = 2.0f / static_cast<float>(maxWidth);
		float leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
		{
			data[ndx][0] = leftHandPosition;
			leftHandPosition += pixelSize;
			data[ndx+1][0] = leftHandPosition;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
	}

	// One draw, read-back and check per width; maxWidth itself is not tested.
	for (deUint32 width = 1u; width < maxWidth; ++width)
	{
		// The framebuffer and read-back buffer are recreated on every iteration.
		const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
		const VkViewport viewport = makeViewport(maxWidth, 1u);
		const VkRect2D scissor = makeRect2D(maxWidth, 1u);
		const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
		Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const VkDeviceSize vertexBufferOffset = 0u;

		totalIterations++;

		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
		{

			context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
			context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
			// Two vertices per unit of width.
			context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

			endRenderPass(context.getDeviceInterface(), *cmdBuffer);

			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);

			// Submit and wait so the results can be read below.
			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
			waitFence(context, fence);
		}

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			// NOTE(review): the checker receives width / 2 although 2 * width vertices were drawn - confirm this is intended.
			if (!checkResult(datas, width/2u, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		context.getTestContext().getLog()
			<< TestLog::Message << (totalIterations - failedIterations) << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}
1681
check(std::vector<const void * > datas,deUint32 width,deUint32 ref)1682 bool vkt::subgroups::check(std::vector<const void*> datas,
1683 deUint32 width, deUint32 ref)
1684 {
1685 const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
1686
1687 for (deUint32 n = 0; n < width; ++n)
1688 {
1689 if (data[n] != ref)
1690 {
1691 return false;
1692 }
1693 }
1694
1695 return true;
1696 }
1697
checkCompute(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 ref)1698 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
1699 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
1700 deUint32 ref)
1701 {
1702 const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
1703 const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
1704 const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
1705
1706 return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
1707 }
1708
makeGeometryFrameBufferTest(Context & context,VkFormat format,SSBOData * extraData,deUint32 extraDataCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))1709 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
1710 Context& context, VkFormat format, SSBOData* extraData,
1711 deUint32 extraDataCount,
1712 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
1713 {
1714 const deUint32 maxWidth = 1024u;
1715 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
1716 DescriptorSetLayoutBuilder layoutBuilder;
1717 DescriptorPoolBuilder poolBuilder;
1718 DescriptorSetUpdateBuilder updateBuilder;
1719 Move <VkDescriptorPool> descriptorPool;
1720 Move <VkDescriptorSet> descriptorSet;
1721
1722 const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1723 context.getBinaryCollection().get("vert"), 0u));
1724 const Unique<VkShaderModule> geometryShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1725 context.getBinaryCollection().get("geometry"), 0u));
1726 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
1727 context.getBinaryCollection().get("fragment"), 0u));
1728 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
1729 const VkVertexInputBindingDescription vertexInputBinding =
1730 {
1731 0u, // binding;
1732 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
1733 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
1734 };
1735
1736 const VkVertexInputAttributeDescription vertexInputAttribute =
1737 {
1738 0u,
1739 0u,
1740 VK_FORMAT_R32G32B32A32_SFLOAT,
1741 0u
1742 };
1743
1744 for (deUint32 i = 0u; i < extraDataCount; i++)
1745 {
1746 if (extraData[i].isImage)
1747 {
1748 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
1749 }
1750 else
1751 {
1752 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
1753 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
1754 }
1755 const Allocation& alloc = inputBuffers[i]->getAllocation();
1756 initializeMemory(context, alloc, extraData[i]);
1757 }
1758
1759 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1760 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
1761
1762 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
1763
1764 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
1765
1766 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
1767 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
1768 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
1769 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
1770
1771 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
1772 poolBuilder.addType(inputBuffers[ndx]->getType());
1773
1774 if (extraDataCount > 0)
1775 {
1776 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
1777 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
1778 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
1779 }
1780
1781 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
1782 {
1783 if (inputBuffers[buffersNdx]->isImage())
1784 {
1785 VkDescriptorImageInfo info =
1786 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
1787 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
1788
1789 updateBuilder.writeSingle(*descriptorSet,
1790 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1791 inputBuffers[buffersNdx]->getType(), &info);
1792 }
1793 else
1794 {
1795 VkDescriptorBufferInfo info =
1796 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
1797 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
1798
1799 updateBuilder.writeSingle(*descriptorSet,
1800 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
1801 inputBuffers[buffersNdx]->getType(), &info);
1802 }
1803 }
1804
1805 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
1806
1807 const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
1808 const deUint32 subgroupSize = getSubgroupSize(context);
1809 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
1810 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
1811 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1812 unsigned totalIterations = 0u;
1813 unsigned failedIterations = 0u;
1814 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
1815
1816 {
1817 const Allocation& alloc = vertexBuffer.getAllocation();
1818 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
1819 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
1820 float leftHandPosition = -1.0f;
1821
1822 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
1823 {
1824 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
1825 leftHandPosition += pixelSize;
1826 }
1827
1828 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
1829 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
1830 }
1831
1832 for (deUint32 width = 1u; width < maxWidth; width++)
1833 {
1834 totalIterations++;
1835 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
1836 const VkViewport viewport = makeViewport(maxWidth, 1u);
1837 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
1838 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
1839 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
1840 const VkDeviceSize vertexBufferOffset = 0u;
1841
1842 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
1843 {
1844 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
1845 initializeMemory(context, alloc, extraData[ndx]);
1846 }
1847
1848 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1849 {
1850 context.getDeviceInterface().cmdSetViewport(
1851 *cmdBuffer, 0, 1, &viewport);
1852
1853 context.getDeviceInterface().cmdSetScissor(
1854 *cmdBuffer, 0, 1, &scissor);
1855
1856 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
1857
1858 context.getDeviceInterface().cmdBindPipeline(
1859 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
1860
1861 if (extraDataCount > 0)
1862 {
1863 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
1864 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
1865 &descriptorSet.get(), 0u, DE_NULL);
1866 }
1867
1868 context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
1869
1870 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
1871
1872 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
1873
1874 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
1875
1876 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
1877 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
1878 waitFence(context, fence);
1879 }
1880
1881 {
1882 const Allocation& allocResult = imageBufferResult.getAllocation();
1883 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
1884
1885 std::vector<const void*> datas;
1886 datas.push_back(allocResult.getHostPtr());
1887 if (!checkResult(datas, width, subgroupSize))
1888 failedIterations++;
1889 }
1890 }
1891
1892 if (0 < failedIterations)
1893 {
1894 context.getTestContext().getLog()
1895 << TestLog::Message << (totalIterations - failedIterations) << " / "
1896 << totalIterations << " values passed" << TestLog::EndMessage;
1897 return tcu::TestStatus::fail("Failed!");
1898 }
1899
1900 return tcu::TestStatus::pass("OK");
1901 }
1902
1903
allStages(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize),const VkShaderStageFlags shaderStageTested)1904 tcu::TestStatus vkt::subgroups::allStages(
1905 Context& context, VkFormat format, SSBOData* extraDatas,
1906 deUint32 extraDatasCount,
1907 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
1908 const VkShaderStageFlags shaderStageTested)
1909 {
1910 const deUint32 maxWidth = 1024u;
1911 vector<VkShaderStageFlagBits> stagesVector;
1912 VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;
1913
1914 Move<VkShaderModule> vertexShaderModule;
1915 Move<VkShaderModule> teCtrlShaderModule;
1916 Move<VkShaderModule> teEvalShaderModule;
1917 Move<VkShaderModule> geometryShaderModule;
1918 Move<VkShaderModule> fragmentShaderModule;
1919
1920 if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
1921 {
1922 stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
1923 }
1924 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1925 {
1926 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
1927 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
1928 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1929 }
1930 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1931 {
1932 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
1933 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
1934 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
1935 }
1936 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
1937 {
1938 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
1939 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1940 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1941 }
1942 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
1943 {
1944 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
1945 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
1946 }
1947
1948 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size());
1949 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert";
1950 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc";
1951 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? "tese_noSubgroup" : "tese";
1952
1953 shaderStageRequired = shaderStageTested | shaderStageRequired;
1954
1955 vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u);
1956 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
1957 {
1958 teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u);
1959 teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u);
1960 }
1961 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
1962 {
1963 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
1964 {
1965 // tessellation shaders output line primitives
1966 geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u);
1967 }
1968 else
1969 {
1970 // otherwise points are processed by geometry shader
1971 geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u);
1972 }
1973 }
1974 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
1975 fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u);
1976
1977 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
1978
1979 DescriptorSetLayoutBuilder layoutBuilder;
1980 // The implicit result SSBO we use to store our outputs from the shader
1981 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
1982 {
1983 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
1984 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
1985 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
1986
1987 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
1988 }
1989
1990 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
1991 {
1992 const deUint32 datasNdx = ndx - stagesCount;
1993 if (extraDatas[datasNdx].isImage)
1994 {
1995 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
1996 }
1997 else
1998 {
1999 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
2000 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2001 }
2002
2003 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2004 initializeMemory(context, alloc, extraDatas[datasNdx]);
2005
2006 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
2007 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
2008 }
2009
2010 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2011 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2012
2013 const Unique<VkPipelineLayout> pipelineLayout(
2014 makePipelineLayout(context, *descriptorSetLayout));
2015
2016 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2017 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2018 shaderStageRequired,
2019 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
2020 *renderPass,
2021 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
2022
2023 DescriptorPoolBuilder poolBuilder;
2024
2025 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
2026 {
2027 poolBuilder.addType(inputBuffers[ndx]->getType());
2028 }
2029
2030 const Unique<VkDescriptorPool> descriptorPool(
2031 poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2032 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2033
2034 // Create descriptor set
2035 const Unique<VkDescriptorSet> descriptorSet(
2036 makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2037
2038 DescriptorSetUpdateBuilder updateBuilder;
2039
2040 for (deUint32 ndx = 0u; ndx < stagesCount; ndx++)
2041 {
2042 if (inputBuffers[ndx]->isImage())
2043 {
2044 VkDescriptorImageInfo info =
2045 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2046 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2047
2048 updateBuilder.writeSingle(*descriptorSet,
2049 DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2050 inputBuffers[ndx]->getType(), &info);
2051 }
2052 else
2053 {
2054 VkDescriptorBufferInfo info =
2055 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2056 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2057
2058 updateBuilder.writeSingle(*descriptorSet,
2059 DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
2060 inputBuffers[ndx]->getType(), &info);
2061 }
2062 }
2063
2064 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ndx++)
2065 {
2066 if (inputBuffers[ndx]->isImage())
2067 {
2068 VkDescriptorImageInfo info =
2069 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
2070 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2071
2072 updateBuilder.writeSingle(*descriptorSet,
2073 DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx -stagesCount].binding),
2074 inputBuffers[ndx]->getType(), &info);
2075 }
2076 else
2077 {
2078 VkDescriptorBufferInfo info =
2079 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
2080 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
2081
2082 updateBuilder.writeSingle(*descriptorSet,
2083 DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx - stagesCount].binding),
2084 inputBuffers[ndx]->getType(), &info);
2085 }
2086 }
2087 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2088
2089 {
2090 const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
2091 const deUint32 subgroupSize = getSubgroupSize(context);
2092 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
2093 unsigned totalIterations = 0u;
2094 unsigned failedIterations = 0u;
2095 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2096 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1));
2097 const VkViewport viewport = makeViewport(maxWidth, 1u);
2098 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2099 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2100 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2101 const VkImageSubresourceRange subresourceRange =
2102 {
2103 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask
2104 0u, //deUint32 baseMipLevel
2105 1u, //deUint32 levelCount
2106 0u, //deUint32 baseArrayLayer
2107 1u //deUint32 layerCount
2108 };
2109
2110 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier(
2111 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
2112 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
2113 resultImage.getImage(), subresourceRange);
2114
2115 for (deUint32 width = 1u; width < maxWidth; width++)
2116 {
2117 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
2118 {
2119 // re-init the data
2120 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2121 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
2122 }
2123
2124 totalIterations++;
2125
2126 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2127
2128 context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
2129
2130 context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
2131
2132 context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
2133
2134 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2135
2136 context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2137
2138 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2139 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2140 &descriptorSet.get(), 0u, DE_NULL);
2141
2142 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
2143
2144 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2145
2146 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2147
2148 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2149
2150 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2151 waitFence(context, fence);
2152
2153 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
2154 {
2155 std::vector<const void*> datas;
2156 if (!inputBuffers[ndx]->isImage())
2157 {
2158 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
2159 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2160 // we always have our result data first
2161 datas.push_back(resultAlloc.getHostPtr());
2162 }
2163
2164 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2165 {
2166 const deUint32 datasNdx = index - stagesCount;
2167 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
2168 {
2169 const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
2170 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2171 // we always have our result data first
2172 datas.push_back(resultAlloc.getHostPtr());
2173 }
2174 }
2175
2176 if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
2177 failedIterations++;
2178 }
2179 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
2180 {
2181 std::vector<const void*> datas;
2182 const Allocation& resultAlloc = imageBufferResult.getAllocation();
2183 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2184
2185 // we always have our result data first
2186 datas.push_back(resultAlloc.getHostPtr());
2187
2188 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
2189 {
2190 const deUint32 datasNdx = index - stagesCount;
2191 if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
2192 {
2193 const Allocation& alloc = inputBuffers[index]->getAllocation();
2194 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2195 // we always have our result data first
2196 datas.push_back(alloc.getHostPtr());
2197 }
2198 }
2199
2200 if (!checkResult(datas, width , subgroupSize))
2201 failedIterations++;
2202 }
2203
2204 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2205 }
2206
2207 if (0 < failedIterations)
2208 {
2209 context.getTestContext().getLog()
2210 << TestLog::Message << (totalIterations - failedIterations) << " / "
2211 << totalIterations << " values passed" << TestLog::EndMessage;
2212 return tcu::TestStatus::fail("Failed!");
2213 }
2214 }
2215
2216 return tcu::TestStatus::pass("OK");
2217 }
2218
makeVertexFrameBufferTest(Context & context,vk::VkFormat format,SSBOData * extraData,deUint32 extraDataCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 subgroupSize))2219 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
2220 SSBOData* extraData, deUint32 extraDataCount,
2221 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
2222 {
2223 const deUint32 maxWidth = 1024u;
2224 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
2225 DescriptorSetLayoutBuilder layoutBuilder;
2226 const Unique<VkShaderModule> vertexShaderModule (createShaderModule
2227 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2228 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
2229 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2230 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
2231
2232 const VkVertexInputBindingDescription vertexInputBinding =
2233 {
2234 0u, // binding;
2235 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride;
2236 VK_VERTEX_INPUT_RATE_VERTEX // inputRate
2237 };
2238
2239 const VkVertexInputAttributeDescription vertexInputAttribute =
2240 {
2241 0u,
2242 0u,
2243 VK_FORMAT_R32G32B32A32_SFLOAT,
2244 0u
2245 };
2246
2247 for (deUint32 i = 0u; i < extraDataCount; i++)
2248 {
2249 if (extraData[i].isImage)
2250 {
2251 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
2252 }
2253 else
2254 {
2255 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
2256 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2257 }
2258 const Allocation& alloc = inputBuffers[i]->getAllocation();
2259 initializeMemory(context, alloc, extraData[i]);
2260 }
2261
2262 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2263 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
2264
2265 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2266
2267 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));
2268
2269 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
2270 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2271 *vertexShaderModule, *fragmentShaderModule,
2272 DE_NULL, DE_NULL, DE_NULL,
2273 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
2274 &vertexInputBinding, &vertexInputAttribute, true, format));
2275 DescriptorPoolBuilder poolBuilder;
2276 DescriptorSetUpdateBuilder updateBuilder;
2277
2278
2279 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2280 poolBuilder.addType(inputBuffers[ndx]->getType());
2281
2282 Move <VkDescriptorPool> descriptorPool;
2283 Move <VkDescriptorSet> descriptorSet;
2284
2285 if (extraDataCount > 0)
2286 {
2287 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2288 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2289 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
2290 }
2291
2292 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
2293 {
2294 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2295 initializeMemory(context, alloc, extraData[ndx]);
2296 }
2297
2298 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
2299 {
2300 if (inputBuffers[buffersNdx]->isImage())
2301 {
2302 VkDescriptorImageInfo info =
2303 makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
2304 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2305
2306 updateBuilder.writeSingle(*descriptorSet,
2307 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2308 inputBuffers[buffersNdx]->getType(), &info);
2309 }
2310 else
2311 {
2312 VkDescriptorBufferInfo info =
2313 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
2314 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
2315
2316 updateBuilder.writeSingle(*descriptorSet,
2317 DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
2318 inputBuffers[buffersNdx]->getType(), &info);
2319 }
2320 }
2321 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2322
2323 const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
2324
2325 const deUint32 subgroupSize = getSubgroupSize(context);
2326
2327 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
2328
2329 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
2330 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
2331
2332 unsigned totalIterations = 0u;
2333 unsigned failedIterations = 0u;
2334
2335 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2336
2337 {
2338 const Allocation& alloc = vertexBuffer.getAllocation();
2339 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
2340 const float pixelSize = 2.0f / static_cast<float>(maxWidth);
2341 float leftHandPosition = -1.0f;
2342
2343 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
2344 {
2345 data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
2346 leftHandPosition += pixelSize;
2347 }
2348
2349 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
2350 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
2351 }
2352
2353 for (deUint32 width = 1u; width < maxWidth; width++)
2354 {
2355 totalIterations++;
2356 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
2357 const VkViewport viewport = makeViewport(maxWidth, 1u);
2358 const VkRect2D scissor = makeRect2D(maxWidth, 1u);
2359 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
2360 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
2361 const VkDeviceSize vertexBufferOffset = 0u;
2362
2363 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
2364 {
2365 const Allocation& alloc = inputBuffers[ndx]->getAllocation();
2366 initializeMemory(context, alloc, extraData[ndx]);
2367 }
2368
2369 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2370 {
2371 context.getDeviceInterface().cmdSetViewport(
2372 *cmdBuffer, 0, 1, &viewport);
2373
2374 context.getDeviceInterface().cmdSetScissor(
2375 *cmdBuffer, 0, 1, &scissor);
2376
2377 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
2378
2379 context.getDeviceInterface().cmdBindPipeline(
2380 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2381
2382 if (extraDataCount > 0)
2383 {
2384 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2385 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2386 &descriptorSet.get(), 0u, DE_NULL);
2387 }
2388
2389 context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
2390
2391 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
2392
2393 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2394
2395 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2396
2397 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2398 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2399 waitFence(context, fence);
2400 }
2401
2402 {
2403 const Allocation& allocResult = imageBufferResult.getAllocation();
2404 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
2405
2406 std::vector<const void*> datas;
2407 datas.push_back(allocResult.getHostPtr());
2408 if (!checkResult(datas, width, subgroupSize))
2409 failedIterations++;
2410 }
2411 }
2412
2413 if (0 < failedIterations)
2414 {
2415 context.getTestContext().getLog()
2416 << TestLog::Message << (totalIterations - failedIterations) << " / "
2417 << totalIterations << " values passed" << TestLog::EndMessage;
2418 return tcu::TestStatus::fail("Failed!");
2419 }
2420
2421 return tcu::TestStatus::pass("OK");
2422 }
2423
2424
makeFragmentFrameBufferTest(Context & context,VkFormat format,SSBOData * extraDatas,deUint32 extraDatasCount,bool (* checkResult)(std::vector<const void * > datas,deUint32 width,deUint32 height,deUint32 subgroupSize))2425 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context, VkFormat format, SSBOData* extraDatas,
2426 deUint32 extraDatasCount,
2427 bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
2428 deUint32 height, deUint32 subgroupSize))
2429 {
2430 const Unique<VkShaderModule> vertexShaderModule (createShaderModule
2431 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
2432 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule
2433 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
2434
2435 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
2436
2437 for (deUint32 i = 0; i < extraDatasCount; i++)
2438 {
2439 if (extraDatas[i].isImage)
2440 {
2441 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2442 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
2443 }
2444 else
2445 {
2446 vk::VkDeviceSize size =
2447 getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
2448 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
2449 }
2450
2451 const Allocation& alloc = inputBuffers[i]->getAllocation();
2452 initializeMemory(context, alloc, extraDatas[i]);
2453 }
2454
2455 DescriptorSetLayoutBuilder layoutBuilder;
2456
2457 for (deUint32 i = 0; i < extraDatasCount; i++)
2458 {
2459 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
2460 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
2461 }
2462
2463 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2464 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2465
2466 const Unique<VkPipelineLayout> pipelineLayout(
2467 makePipelineLayout(context, *descriptorSetLayout));
2468
2469 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
2470 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
2471 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
2472 *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
2473 DE_NULL, DE_NULL, true));
2474
2475 DescriptorPoolBuilder poolBuilder;
2476
2477 // To stop validation complaining, always add at least one type to pool.
2478 poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2479 for (deUint32 i = 0; i < extraDatasCount; i++)
2480 {
2481 poolBuilder.addType(inputBuffers[i]->getType());
2482 }
2483
2484 Move<VkDescriptorPool> descriptorPool;
2485 // Create descriptor set
2486 Move<VkDescriptorSet> descriptorSet;
2487
2488 if (extraDatasCount > 0)
2489 {
2490 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2491 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2492
2493 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
2494 }
2495
2496 DescriptorSetUpdateBuilder updateBuilder;
2497
2498 for (deUint32 i = 0; i < extraDatasCount; i++)
2499 {
2500 if (inputBuffers[i]->isImage())
2501 {
2502 VkDescriptorImageInfo info =
2503 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2504 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2505
2506 updateBuilder.writeSingle(*descriptorSet,
2507 DescriptorSetUpdateBuilder::Location::binding(i),
2508 inputBuffers[i]->getType(), &info);
2509 }
2510 else
2511 {
2512 VkDescriptorBufferInfo info =
2513 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
2514 0ull, inputBuffers[i]->getAsBuffer()->getSize());
2515
2516 updateBuilder.writeSingle(*descriptorSet,
2517 DescriptorSetUpdateBuilder::Location::binding(i),
2518 inputBuffers[i]->getType(), &info);
2519 }
2520 }
2521
2522 if (extraDatasCount > 0)
2523 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2524
2525 const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2526
2527 const deUint32 subgroupSize = getSubgroupSize(context);
2528
2529 const Unique<VkCommandBuffer> cmdBuffer(
2530 makeCommandBuffer(context, *cmdPool));
2531
2532 unsigned totalIterations = 0;
2533 unsigned failedIterations = 0;
2534
2535 for (deUint32 width = 8; width <= subgroupSize; width *= 2)
2536 {
2537 for (deUint32 height = 8; height <= subgroupSize; height *= 2)
2538 {
2539 totalIterations++;
2540
2541 // re-init the data
2542 for (deUint32 i = 0; i < extraDatasCount; i++)
2543 {
2544 const Allocation& alloc = inputBuffers[i]->getAllocation();
2545 initializeMemory(context, alloc, extraDatas[i]);
2546 }
2547
2548 VkDeviceSize formatSize = getFormatSizeInBytes(format);
2549 const VkDeviceSize resultImageSizeInBytes =
2550 width * height * formatSize;
2551
2552 Image resultImage(context, width, height, format,
2553 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
2554 VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
2555
2556 Buffer resultBuffer(context, resultImageSizeInBytes,
2557 VK_IMAGE_USAGE_TRANSFER_DST_BIT);
2558
2559 const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
2560 *renderPass, resultImage.getImageView(), width, height));
2561
2562 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2563
2564 VkViewport viewport = makeViewport(width, height);
2565
2566 context.getDeviceInterface().cmdSetViewport(
2567 *cmdBuffer, 0, 1, &viewport);
2568
2569 VkRect2D scissor = {{0, 0}, {width, height}};
2570
2571 context.getDeviceInterface().cmdSetScissor(
2572 *cmdBuffer, 0, 1, &scissor);
2573
2574 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
2575
2576 context.getDeviceInterface().cmdBindPipeline(
2577 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
2578
2579 if (extraDatasCount > 0)
2580 {
2581 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2582 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
2583 &descriptorSet.get(), 0u, DE_NULL);
2584 }
2585
2586 context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);
2587
2588 endRenderPass(context.getDeviceInterface(), *cmdBuffer);
2589
2590 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
2591
2592 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2593
2594 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2595
2596 waitFence(context, fence);
2597
2598 std::vector<const void*> datas;
2599 {
2600 const Allocation& resultAlloc = resultBuffer.getAllocation();
2601 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2602
2603 // we always have our result data first
2604 datas.push_back(resultAlloc.getHostPtr());
2605 }
2606
2607 if (!checkResult(datas, width, height, subgroupSize))
2608 {
2609 failedIterations++;
2610 }
2611
2612 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2613 }
2614 }
2615
2616 if (0 < failedIterations)
2617 {
2618 context.getTestContext().getLog()
2619 << TestLog::Message << (totalIterations - failedIterations) << " / "
2620 << totalIterations << " values passed" << TestLog::EndMessage;
2621 return tcu::TestStatus::fail("Failed!");
2622 }
2623
2624 return tcu::TestStatus::pass("OK");
2625 }
2626
makeComputeTest(Context & context,VkFormat format,SSBOData * inputs,deUint32 inputsCount,bool (* checkResult)(std::vector<const void * > datas,const deUint32 numWorkgroups[3],const deUint32 localSize[3],deUint32 subgroupSize))2627 tcu::TestStatus vkt::subgroups::makeComputeTest(
2628 Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
2629 bool (*checkResult)(std::vector<const void*> datas,
2630 const deUint32 numWorkgroups[3], const deUint32 localSize[3],
2631 deUint32 subgroupSize))
2632 {
2633 VkDeviceSize elementSize = getFormatSizeInBytes(format);
2634
2635 const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
2636 maxSupportedSubgroupSize() *
2637 maxSupportedSubgroupSize();
2638 const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
2639
2640 Buffer resultBuffer(
2641 context, resultBufferSizeInBytes);
2642
2643 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
2644
2645 for (deUint32 i = 0; i < inputsCount; i++)
2646 {
2647 if (inputs[i].isImage)
2648 {
2649 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
2650 static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
2651 }
2652 else
2653 {
2654 vk::VkDeviceSize size =
2655 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
2656 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
2657 }
2658
2659 const Allocation& alloc = inputBuffers[i]->getAllocation();
2660 initializeMemory(context, alloc, inputs[i]);
2661 }
2662
2663 DescriptorSetLayoutBuilder layoutBuilder;
2664 layoutBuilder.addBinding(
2665 resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2666
2667 for (deUint32 i = 0; i < inputsCount; i++)
2668 {
2669 layoutBuilder.addBinding(
2670 inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
2671 }
2672
2673 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
2674 layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
2675
2676 const Unique<VkShaderModule> shaderModule(
2677 createShaderModule(context.getDeviceInterface(), context.getDevice(),
2678 context.getBinaryCollection().get("comp"), 0u));
2679 const Unique<VkPipelineLayout> pipelineLayout(
2680 makePipelineLayout(context, *descriptorSetLayout));
2681
2682 DescriptorPoolBuilder poolBuilder;
2683
2684 poolBuilder.addType(resultBuffer.getType());
2685
2686 for (deUint32 i = 0; i < inputsCount; i++)
2687 {
2688 poolBuilder.addType(inputBuffers[i]->getType());
2689 }
2690
2691 const Unique<VkDescriptorPool> descriptorPool(
2692 poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
2693 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
2694
2695 // Create descriptor set
2696 const Unique<VkDescriptorSet> descriptorSet(
2697 makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
2698
2699 DescriptorSetUpdateBuilder updateBuilder;
2700
2701 const VkDescriptorBufferInfo resultDescriptorInfo =
2702 makeDescriptorBufferInfo(
2703 resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
2704
2705 updateBuilder.writeSingle(*descriptorSet,
2706 DescriptorSetUpdateBuilder::Location::binding(0u),
2707 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
2708
2709 for (deUint32 i = 0; i < inputsCount; i++)
2710 {
2711 if (inputBuffers[i]->isImage())
2712 {
2713 VkDescriptorImageInfo info =
2714 makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
2715 inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
2716
2717 updateBuilder.writeSingle(*descriptorSet,
2718 DescriptorSetUpdateBuilder::Location::binding(i + 1),
2719 inputBuffers[i]->getType(), &info);
2720 }
2721 else
2722 {
2723 vk::VkDeviceSize size =
2724 getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
2725 VkDescriptorBufferInfo info =
2726 makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
2727
2728 updateBuilder.writeSingle(*descriptorSet,
2729 DescriptorSetUpdateBuilder::Location::binding(i + 1),
2730 inputBuffers[i]->getType(), &info);
2731 }
2732 }
2733
2734 updateBuilder.update(context.getDeviceInterface(), context.getDevice());
2735
2736 const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
2737
2738 unsigned totalIterations = 0;
2739 unsigned failedIterations = 0;
2740
2741 const deUint32 subgroupSize = getSubgroupSize(context);
2742
2743 const Unique<VkCommandBuffer> cmdBuffer(
2744 makeCommandBuffer(context, *cmdPool));
2745
2746 const deUint32 numWorkgroups[3] = {4, 2, 2};
2747
2748 const deUint32 localSizesToTestCount = 15;
2749 deUint32 localSizesToTest[localSizesToTestCount][3] =
2750 {
2751 {1, 1, 1},
2752 {32, 4, 1},
2753 {32, 1, 4},
2754 {1, 32, 4},
2755 {1, 4, 32},
2756 {4, 1, 32},
2757 {4, 32, 1},
2758 {subgroupSize, 1, 1},
2759 {1, subgroupSize, 1},
2760 {1, 1, subgroupSize},
2761 {3, 5, 7},
2762 {128, 1, 1},
2763 {1, 128, 1},
2764 {1, 1, 64},
2765 {1, 1, 1} // Isn't used, just here to make double buffering checks easier
2766 };
2767
2768 Move<VkPipeline> lastPipeline(
2769 makeComputePipeline(context, *pipelineLayout, *shaderModule,
2770 localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
2771
2772 for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
2773 {
2774 const deUint32 nextX = localSizesToTest[index + 1][0];
2775 const deUint32 nextY = localSizesToTest[index + 1][1];
2776 const deUint32 nextZ = localSizesToTest[index + 1][2];
2777
2778 // we are running one test
2779 totalIterations++;
2780
2781 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2782
2783 context.getDeviceInterface().cmdBindPipeline(
2784 *cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
2785
2786 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
2787 VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
2788 &descriptorSet.get(), 0u, DE_NULL);
2789
2790 context.getDeviceInterface().cmdDispatch(*cmdBuffer,
2791 numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
2792
2793 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
2794
2795 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
2796
2797 Move<VkPipeline> nextPipeline(
2798 makeComputePipeline(context, *pipelineLayout, *shaderModule,
2799 nextX, nextY, nextZ));
2800
2801 waitFence(context, fence);
2802
2803 std::vector<const void*> datas;
2804
2805 {
2806 const Allocation& resultAlloc = resultBuffer.getAllocation();
2807 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2808
2809 // we always have our result data first
2810 datas.push_back(resultAlloc.getHostPtr());
2811 }
2812
2813 for (deUint32 i = 0; i < inputsCount; i++)
2814 {
2815 if (!inputBuffers[i]->isImage())
2816 {
2817 const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
2818 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
2819
2820 // we always have our result data first
2821 datas.push_back(resultAlloc.getHostPtr());
2822 }
2823 }
2824
2825 if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
2826 {
2827 failedIterations++;
2828 }
2829
2830 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
2831
2832 lastPipeline = nextPipeline;
2833 }
2834
2835 if (0 < failedIterations)
2836 {
2837 context.getTestContext().getLog()
2838 << TestLog::Message << (totalIterations - failedIterations) << " / "
2839 << totalIterations << " values passed" << TestLog::EndMessage;
2840 return tcu::TestStatus::fail("Failed!");
2841 }
2842
2843 return tcu::TestStatus::pass("OK");
2844 }
2845