/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Sparse buffer tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferTests.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktSparseResourcesBufferSparseBinding.hpp"
#include "vktSparseResourcesBufferSparseResidency.hpp"
#include "vktSparseResourcesBufferMemoryAliasing.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "tcuTestLog.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deMath.h"

#include <string>
#include <vector>
#include <map>

using namespace vk;
using de::MovePtr;
using de::UniquePtr;
using de::SharedPtr;
using tcu::Vec4;
using tcu::IVec2;
using tcu::IVec4;

namespace vkt
{
namespace sparse
{
namespace
{

typedef SharedPtr<UniquePtr<Allocation> > AllocationSp;

enum
{
    RENDER_SIZE = 128,              //!< framebuffer size in pixels
    GRID_SIZE   = RENDER_SIZE / 8,  //!< number of grid tiles in a row
};

enum TestFlagBits
{
    // sparseBinding is implied
    TEST_FLAG_ALIASED              = 1u << 0,  //!< sparseResidencyAliased
    TEST_FLAG_RESIDENCY            = 1u << 1,  //!< sparseResidencyBuffer
    TEST_FLAG_NON_RESIDENT_STRICT  = 1u << 2,  //!< residencyNonResidentStrict
    TEST_FLAG_ENABLE_DEVICE_GROUPS = 1u << 3,  //!< device groups are enabled
};
typedef deUint32 TestFlags;

//! SparseAllocationBuilder output. Owns the allocated memory.
struct SparseAllocation
{
    deUint32                        numResourceChunks;
    VkDeviceSize                    resourceSize;  //!< buffer size in bytes
    std::vector<AllocationSp>       allocations;   //!< actual allocated memory
    std::vector<VkSparseMemoryBind> memoryBinds;   //!< memory binds backing the resource
    deUint32                        memoryType;    //!< memory type (same for all allocations)
    deUint32                        heapIndex;     //!< memory heap index
};

//! Utility to lay out memory allocations for a sparse buffer, including holes and aliased regions.
//! Will allocate memory upon building.
class SparseAllocationBuilder
{
public:
    SparseAllocationBuilder (void);

    // \note "chunk" is the smallest (due to alignment) bindable amount of memory

    SparseAllocationBuilder&  addMemoryHole        (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&  addResourceHole      (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&  addMemoryBind        (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&  addAliasedMemoryBind (const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks = 1u);
    SparseAllocationBuilder&  addMemoryAllocation  (void);

    MovePtr<SparseAllocation> build (const InstanceInterface& instanceInterface,
                                     const VkPhysicalDevice   physicalDevice,
                                     const DeviceInterface&   vk,
                                     const VkDevice           device,
                                     Allocator&               allocator,
                                     VkBufferCreateInfo       referenceCreateInfo,        //!< buffer size is ignored in this info
                                     const VkDeviceSize       minChunkSize = 0ull) const; //!< make sure chunks are at least this big

private:
    struct MemoryBind
    {
        deUint32 allocationNdx;
        deUint32 resourceChunkNdx;
        deUint32 memoryChunkNdx;
        deUint32 numChunks;
    };

    deUint32                m_allocationNdx;
    deUint32                m_resourceChunkNdx;
    deUint32                m_memoryChunkNdx;
    std::vector<MemoryBind> m_memoryBinds;
    std::vector<deUint32>   m_chunksPerAllocation;
};
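
// A minimal usage sketch (illustrative only; the chunk counts, the aliasing
// target and the variable names are invented for this example):
//
//   MovePtr<SparseAllocation> alloc = SparseAllocationBuilder()
//       .addMemoryBind(2u)             // resource chunks 0..1 -> allocation 0, memory chunks 0..1
//       .addMemoryAllocation()         // subsequent binds draw from a second VkDeviceMemory
//       .addMemoryBind()               // resource chunk 2     -> allocation 1, memory chunk 0
//       .addAliasedMemoryBind(0u, 0u)  // resource chunk 3     -> allocation 0, memory chunk 0 (aliases chunk 0)
//       .build(instanceInterface, physicalDevice, vk, device, allocator, createInfo);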

SparseAllocationBuilder::SparseAllocationBuilder (void)
    : m_allocationNdx    (0)
    , m_resourceChunkNdx (0)
    , m_memoryChunkNdx   (0)
{
    m_chunksPerAllocation.push_back(0);
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryHole (const deUint32 numChunks)
{
    m_memoryChunkNdx                       += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addResourceHole (const deUint32 numChunks)
{
    m_resourceChunkNdx += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryAllocation (void)
{
    DE_ASSERT(m_memoryChunkNdx != 0);  // doesn't make sense to have an empty allocation

    m_allocationNdx  += 1;
    m_memoryChunkNdx  = 0;
    m_chunksPerAllocation.push_back(0);

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryBind (const deUint32 numChunks)
{
    const MemoryBind memoryBind =
    {
        m_allocationNdx,
        m_resourceChunkNdx,
        m_memoryChunkNdx,
        numChunks
    };
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx                     += numChunks;
    m_memoryChunkNdx                       += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addAliasedMemoryBind (const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks)
{
    DE_ASSERT(allocationNdx <= m_allocationNdx);

    const MemoryBind memoryBind =
    {
        allocationNdx,
        m_resourceChunkNdx,
        chunkOffset,
        numChunks
    };
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx += numChunks;

    return *this;
}

MovePtr<SparseAllocation> SparseAllocationBuilder::build (const InstanceInterface& instanceInterface,
                                                          const VkPhysicalDevice   physicalDevice,
                                                          const DeviceInterface&   vk,
                                                          const VkDevice           device,
                                                          Allocator&               allocator,
                                                          VkBufferCreateInfo       referenceCreateInfo,
                                                          const VkDeviceSize       minChunkSize) const
{
    MovePtr<SparseAllocation> sparseAllocation (new SparseAllocation());

    referenceCreateInfo.size = sizeof(deUint32);
    const Unique<VkBuffer>     refBuffer          (createBuffer(vk, device, &referenceCreateInfo));
    const VkMemoryRequirements memoryRequirements = getBufferMemoryRequirements(vk, device, *refBuffer);
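    // Each chunk must satisfy the buffer's alignment requirement: round
    // minChunkSize up to a multiple of the alignment, but use at least one
    // alignment unit. E.g. minChunkSize = 512 with alignment = 256 gives
    // 512-byte chunks; minChunkSize = 0 gives 256-byte chunks.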
    const VkDeviceSize chunkSize     = std::max(memoryRequirements.alignment, static_cast<VkDeviceSize>(deAlign64(minChunkSize, memoryRequirements.alignment)));
    const deUint32     memoryTypeNdx = findMatchingMemoryType(instanceInterface, physicalDevice, memoryRequirements, MemoryRequirement::Any);
    VkMemoryAllocateInfo allocInfo =
    {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,  // VkStructureType sType;
        DE_NULL,                                 // const void*     pNext;
        memoryRequirements.size,                 // VkDeviceSize    allocationSize;
        memoryTypeNdx,                           // deUint32        memoryTypeIndex;
    };

    for (std::vector<deUint32>::const_iterator numChunksIter = m_chunksPerAllocation.begin(); numChunksIter != m_chunksPerAllocation.end(); ++numChunksIter)
    {
        allocInfo.allocationSize = *numChunksIter * chunkSize;
        sparseAllocation->allocations.push_back(makeDeSharedPtr(allocator.allocate(allocInfo, (VkDeviceSize)0)));
    }

    for (std::vector<MemoryBind>::const_iterator memBindIter = m_memoryBinds.begin(); memBindIter != m_memoryBinds.end(); ++memBindIter)
    {
        const Allocation&        alloc = **sparseAllocation->allocations[memBindIter->allocationNdx];
        const VkSparseMemoryBind bind  =
        {
            memBindIter->resourceChunkNdx * chunkSize,                    // VkDeviceSize            resourceOffset;
            memBindIter->numChunks * chunkSize,                           // VkDeviceSize            size;
            alloc.getMemory(),                                            // VkDeviceMemory          memory;
            alloc.getOffset() + memBindIter->memoryChunkNdx * chunkSize,  // VkDeviceSize            memoryOffset;
            (VkSparseMemoryBindFlags)0,                                   // VkSparseMemoryBindFlags flags;
        };
        sparseAllocation->memoryBinds.push_back(bind);
        referenceCreateInfo.size = std::max(referenceCreateInfo.size, bind.resourceOffset + bind.size);
    }

    sparseAllocation->resourceSize      = referenceCreateInfo.size;
    sparseAllocation->numResourceChunks = m_resourceChunkNdx;
    sparseAllocation->memoryType        = memoryTypeNdx;
    sparseAllocation->heapIndex         = getHeapIndexForMemoryType(instanceInterface, physicalDevice, memoryTypeNdx);

    return sparseAllocation;
}

VkImageCreateInfo makeImageCreateInfo (const VkFormat format, const IVec2& size, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams =
    {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,  // VkStructureType       sType;
        DE_NULL,                              // const void*           pNext;
        (VkImageCreateFlags)0,                // VkImageCreateFlags    flags;
        VK_IMAGE_TYPE_2D,                     // VkImageType           imageType;
        format,                               // VkFormat              format;
        makeExtent3D(size.x(), size.y(), 1),  // VkExtent3D            extent;
        1u,                                   // deUint32              mipLevels;
        1u,                                   // deUint32              arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,                // VkSampleCountFlagBits samples;
        VK_IMAGE_TILING_OPTIMAL,              // VkImageTiling         tiling;
        usage,                                // VkImageUsageFlags     usage;
        VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode         sharingMode;
        0u,                                   // deUint32              queueFamilyIndexCount;
        DE_NULL,                              // const deUint32*       pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,            // VkImageLayout         initialLayout;
    };
    return imageParams;
}

Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&                 vk,
                                       const VkDevice                         device,
                                       const VkPipelineLayout                 pipelineLayout,
                                       const VkRenderPass                     renderPass,
                                       const IVec2                            renderSize,
                                       const VkPrimitiveTopology              topology,
                                       const deUint32                         stageCount,
                                       const VkPipelineShaderStageCreateInfo* pStages)
{
    const VkVertexInputBindingDescription vertexInputBindingDescription =
    {
        0u,                           // uint32_t          binding;
        sizeof(Vec4),                 // uint32_t          stride;
        VK_VERTEX_INPUT_RATE_VERTEX,  // VkVertexInputRate inputRate;
    };

    const VkVertexInputAttributeDescription vertexInputAttributeDescription =
    {
        0u,                             // uint32_t location;
        0u,                             // uint32_t binding;
        VK_FORMAT_R32G32B32A32_SFLOAT,  // VkFormat format;
        0u,                             // uint32_t offset;
    };

    const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,  // VkStructureType                          sType;
        DE_NULL,                                                    // const void*                              pNext;
        (VkPipelineVertexInputStateCreateFlags)0,                   // VkPipelineVertexInputStateCreateFlags    flags;
        1u,                                                         // uint32_t                                 vertexBindingDescriptionCount;
        &vertexInputBindingDescription,                             // const VkVertexInputBindingDescription*   pVertexBindingDescriptions;
        1u,                                                         // uint32_t                                 vertexAttributeDescriptionCount;
        &vertexInputAttributeDescription,                           // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
    };

    const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,  // VkStructureType                         sType;
        DE_NULL,                                                      // const void*                             pNext;
        (VkPipelineInputAssemblyStateCreateFlags)0,                   // VkPipelineInputAssemblyStateCreateFlags flags;
        topology,                                                     // VkPrimitiveTopology                     topology;
        VK_FALSE,                                                     // VkBool32                                primitiveRestartEnable;
    };

    const VkViewport viewport = makeViewport(renderSize);
    const VkRect2D   scissor  = makeRect2D(renderSize);

    const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,  // VkStructureType                    sType;
        DE_NULL,                                                // const void*                        pNext;
        (VkPipelineViewportStateCreateFlags)0,                  // VkPipelineViewportStateCreateFlags flags;
        1u,                                                     // uint32_t                           viewportCount;
        &viewport,                                              // const VkViewport*                  pViewports;
        1u,                                                     // uint32_t                           scissorCount;
        &scissor,                                               // const VkRect2D*                    pScissors;
    };

    const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,  // VkStructureType                         sType;
        DE_NULL,                                                     // const void*                             pNext;
        (VkPipelineRasterizationStateCreateFlags)0,                  // VkPipelineRasterizationStateCreateFlags flags;
        VK_FALSE,                                                    // VkBool32                                depthClampEnable;
        VK_FALSE,                                                    // VkBool32                                rasterizerDiscardEnable;
        VK_POLYGON_MODE_FILL,                                        // VkPolygonMode                           polygonMode;
        VK_CULL_MODE_NONE,                                           // VkCullModeFlags                         cullMode;
        VK_FRONT_FACE_COUNTER_CLOCKWISE,                             // VkFrontFace                             frontFace;
        VK_FALSE,                                                    // VkBool32                                depthBiasEnable;
        0.0f,                                                        // float                                   depthBiasConstantFactor;
        0.0f,                                                        // float                                   depthBiasClamp;
        0.0f,                                                        // float                                   depthBiasSlopeFactor;
        1.0f,                                                        // float                                   lineWidth;
    };

    const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,  // VkStructureType                       sType;
        DE_NULL,                                                   // const void*                           pNext;
        (VkPipelineMultisampleStateCreateFlags)0,                  // VkPipelineMultisampleStateCreateFlags flags;
        VK_SAMPLE_COUNT_1_BIT,                                     // VkSampleCountFlagBits                 rasterizationSamples;
        VK_FALSE,                                                  // VkBool32                              sampleShadingEnable;
        0.0f,                                                      // float                                 minSampleShading;
        DE_NULL,                                                   // const VkSampleMask*                   pSampleMask;
        VK_FALSE,                                                  // VkBool32                              alphaToCoverageEnable;
        VK_FALSE                                                   // VkBool32                              alphaToOneEnable;
    };

    const VkStencilOpState stencilOpState = makeStencilOpState(
        VK_STENCIL_OP_KEEP,    // stencil fail
        VK_STENCIL_OP_KEEP,    // depth & stencil pass
        VK_STENCIL_OP_KEEP,    // depth only fail
        VK_COMPARE_OP_ALWAYS,  // compare op
        0u,                    // compare mask
        0u,                    // write mask
        0u);                   // reference

    VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,  // VkStructureType                        sType;
        DE_NULL,                                                     // const void*                            pNext;
        (VkPipelineDepthStencilStateCreateFlags)0,                   // VkPipelineDepthStencilStateCreateFlags flags;
        VK_FALSE,                                                    // VkBool32                               depthTestEnable;
        VK_FALSE,                                                    // VkBool32                               depthWriteEnable;
        VK_COMPARE_OP_LESS,                                          // VkCompareOp                            depthCompareOp;
        VK_FALSE,                                                    // VkBool32                               depthBoundsTestEnable;
        VK_FALSE,                                                    // VkBool32                               stencilTestEnable;
        stencilOpState,                                              // VkStencilOpState                       front;
        stencilOpState,                                              // VkStencilOpState                       back;
        0.0f,                                                        // float                                  minDepthBounds;
        1.0f,                                                        // float                                  maxDepthBounds;
    };

    const VkColorComponentFlags colorComponentsAll = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    const VkPipelineColorBlendAttachmentState pipelineColorBlendAttachmentState =
    {
        VK_FALSE,              // VkBool32              blendEnable;
        VK_BLEND_FACTOR_ONE,   // VkBlendFactor         srcColorBlendFactor;
        VK_BLEND_FACTOR_ZERO,  // VkBlendFactor         dstColorBlendFactor;
        VK_BLEND_OP_ADD,       // VkBlendOp             colorBlendOp;
        VK_BLEND_FACTOR_ONE,   // VkBlendFactor         srcAlphaBlendFactor;
        VK_BLEND_FACTOR_ZERO,  // VkBlendFactor         dstAlphaBlendFactor;
        VK_BLEND_OP_ADD,       // VkBlendOp             alphaBlendOp;
        colorComponentsAll,    // VkColorComponentFlags colorWriteMask;
    };

    const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,  // VkStructureType                            sType;
        DE_NULL,                                                   // const void*                                pNext;
        (VkPipelineColorBlendStateCreateFlags)0,                   // VkPipelineColorBlendStateCreateFlags       flags;
        VK_FALSE,                                                  // VkBool32                                   logicOpEnable;
        VK_LOGIC_OP_COPY,                                          // VkLogicOp                                  logicOp;
        1u,                                                        // deUint32                                   attachmentCount;
        &pipelineColorBlendAttachmentState,                        // const VkPipelineColorBlendAttachmentState* pAttachments;
        { 0.0f, 0.0f, 0.0f, 0.0f },                                // float                                      blendConstants[4];
    };

    const VkGraphicsPipelineCreateInfo graphicsPipelineInfo =
    {
        VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,  // VkStructureType                               sType;
        DE_NULL,                                          // const void*                                   pNext;
        (VkPipelineCreateFlags)0,                         // VkPipelineCreateFlags                         flags;
        stageCount,                                       // deUint32                                      stageCount;
        pStages,                                          // const VkPipelineShaderStageCreateInfo*        pStages;
        &vertexInputStateInfo,                            // const VkPipelineVertexInputStateCreateInfo*   pVertexInputState;
        &pipelineInputAssemblyStateInfo,                  // const VkPipelineInputAssemblyStateCreateInfo* pInputAssemblyState;
        DE_NULL,                                          // const VkPipelineTessellationStateCreateInfo*  pTessellationState;
        &pipelineViewportStateInfo,                       // const VkPipelineViewportStateCreateInfo*      pViewportState;
        &pipelineRasterizationStateInfo,                  // const VkPipelineRasterizationStateCreateInfo* pRasterizationState;
        &pipelineMultisampleStateInfo,                    // const VkPipelineMultisampleStateCreateInfo*   pMultisampleState;
        &pipelineDepthStencilStateInfo,                   // const VkPipelineDepthStencilStateCreateInfo*  pDepthStencilState;
        &pipelineColorBlendStateInfo,                     // const VkPipelineColorBlendStateCreateInfo*    pColorBlendState;
        DE_NULL,                                          // const VkPipelineDynamicStateCreateInfo*       pDynamicState;
        pipelineLayout,                                   // VkPipelineLayout                              layout;
        renderPass,                                       // VkRenderPass                                  renderPass;
        0u,                                               // deUint32                                      subpass;
        DE_NULL,                                          // VkPipeline                                    basePipelineHandle;
        0,                                                // deInt32                                       basePipelineIndex;
    };

    return createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
}

//! Return true if there are any red (or all zero) pixels in the image
bool imageHasErrorPixels (const tcu::ConstPixelBufferAccess image)
{
    const Vec4 errorColor = Vec4(1.0f, 0.0f, 0.0f, 1.0f);
    const Vec4 blankColor = Vec4();

    for (int y = 0; y < image.getHeight(); ++y)
    for (int x = 0; x < image.getWidth(); ++x)
    {
        const Vec4 color = image.getPixel(x, y);
        if (color == errorColor || color == blankColor)
            return true;
    }

    return false;
}

class Renderer
{
public:
    typedef std::map<VkShaderStageFlagBits, const VkSpecializationInfo*> SpecializationMap;

    //! Use the delegate to bind descriptor sets, vertex buffers, etc. and make a draw call
    struct Delegate
    {
        virtual      ~Delegate    (void) {}
        virtual void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const = 0;
    };

    Renderer (const DeviceInterface&      vk,
              const VkDevice              device,
              Allocator&                  allocator,
              const deUint32              queueFamilyIndex,
              const VkDescriptorSetLayout descriptorSetLayout,  //!< may be NULL, if no descriptors are used
              BinaryCollection&           binaryCollection,
              const std::string&          vertexName,
              const std::string&          fragmentName,
              const VkBuffer              colorBuffer,
              const IVec2&                renderSize,
              const VkFormat              colorFormat,
              const Vec4&                 clearColor,
              const VkPrimitiveTopology   topology,
              SpecializationMap           specMap = SpecializationMap())
        : m_colorBuffer           (colorBuffer)
        , m_renderSize            (renderSize)
        , m_colorFormat           (colorFormat)
        , m_colorSubresourceRange (makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u))
        , m_clearColor            (clearColor)
        , m_topology              (topology)
        , m_descriptorSetLayout   (descriptorSetLayout)
    {
        m_colorImage      = makeImage(vk, device, makeImageCreateInfo(m_colorFormat, m_renderSize, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT));
        m_colorImageAlloc = bindImage(vk, device, allocator, *m_colorImage, MemoryRequirement::Any);
        m_colorAttachment = makeImageView(vk, device, *m_colorImage, VK_IMAGE_VIEW_TYPE_2D, m_colorFormat, m_colorSubresourceRange);

        m_vertexModule   = createShaderModule(vk, device, binaryCollection.get(vertexName), 0u);
        m_fragmentModule = createShaderModule(vk, device, binaryCollection.get(fragmentName), 0u);

        const VkPipelineShaderStageCreateInfo pShaderStages[] =
        {
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,  // VkStructureType                  sType;
                DE_NULL,                                              // const void*                      pNext;
                (VkPipelineShaderStageCreateFlags)0,                  // VkPipelineShaderStageCreateFlags flags;
                VK_SHADER_STAGE_VERTEX_BIT,                           // VkShaderStageFlagBits            stage;
                *m_vertexModule,                                      // VkShaderModule                   module;
                "main",                                               // const char*                      pName;
                specMap[VK_SHADER_STAGE_VERTEX_BIT],                  // const VkSpecializationInfo*      pSpecializationInfo;
            },
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,  // VkStructureType                  sType;
                DE_NULL,                                              // const void*                      pNext;
                (VkPipelineShaderStageCreateFlags)0,                  // VkPipelineShaderStageCreateFlags flags;
                VK_SHADER_STAGE_FRAGMENT_BIT,                         // VkShaderStageFlagBits            stage;
                *m_fragmentModule,                                    // VkShaderModule                   module;
                "main",                                               // const char*                      pName;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT],                // const VkSpecializationInfo*      pSpecializationInfo;
            }
        };

        m_renderPass     = makeRenderPass(vk, device, m_colorFormat);
        m_framebuffer    = makeFramebuffer(vk, device, *m_renderPass, m_colorAttachment.get(),
                                           static_cast<deUint32>(m_renderSize.x()), static_cast<deUint32>(m_renderSize.y()));
        m_pipelineLayout = makePipelineLayout(vk, device, m_descriptorSetLayout);
        m_pipeline       = makeGraphicsPipeline(vk, device, *m_pipelineLayout, *m_renderPass, m_renderSize, m_topology, DE_LENGTH_OF_ARRAY(pShaderStages), pShaderStages);
        m_cmdPool        = makeCommandPool(vk, device, queueFamilyIndex);
        m_cmdBuffer      = allocateCommandBuffer(vk, device, *m_cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    }

    void draw (const DeviceInterface& vk,
               const VkDevice         device,
               const VkQueue          queue,
               const Delegate&        drawDelegate,
               const bool             useDeviceGroups,
               const deUint32         deviceID) const
    {
        beginCommandBuffer(vk, *m_cmdBuffer);

        beginRenderPass(vk, *m_cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_renderSize.x(), m_renderSize.y()), m_clearColor);

        vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
        drawDelegate.rendererDraw(*m_pipelineLayout, *m_cmdBuffer);

        endRenderPass(vk, *m_cmdBuffer);

        copyImageToBuffer(vk, *m_cmdBuffer, *m_colorImage, m_colorBuffer, m_renderSize);

        endCommandBuffer(vk, *m_cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *m_cmdBuffer, 0U, DE_NULL, DE_NULL, 0U, DE_NULL, useDeviceGroups, deviceID);
    }

private:
    const VkBuffer                m_colorBuffer;
    const IVec2                   m_renderSize;
    const VkFormat                m_colorFormat;
    const VkImageSubresourceRange m_colorSubresourceRange;
    const Vec4                    m_clearColor;
    const VkPrimitiveTopology     m_topology;
    const VkDescriptorSetLayout   m_descriptorSetLayout;

    Move<VkImage>          m_colorImage;
    MovePtr<Allocation>    m_colorImageAlloc;
    Move<VkImageView>      m_colorAttachment;
    Move<VkShaderModule>   m_vertexModule;
    Move<VkShaderModule>   m_fragmentModule;
    Move<VkRenderPass>     m_renderPass;
    Move<VkFramebuffer>    m_framebuffer;
    Move<VkPipelineLayout> m_pipelineLayout;
    Move<VkPipeline>       m_pipeline;
    Move<VkCommandPool>    m_cmdPool;
    Move<VkCommandBuffer>  m_cmdBuffer;

    // "deleted"
    Renderer            (const Renderer&);
    Renderer& operator= (const Renderer&);
};

void bindSparseBuffer (const DeviceInterface& vk, const VkDevice device, const VkQueue sparseQueue, const VkBuffer buffer, const SparseAllocation& sparseAllocation,
                       const bool useDeviceGroups, deUint32 resourceDevId, deUint32 memoryDeviceId)
{
    const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo =
    {
        buffer,                                                      // VkBuffer                  buffer;
        static_cast<deUint32>(sparseAllocation.memoryBinds.size()),  // uint32_t                  bindCount;
        &sparseAllocation.memoryBinds[0],                            // const VkSparseMemoryBind* pBinds;
    };

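    // With device groups, a sparse bind may cross physical devices: the buffer
    // (resource) lives on resourceDevId while the backing memory comes from
    // memoryDeviceId. The struct is only chained into pNext when device groups
    // are in use (see bindInfo below).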
    const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
    {
        VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,  // VkStructureType sType;
        DE_NULL,                                              // const void*     pNext;
        resourceDevId,                                        // deUint32        resourceDeviceIndex;
        memoryDeviceId,                                       // deUint32        memoryDeviceIndex;
    };

    const VkBindSparseInfo bindInfo =
    {
        VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                   // VkStructureType                          sType;
        useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,  // const void*                              pNext;
        0u,                                                   // uint32_t                                 waitSemaphoreCount;
        DE_NULL,                                              // const VkSemaphore*                       pWaitSemaphores;
        1u,                                                   // uint32_t                                 bufferBindCount;
        &sparseBufferMemoryBindInfo,                          // const VkSparseBufferMemoryBindInfo*      pBufferBinds;
        0u,                                                   // uint32_t                                 imageOpaqueBindCount;
        DE_NULL,                                              // const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
        0u,                                                   // uint32_t                                 imageBindCount;
        DE_NULL,                                              // const VkSparseImageMemoryBindInfo*       pImageBinds;
        0u,                                                   // uint32_t                                 signalSemaphoreCount;
        DE_NULL,                                              // const VkSemaphore*                       pSignalSemaphores;
    };

    const Unique<VkFence> fence(createFence(vk, device));

    VK_CHECK(vk.queueBindSparse(sparseQueue, 1u, &bindInfo, *fence));
    VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~0ull));
}

class SparseBufferTestInstance : public SparseResourcesBaseInstance, Renderer::Delegate
{
public:
    SparseBufferTestInstance (Context& context, const TestFlags flags)
        : SparseResourcesBaseInstance (context, (flags & TEST_FLAG_ENABLE_DEVICE_GROUPS) != 0)
        , m_aliased                   ((flags & TEST_FLAG_ALIASED) != 0)
        , m_residency                 ((flags & TEST_FLAG_RESIDENCY) != 0)
        , m_nonResidentStrict         ((flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0)
        , m_renderSize                (RENDER_SIZE, RENDER_SIZE)
        , m_colorFormat               (VK_FORMAT_R8G8B8A8_UNORM)
        , m_colorBufferSize           (m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(m_colorFormat)))
    {
        {
            QueueRequirementsVec requirements;
            requirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
            requirements.push_back(QueueRequirements(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 1u));

            createDeviceSupportingQueues(requirements);
        }

        const DeviceInterface& vk = getDeviceInterface();

        m_sparseQueue    = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0u);
        m_universalQueue = getQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0u);

        m_sharedQueueFamilyIndices[0] = m_sparseQueue.queueFamilyIndex;
        m_sharedQueueFamilyIndices[1] = m_universalQueue.queueFamilyIndex;

        m_colorBuffer      = makeBuffer(vk, getDevice(), m_colorBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
        m_colorBufferAlloc = bindBuffer(vk, getDevice(), getAllocator(), *m_colorBuffer, MemoryRequirement::HostVisible);

        deMemset(m_colorBufferAlloc->getHostPtr(), 0, static_cast<std::size_t>(m_colorBufferSize));
        flushAlloc(vk, getDevice(), *m_colorBufferAlloc);
    }

protected:
    VkBufferCreateInfo getSparseBufferCreateInfo (const VkBufferUsageFlags usage) const
    {
        VkBufferCreateFlags flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
        if (m_residency)
            flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
        if (m_aliased)
            flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;

        VkBufferCreateInfo referenceBufferCreateInfo =
        {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,      // VkStructureType     sType;
            DE_NULL,                                   // const void*         pNext;
            flags,                                     // VkBufferCreateFlags flags;
            0u,                   // override later    // VkDeviceSize        size;
            VK_BUFFER_USAGE_TRANSFER_DST_BIT | usage,  // VkBufferUsageFlags  usage;
            VK_SHARING_MODE_EXCLUSIVE,                 // VkSharingMode       sharingMode;
            0u,                                        // uint32_t            queueFamilyIndexCount;
            DE_NULL,                                   // const uint32_t*     pQueueFamilyIndices;
        };

        if (m_sparseQueue.queueFamilyIndex != m_universalQueue.queueFamilyIndex)
        {
            referenceBufferCreateInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
            referenceBufferCreateInfo.queueFamilyIndexCount = DE_LENGTH_OF_ARRAY(m_sharedQueueFamilyIndices);
            referenceBufferCreateInfo.pQueueFamilyIndices   = m_sharedQueueFamilyIndices;
        }

        return referenceBufferCreateInfo;
    }

    void draw (const VkPrimitiveTopology   topology,
               const VkDescriptorSetLayout descriptorSetLayout = DE_NULL,
               Renderer::SpecializationMap specMap             = Renderer::SpecializationMap(),
               bool                        useDeviceGroups     = false,
               deUint32                    deviceID            = 0)
    {
        const UniquePtr<Renderer> renderer(new Renderer(
            getDeviceInterface(), getDevice(), getAllocator(), m_universalQueue.queueFamilyIndex, descriptorSetLayout,
            m_context.getBinaryCollection(), "vert", "frag", *m_colorBuffer, m_renderSize, m_colorFormat, Vec4(1.0f, 0.0f, 0.0f, 1.0f), topology, specMap));

        renderer->draw(getDeviceInterface(), getDevice(), m_universalQueue.queueHandle, *this, useDeviceGroups, deviceID);
    }

    bool isResultImageCorrect (void) const
    {
        invalidateAlloc(getDeviceInterface(), getDevice(), *m_colorBufferAlloc);

        const tcu::ConstPixelBufferAccess resultImage (mapVkFormat(m_colorFormat), m_renderSize.x(), m_renderSize.y(), 1u, m_colorBufferAlloc->getHostPtr());

        m_context.getTestContext().getLog()
            << tcu::LogImageSet("Result", "Result") << tcu::LogImage("color0", "", resultImage) << tcu::TestLog::EndImageSet;

        return !imageHasErrorPixels(resultImage);
    }

    const bool m_aliased;
    const bool m_residency;
    const bool m_nonResidentStrict;

    Queue m_sparseQueue;
    Queue m_universalQueue;

private:
    const IVec2        m_renderSize;
    const VkFormat     m_colorFormat;
    const VkDeviceSize m_colorBufferSize;

    Move<VkBuffer>      m_colorBuffer;
    MovePtr<Allocation> m_colorBufferAlloc;

    deUint32 m_sharedQueueFamilyIndices[2];
};

void initProgramsDrawWithUBO (vk::SourceCollections& programCollection, const TestFlags flags)
{
    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in vec4 in_position;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        const bool aliased           = (flags & TEST_FLAG_ALIASED) != 0;
        const bool residency         = (flags & TEST_FLAG_RESIDENCY) != 0;
        const bool nonResidentStrict = (flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0;
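        // Expected value at index ndx; this must match the pattern written on the
        // host in UBOTestInstance::iterate() (pData[i] = IVec4(3*i ^ 127, 0, 0, 0)).
        // In the aliased case the last chunk shares memory with chunk 0, so reads
        // from it wrap around via ndx % nonAliasedSize.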
        const std::string valueExpr = (aliased ? "ivec4(3*(ndx % nonAliasedSize) ^ 127, 0, 0, 0)" : "ivec4(3*ndx ^ 127, 0, 0, 0)");

        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) out vec4 o_color;\n"
            << "\n"
            << "layout(constant_id = 1) const int dataSize  = 1;\n"
            << "layout(constant_id = 2) const int chunkSize = 1;\n"
            << "\n"
            << "layout(set = 0, binding = 0, std140) uniform SparseBuffer {\n"
            << "    ivec4 data[dataSize];\n"
            << "} ubo;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    const int fragNdx   = int(gl_FragCoord.x) + " << RENDER_SIZE << " * int(gl_FragCoord.y);\n"
            << "    const int pageSize  = " << RENDER_SIZE << " * " << RENDER_SIZE << ";\n"
            << "    const int numChunks = dataSize / chunkSize;\n";

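        // nonAliasedSize is the number of entries backed by unique memory: with
        // more than one chunk the final chunk is an alias of chunk 0 and is
        // excluded; with a single chunk nothing is aliased.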
        if (aliased)
            src << "    const int nonAliasedSize = (numChunks > 1 ? dataSize - chunkSize : dataSize);\n";

        src << "    bool ok = true;\n"
            << "\n"
            << "    for (int ndx = fragNdx; ndx < dataSize; ndx += pageSize)\n"
            << "    {\n";

        if (residency && nonResidentStrict)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            ok = ok && (ubo.data[ndx] == ivec4(0));\n"
                << "        else\n"
                << "            ok = ok && (ubo.data[ndx] == " + valueExpr + ");\n";
        }
        else if (residency)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            continue;\n"
                << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
        }
        else
            src << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";

        src << "    }\n"
            << "\n"
            << "    if (ok)\n"
            << "        o_color = vec4(0.0, 1.0, 0.0, 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(1.0, 0.0, 0.0, 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}

//! Sparse buffer backing a UBO
class UBOTestInstance : public SparseBufferTestInstance
{
public:
    UBOTestInstance (Context& context, const TestFlags flags)
        : SparseBufferTestInstance (context, flags)
    {
    }

    void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        const DeviceInterface& vk           = getDeviceInterface();
        const VkDeviceSize     vertexOffset = 0ull;

        vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
        vk.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
        vk.cmdDraw              (cmdBuffer, 4u, 1u, 0u, 0u);
    }

    tcu::TestStatus iterate (void)
    {
        const InstanceInterface&  instance = m_context.getInstanceInterface();
        const DeviceInterface&    vk       = getDeviceInterface();
        MovePtr<SparseAllocation> sparseAllocation;
        Move<VkBuffer>            sparseBuffer;
        Move<VkBuffer>            sparseBufferAliased;
        bool                      setupDescriptors = true;

        // Go through all physical devices
        for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
        {
            const deUint32 firstDeviceID  = physDevID;
            const deUint32 secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

            // Set up the sparse buffer
            {
                VkBufferCreateInfo referenceBufferCreateInfo = getSparseBufferCreateInfo(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
                const VkDeviceSize minChunkSize              = 512u;  // make sure the smallest allocation is at least this big
                deUint32           numMaxChunks              = 0u;

                // Check how many chunks we can allocate given the alignment and size requirements of UBOs
                {
                    const UniquePtr<SparseAllocation> minAllocation(SparseAllocationBuilder()
                        .addMemoryBind()
                        .build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize));

                    numMaxChunks = deMaxu32(static_cast<deUint32>(m_context.getDeviceProperties().limits.maxUniformBufferRange / minAllocation->resourceSize), 1u);
                }

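                // The non-trivial layout below uses up to four resource chunks
                // (bind + optional hole + bind + optional aliased bind), and the
                // whole buffer is bound as a single UBO, so it must fit within
                // maxUniformBufferRange; otherwise fall back to a single chunk.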
                if (numMaxChunks < 4)
                {
                    sparseAllocation = SparseAllocationBuilder()
                        .addMemoryBind()
                        .build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
                }
                else
                {
                    // Try to use a non-trivial memory allocation scheme to make it different from a non-sparse binding
                    SparseAllocationBuilder builder;
                    builder.addMemoryBind();

                    if (m_residency)
                        builder.addResourceHole();

                    builder
                        .addMemoryAllocation()
                        .addMemoryHole()
                        .addMemoryBind();

                    if (m_aliased)
                        builder.addAliasedMemoryBind(0u, 0u);

                    sparseAllocation = builder.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
                    DE_ASSERT(sparseAllocation->resourceSize <= m_context.getDeviceProperties().limits.maxUniformBufferRange);
                }

                if (firstDeviceID != secondDeviceID)
                {
                    VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                    vk.getDeviceGroupPeerMemoryFeatures(getDevice(), sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);

                    if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                        ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
                    {
                        TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
                    }
                }

                // Create the buffer
                referenceBufferCreateInfo.size = sparseAllocation->resourceSize;
                sparseBuffer = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);

                if (m_aliased)
                {
                    sparseBufferAliased = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                    bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBufferAliased, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
                }
            }

            // Set uniform data
            {
                const bool         hasAliasedChunk   = (m_aliased && sparseAllocation->memoryBinds.size() > 1u);
                const VkDeviceSize chunkSize         = sparseAllocation->resourceSize / sparseAllocation->numResourceChunks;
                const VkDeviceSize stagingBufferSize = sparseAllocation->resourceSize - (hasAliasedChunk ? chunkSize : 0);
                const deUint32     numBufferEntries  = static_cast<deUint32>(stagingBufferSize / sizeof(IVec4));

                const Unique<VkBuffer>      stagingBuffer      (makeBuffer(vk, getDevice(), stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
                const UniquePtr<Allocation> stagingBufferAlloc (bindBuffer(vk, getDevice(), getAllocator(), *stagingBuffer, MemoryRequirement::HostVisible));

                {
                    // If aliased chunk is used, the staging buffer is smaller than the sparse buffer and we don't overwrite the last chunk
                    IVec4* const pData = static_cast<IVec4*>(stagingBufferAlloc->getHostPtr());
                    for (deUint32 i = 0; i < numBufferEntries; ++i)
                        pData[i] = IVec4(3*i ^ 127, 0, 0, 0);

                    flushAlloc(vk, getDevice(), *stagingBufferAlloc);

                    const VkBufferCopy copyRegion =
                    {
                        0ull,               // VkDeviceSize srcOffset;
                        0ull,               // VkDeviceSize dstOffset;
                        stagingBufferSize,  // VkDeviceSize size;
                    };

                    const Unique<VkCommandPool>   cmdPool   (makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
                    const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

                    beginCommandBuffer (vk, *cmdBuffer);
                    vk.cmdCopyBuffer   (*cmdBuffer, *stagingBuffer, *sparseBuffer, 1u, &copyRegion);
                    endCommandBuffer   (vk, *cmdBuffer);

                    submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
                    // Once the fence is signaled, the write is also available to the aliasing buffer.
                }
            }

            // Make sure that we don't try to access a larger range than is allowed. This only applies to a single chunk case.
            const deUint32 maxBufferRange = deMinu32(static_cast<deUint32>(sparseAllocation->resourceSize), m_context.getDeviceProperties().limits.maxUniformBufferRange);

            // Descriptor sets
            {
                // Setup only once
                if (setupDescriptors)
                {
                    m_descriptorSetLayout = DescriptorSetLayoutBuilder()
                        .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT)
                        .build(vk, getDevice());

                    m_descriptorPool = DescriptorPoolBuilder()
                        .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
                        .build(vk, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

                    m_descriptorSet  = makeDescriptorSet(vk, getDevice(), *m_descriptorPool, *m_descriptorSetLayout);
                    setupDescriptors = false;
                }

                const VkBuffer               buffer           = (m_aliased ? *sparseBufferAliased : *sparseBuffer);
                const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(buffer, 0ull, maxBufferRange);

                DescriptorSetUpdateBuilder()
                    .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &sparseBufferInfo)
                    .update(vk, getDevice());
            }

            // Vertex data
            {
                const Vec4 vertexData[] =
                {
                    Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
                    Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
                    Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
                    Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
                };

                const VkDeviceSize vertexBufferSize = sizeof(vertexData);

                m_vertexBuffer      = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
                m_vertexBufferAlloc = bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

                deMemcpy(m_vertexBufferAlloc->getHostPtr(), &vertexData[0], vertexBufferSize);
                flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
            }

            // Draw
            {
                std::vector<deInt32> specializationData;
                {
                    const deUint32 numBufferEntries   = maxBufferRange / static_cast<deUint32>(sizeof(IVec4));
                    const deUint32 numEntriesPerChunk = numBufferEntries / sparseAllocation->numResourceChunks;

                    specializationData.push_back(numBufferEntries);
                    specializationData.push_back(numEntriesPerChunk);
                }

                const VkSpecializationMapEntry specMapEntries[] =
                {
                    {
                        1u,               // uint32_t constantID;
                        0u,               // uint32_t offset;
                        sizeof(deInt32),  // size_t   size;
                    },
                    {
                        2u,               // uint32_t constantID;
                        sizeof(deInt32),  // uint32_t offset;
                        sizeof(deInt32),  // size_t   size;
                    },
                };

                const VkSpecializationInfo specInfo =
                {
                    DE_LENGTH_OF_ARRAY(specMapEntries),    // uint32_t                        mapEntryCount;
                    specMapEntries,                        // const VkSpecializationMapEntry* pMapEntries;
                    sizeInBytes(specializationData),       // size_t                          dataSize;
                    getDataOrNullptr(specializationData),  // const void*                     pData;
                };

                Renderer::SpecializationMap specMap;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT] = &specInfo;

                draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, *m_descriptorSetLayout, specMap, usingDeviceGroups(), firstDeviceID);
            }

            if (!isResultImageCorrect())
                return tcu::TestStatus::fail("Some buffer values were incorrect");
        }
        return tcu::TestStatus::pass("Pass");
    }

private:
    Move<VkBuffer>      m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;

    Move<VkDescriptorSetLayout> m_descriptorSetLayout;
    Move<VkDescriptorPool>      m_descriptorPool;
    Move<VkDescriptorSet>       m_descriptorSet;
};

void initProgramsDrawGrid (vk::SourceCollections& programCollection, const TestFlags flags)
{
    DE_UNREF(flags);

    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in vec4 in_position;\n"
            << "layout(location = 0) out int out_ndx;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "    out_ndx     = gl_VertexIndex;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) flat in int in_ndx;\n"
            << "layout(location = 0) out vec4 o_color;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    if (in_ndx % 2 == 0)\n"
            << "        o_color = vec4(vec3(1.0), 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(vec3(0.75), 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}

//! Generate vertex positions for a grid of tiles composed of two triangles each (6 vertices)
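//! Each tile is two triangles sharing the (x, y+step)-(x+step, y) diagonal:
//!
//!   (x, y)       (x+step, y)
//!      +--------+             vertices 0..2: (x+step, y), (x, y), (x, y+step)
//!      |      / |             vertices 3..5: (x, y+step), (x+step, y+step), (x+step, y)
//!      |    /   |
//!      |  /     |
//!      +--------+
//!   (x, y+step)  (x+step, y+step)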
void generateGrid (void* pRawData, const float step, const float ox, const float oy, const deUint32 numX, const deUint32 numY, const float z = 0.0f)
{
    typedef Vec4 (*TilePtr)[6];

    TilePtr const pData = static_cast<TilePtr>(pRawData);
    {
        for (deUint32 iy = 0; iy < numY; ++iy)
        for (deUint32 ix = 0; ix < numX; ++ix)
        {
            const deUint32 ndx = ix + numX * iy;
            const float    x   = ox + step * static_cast<float>(ix);
            const float    y   = oy + step * static_cast<float>(iy);

            pData[ndx][0] = Vec4(x + step, y,        z, 1.0f);
            pData[ndx][1] = Vec4(x,        y,        z, 1.0f);
            pData[ndx][2] = Vec4(x,        y + step, z, 1.0f);

            pData[ndx][3] = Vec4(x,        y + step, z, 1.0f);
            pData[ndx][4] = Vec4(x + step, y + step, z, 1.0f);
            pData[ndx][5] = Vec4(x + step, y,        z, 1.0f);
        }
    }
}

//! Base test for a sparse buffer backing a vertex/index buffer
class DrawGridTestInstance : public SparseBufferTestInstance
{
public:
    DrawGridTestInstance (Context& context, const TestFlags flags, const VkBufferUsageFlags usage, const VkDeviceSize minChunkSize)
        : SparseBufferTestInstance (context, flags)
        , m_bufferUsage            (usage)
        , m_minChunkSize           (minChunkSize)
    {
    }

    void createResources (deUint32 memoryDeviceIndex)
    {
        const InstanceInterface& instance                  = m_context.getInstanceInterface();
        const DeviceInterface&   vk                        = getDeviceInterface();
        VkBufferCreateInfo       referenceBufferCreateInfo = getSparseBufferCreateInfo(m_bufferUsage);

        {
            // Allocate two chunks, each covering half of the viewport
            SparseAllocationBuilder builder;
            builder.addMemoryBind();

            if (m_residency)
                builder.addResourceHole();

            builder
                .addMemoryAllocation()
                .addMemoryHole()
                .addMemoryBind();

            if (m_aliased)
                builder.addAliasedMemoryBind(0u, 0u);

            m_sparseAllocation = builder.build(instance, getPhysicalDevice(memoryDeviceIndex), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, m_minChunkSize);
        }

        // Create the buffer
        referenceBufferCreateInfo.size = m_sparseAllocation->resourceSize;
        m_sparseBuffer = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);

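        // The staging buffer holds one chunk's worth of data for each of the two
        // draw calls; the destination offsets within the sparse buffer are
        // computed in iterate() from the residency/aliasing layout.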
        m_perDrawBufferOffset = m_sparseAllocation->resourceSize / m_sparseAllocation->numResourceChunks;
        m_stagingBufferSize   = 2 * m_perDrawBufferOffset;
        m_stagingBuffer       = makeBuffer(vk, getDevice(), m_stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT);
        m_stagingBufferAlloc  = bindBuffer(vk, getDevice(), getAllocator(), *m_stagingBuffer, MemoryRequirement::HostVisible);
    }

    tcu::TestStatus iterate (void)
    {
        const DeviceInterface& vk = getDeviceInterface();

        for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
        {
            const deUint32 firstDeviceID  = physDevID;
            const deUint32 secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

            createResources(secondDeviceID);

            if (firstDeviceID != secondDeviceID)
            {
                VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                vk.getDeviceGroupPeerMemoryFeatures(getDevice(), m_sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);

                if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                    ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
                {
                    TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
                }
            }

            // Bind the memory
            bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *m_sparseBuffer, *m_sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);

            initializeBuffers();

            // Upload to the sparse buffer
            {
                flushAlloc(vk, getDevice(), *m_stagingBufferAlloc);

                VkDeviceSize firstChunkOffset  = 0ull;
                VkDeviceSize secondChunkOffset = m_perDrawBufferOffset;

                if (m_residency)
                    secondChunkOffset += m_perDrawBufferOffset;

                if (m_aliased)
                    firstChunkOffset = secondChunkOffset + m_perDrawBufferOffset;

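                // With residency, an unbound hole chunk sits between the two bound
                // chunks, so the second chunk lands one chunk further. With aliasing,
                // the first draw's data is written through the final chunk, whose
                // memory aliases chunk 0.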
                const VkBufferCopy copyRegions[] =
                {
                    {
                        0ull,                   // VkDeviceSize srcOffset;
                        firstChunkOffset,       // VkDeviceSize dstOffset;
                        m_perDrawBufferOffset,  // VkDeviceSize size;
                    },
                    {
                        m_perDrawBufferOffset,  // VkDeviceSize srcOffset;
                        secondChunkOffset,      // VkDeviceSize dstOffset;
                        m_perDrawBufferOffset,  // VkDeviceSize size;
                    },
                };

                const Unique<VkCommandPool>   cmdPool   (makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
                const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

                beginCommandBuffer (vk, *cmdBuffer);
                vk.cmdCopyBuffer   (*cmdBuffer, *m_stagingBuffer, *m_sparseBuffer, DE_LENGTH_OF_ARRAY(copyRegions), copyRegions);
                endCommandBuffer   (vk, *cmdBuffer);

                submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
            }

            Renderer::SpecializationMap specMap;
            draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, DE_NULL, specMap, usingDeviceGroups(), firstDeviceID);

            if (!isResultImageCorrect())
                return tcu::TestStatus::fail("Some buffer values were incorrect");
        }
        return tcu::TestStatus::pass("Pass");
    }

protected:
    virtual void initializeBuffers (void) = 0;

    const VkBufferUsageFlags m_bufferUsage;
    const VkDeviceSize       m_minChunkSize;

    VkDeviceSize m_perDrawBufferOffset;

    VkDeviceSize        m_stagingBufferSize;
    Move<VkBuffer>      m_stagingBuffer;
    MovePtr<Allocation> m_stagingBufferAlloc;

    MovePtr<SparseAllocation> m_sparseAllocation;
    Move<VkBuffer>            m_sparseBuffer;
};

//! Sparse buffer backing a vertex input buffer
class VertexBufferTestInstance : public DrawGridTestInstance
{
public:
    VertexBufferTestInstance (Context& context, const TestFlags flags)
        : DrawGridTestInstance (context,
                                flags,
                                VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
                                GRID_SIZE * GRID_SIZE * 6 * sizeof(Vec4))
    {
    }

    void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message << "Drawing a grid of triangles backed by a sparse vertex buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;

        const DeviceInterface& vk           = getDeviceInterface();
        const deUint32         vertexCount  = 6 * (GRID_SIZE * GRID_SIZE) / 2;
        VkDeviceSize           vertexOffset = 0ull;

        vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
        vk.cmdDraw              (cmdBuffer, vertexCount, 1u, 0u, 0u);

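        // Advance to the second bound chunk; with residency an unbound hole
        // chunk sits in between and must be skipped.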
1287 vertexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1288
1289 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1290 vk.cmdDraw (cmdBuffer, vertexCount, 1u, 0u, 0u);
1291 }
1292
initializeBuffers(void)1293 void initializeBuffers (void)
1294 {
1295 deUint8* pData = static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr());
1296 const float step = 2.0f / static_cast<float>(GRID_SIZE);
1297
1298 // Prepare data for two draw calls
1299 generateGrid(pData, step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE/2);
1300 generateGrid(pData + m_perDrawBufferOffset, step, -1.0f, 0.0f, GRID_SIZE, GRID_SIZE/2);
1301 }
1302 };
1303
1304 //! Sparse buffer backing an index buffer
1305 class IndexBufferTestInstance : public DrawGridTestInstance
1306 {
1307 public:
IndexBufferTestInstance(Context & context,const TestFlags flags)1308 IndexBufferTestInstance (Context& context, const TestFlags flags)
1309 : DrawGridTestInstance (context,
1310 flags,
1311 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
1312 GRID_SIZE * GRID_SIZE * 6 * sizeof(deUint32))
1313 , m_halfVertexCount (6 * (GRID_SIZE * GRID_SIZE) / 2)
1314 {
1315 }
1316
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1317 void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1318 {
1319 DE_UNREF(pipelineLayout);
1320
1321 m_context.getTestContext().getLog()
1322 << tcu::TestLog::Message << "Drawing a grid of triangles from a sparse index buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1323
1324 const DeviceInterface& vk = getDeviceInterface();
1325 const VkDeviceSize vertexOffset = 0ull;
1326 VkDeviceSize indexOffset = 0ull;
1327
1328 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
1329
1330 vk.cmdBindIndexBuffer (cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1331 vk.cmdDrawIndexed (cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1332
1333 indexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1334
1335 vk.cmdBindIndexBuffer (cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
1336 vk.cmdDrawIndexed (cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
1337 }
1338
initializeBuffers(void)1339 void initializeBuffers (void)
1340 {
1341 // Vertex buffer
1342 const DeviceInterface& vk = getDeviceInterface();
1343 const VkDeviceSize vertexBufferSize = 2 * m_halfVertexCount * sizeof(Vec4);
1344 m_vertexBuffer = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
1345 m_vertexBufferAlloc = bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);
1346
1347 {
1348 const float step = 2.0f / static_cast<float>(GRID_SIZE);
1349
1350 generateGrid(m_vertexBufferAlloc->getHostPtr(), step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);
1351
1352 flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
1353 }
1354
1355 // Sparse index buffer
        for (deUint32 chunkNdx = 0u; chunkNdx < 2u; ++chunkNdx)
        {
            deUint8* const  pData      = static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
            deUint32* const pIndexData = reinterpret_cast<deUint32*>(pData);
            const deUint32  ndxBase    = chunkNdx * m_halfVertexCount;

            for (deUint32 i = 0u; i < m_halfVertexCount; ++i)
                pIndexData[i] = ndxBase + i;
        }
    }

private:
    const deUint32      m_halfVertexCount;
    Move<VkBuffer>      m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;
};

//! Draw from a sparse indirect buffer
class IndirectBufferTestInstance : public DrawGridTestInstance
{
public:
    IndirectBufferTestInstance (Context& context, const TestFlags flags)
        : DrawGridTestInstance (context,
                                flags,
                                VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
                                sizeof(VkDrawIndirectCommand))
    {
    }

    void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        DE_UNREF(pipelineLayout);

        m_context.getTestContext().getLog()
            << tcu::TestLog::Message << "Drawing two triangles covering the whole viewport. There should be no red pixels visible." << tcu::TestLog::EndMessage;

        const DeviceInterface& vk             = getDeviceInterface();
        const VkDeviceSize     vertexOffset   = 0ull;
        VkDeviceSize           indirectOffset = 0ull;

        vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
        vk.cmdDrawIndirect      (cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);

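        // As with the index buffer test, the second command sits two
        // chunk-strides into the buffer when sparse residency is tested.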
        indirectOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

        vk.cmdDrawIndirect (cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
    }

    void initializeBuffers (void)
    {
        // Vertex buffer
        const DeviceInterface& vk               = getDeviceInterface();
        const VkDeviceSize     vertexBufferSize = 2 * 3 * sizeof(Vec4);
        m_vertexBuffer      = makeBuffer(vk, getDevice(), vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
        m_vertexBufferAlloc = bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

        {
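            // A single 1x1 grid cell with step 2.0 starting at (-1, -1) spans
            // the whole [-1, 1] viewport with two triangles.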
            generateGrid(m_vertexBufferAlloc->getHostPtr(), 2.0f, -1.0f, -1.0f, 1, 1);
            flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
        }

        // Indirect buffer
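        // One VkDrawIndirectCommand per chunk; each command draws one of the
        // two triangles (firstVertex 0 and 3, three vertices each).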
        for (deUint32 chunkNdx = 0u; chunkNdx < 2u; ++chunkNdx)
        {
            deUint8* const               pData    = static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
            VkDrawIndirectCommand* const pCmdData = reinterpret_cast<VkDrawIndirectCommand*>(pData);

            pCmdData->firstVertex   = 3u * chunkNdx;
            pCmdData->firstInstance = 0u;
            pCmdData->vertexCount   = 3u;
            pCmdData->instanceCount = 1u;
        }
    }

private:
    Move<VkBuffer>      m_vertexBuffer;
    MovePtr<Allocation> m_vertexBufferAlloc;
};

//! Similar to the class in vktTestCaseUtil.hpp, but uses Arg0 directly rather than through an InstanceFunction1
template<typename Arg0>
class FunctionProgramsSimple1
{
public:
    typedef void (*Function) (vk::SourceCollections& dst, Arg0 arg0);

    FunctionProgramsSimple1 (Function func) : m_func(func) {}

    void init (vk::SourceCollections& dst, const Arg0& arg0) const { m_func(dst, arg0); }

private:
    const Function m_func;
};

void checkSupport (Context& context, const TestFlags flags)
{
    context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_BINDING);

    if (flags & TEST_FLAG_RESIDENCY)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_BUFFER);

    if (flags & TEST_FLAG_ALIASED)
        context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_SPARSE_RESIDENCY_ALIASED);

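    // residencyNonResidentStrict is a device property rather than a feature, so
    // it cannot go through requireDeviceCoreFeature() and is checked directly.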
    if ((flags & TEST_FLAG_NON_RESIDENT_STRICT) && !context.getDeviceProperties().sparseProperties.residencyNonResidentStrict)
        TCU_THROW(NotSupportedError, "Missing sparse property: residencyNonResidentStrict");
}

//! Convenience function to create a TestCase based on a freestanding initPrograms and a TestInstance implementation
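// checkSupport() above is hooked up through FunctionSupport1, so every test
// created here performs the feature checks automatically.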
template<typename TestInstanceT, typename Arg0>
TestCase* createTestInstanceWithPrograms (tcu::TestContext&                                testCtx,
                                          const std::string&                               name,
                                          const std::string&                               desc,
                                          typename FunctionProgramsSimple1<Arg0>::Function initPrograms,
                                          Arg0                                             arg0)
{
    return new InstanceFactory1WithSupport<TestInstanceT, Arg0, FunctionSupport1<Arg0>, FunctionProgramsSimple1<Arg0> >(
        testCtx, tcu::NODETYPE_SELF_VALIDATE, name, desc, FunctionProgramsSimple1<Arg0>(initPrograms), arg0, typename FunctionSupport1<Arg0>::Args(checkSupport, arg0));
}

void populateTestGroup (tcu::TestCaseGroup* parentGroup)
{
    const struct
    {
        std::string name;
        TestFlags   flags;
    } groups[] =
    {
        { "sparse_binding",                       0u                                                   },
        { "sparse_binding_aliased",               TEST_FLAG_ALIASED                                    },
        { "sparse_residency",                     TEST_FLAG_RESIDENCY                                  },
        { "sparse_residency_aliased",             TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED              },
        { "sparse_residency_non_resident_strict", TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT  },
    };

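    // Only the UBO tests below use the full list including non_resident_strict;
    // the other draw tests use the shorter default list. Each case is also
    // registered in a "device_group_" variant with device groups enabled.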
    const int         numGroupsIncludingNonResidentStrict = DE_LENGTH_OF_ARRAY(groups);
    const int         numGroupsDefaultList                = numGroupsIncludingNonResidentStrict - 1;
    const std::string devGroupPrefix                      = "device_group_";

    // Transfer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "transfer", ""));
        {
            MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding", ""));
            addBufferSparseBindingTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding", ""));
            addBufferSparseBindingTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        parentGroup->addChild(group.release());
    }

    // SSBO
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ssbo", ""));
        {
            MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding_aliased", ""));
            addBufferSparseMemoryAliasingTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding_aliased", ""));
            addBufferSparseMemoryAliasingTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        {
            MovePtr<tcu::TestCaseGroup> subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_residency", ""));
            addBufferSparseResidencyTests(subGroup.get(), false);
            group->addChild(subGroup.release());

            MovePtr<tcu::TestCaseGroup> subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_residency", ""));
            addBufferSparseResidencyTests(subGroupDeviceGroups.get(), true);
            group->addChild(subGroupDeviceGroups.release());
        }
        parentGroup->addChild(group.release());
    }

    // UBO
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ubo", ""));

        for (int groupNdx = 0; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), groups[groupNdx].name, "", initProgramsDrawWithUBO, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), devGroupPrefix + groups[groupNdx].name, "", initProgramsDrawWithUBO, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }
        parentGroup->addChild(group.release());
    }

    // Vertex buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "vertex_buffer", ""));

        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name, "", initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), devGroupPrefix + groups[groupNdx].name, "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }

    // Index buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "index_buffer", ""));

        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name, "", initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), devGroupPrefix + groups[groupNdx].name, "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }

    // Indirect buffer
    {
        MovePtr<tcu::TestCaseGroup> group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "indirect_buffer", ""));

        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), groups[groupNdx].name, "", initProgramsDrawGrid, groups[groupNdx].flags));
        }
        for (int groupNdx = 0; groupNdx < numGroupsDefaultList; ++groupNdx)
        {
            group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), devGroupPrefix + groups[groupNdx].name, "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
        }

        parentGroup->addChild(group.release());
    }
}

} // anonymous ns

tcu::TestCaseGroup* createSparseBufferTests (tcu::TestContext& testCtx)
{
    return createTestGroup(testCtx, "buffer", "Sparse buffer usage tests", populateTestGroup);
}

} // sparse
} // vkt