/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2016 The Khronos Group Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 *//*!
 * \file
 * \brief Sparse buffer tests
 *//*--------------------------------------------------------------------*/

#include "vktSparseResourcesBufferTests.hpp"
#include "vktTestCaseUtil.hpp"
#include "vktTestGroupUtil.hpp"
#include "vktSparseResourcesTestsUtil.hpp"
#include "vktSparseResourcesBase.hpp"
#include "vktSparseResourcesBufferSparseBinding.hpp"
#include "vktSparseResourcesBufferSparseResidency.hpp"
#include "vktSparseResourcesBufferMemoryAliasing.hpp"

#include "vkRef.hpp"
#include "vkRefUtil.hpp"
#include "vkPlatform.hpp"
#include "vkPrograms.hpp"
#include "vkMemUtil.hpp"
#include "vkBuilderUtil.hpp"
#include "vkQueryUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

#include "tcuTestLog.hpp"

#include "deUniquePtr.hpp"
#include "deSharedPtr.hpp"
#include "deMath.h"

#include <string>
#include <vector>
#include <map>

using namespace vk;
using de::MovePtr;
using de::UniquePtr;
using de::SharedPtr;
using tcu::Vec4;
using tcu::IVec2;
using tcu::IVec4;

namespace vkt
{
namespace sparse
{
namespace
{

typedef SharedPtr<UniquePtr<Allocation> > AllocationSp;

enum
{
    RENDER_SIZE = 128,              //!< framebuffer size in pixels
    GRID_SIZE   = RENDER_SIZE / 8,  //!< number of grid tiles in a row
};
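// Note: with RENDER_SIZE = 128 this yields a 16x16 grid of 8x8-pixel tiles.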

enum TestFlagBits
{
    // sparseBinding is implied
    TEST_FLAG_ALIASED              = 1u << 0,   //!< sparseResidencyAliased
    TEST_FLAG_RESIDENCY            = 1u << 1,   //!< sparseResidencyBuffer
    TEST_FLAG_NON_RESIDENT_STRICT  = 1u << 2,   //!< residencyNonResidentStrict
    TEST_FLAG_ENABLE_DEVICE_GROUPS = 1u << 3,   //!< device groups are enabled
};
typedef deUint32 TestFlags;

//! SparseAllocationBuilder output. Owns the allocated memory.
struct SparseAllocation
{
    deUint32                        numResourceChunks;
    VkDeviceSize                    resourceSize;   //!< buffer size in bytes
    std::vector<AllocationSp>       allocations;    //!< actual allocated memory
    std::vector<VkSparseMemoryBind> memoryBinds;    //!< memory binds backing the resource
    deUint32                        memoryType;     //!< memory type (same for all allocations)
    deUint32                        heapIndex;      //!< memory heap index
};

//! Utility to lay out memory allocations for a sparse buffer, including holes and aliased regions.
//! Will allocate memory upon building.
class SparseAllocationBuilder
{
public:
                                SparseAllocationBuilder (void);

    // \note "chunk" is the smallest (due to alignment) bindable amount of memory

    SparseAllocationBuilder&    addMemoryHole        (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addResourceHole      (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addMemoryBind        (const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addAliasedMemoryBind (const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks = 1u);
    SparseAllocationBuilder&    addMemoryAllocation  (void);

    MovePtr<SparseAllocation>   build                (const InstanceInterface& instanceInterface,
                                                      const VkPhysicalDevice   physicalDevice,
                                                      const DeviceInterface&   vk,
                                                      const VkDevice           device,
                                                      Allocator&               allocator,
                                                      VkBufferCreateInfo       referenceCreateInfo,      //!< buffer size is ignored in this info
                                                      const VkDeviceSize       minChunkSize = 0ull) const;  //!< make sure chunks are at least this big

private:
    struct MemoryBind
    {
        deUint32    allocationNdx;
        deUint32    resourceChunkNdx;
        deUint32    memoryChunkNdx;
        deUint32    numChunks;
    };

    deUint32                m_allocationNdx;
    deUint32                m_resourceChunkNdx;
    deUint32                m_memoryChunkNdx;
    std::vector<MemoryBind> m_memoryBinds;
    std::vector<deUint32>   m_chunksPerAllocation;
};
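
// A rough usage sketch (illustrative only; it mirrors the pattern the tests below use):
//
//     SparseAllocationBuilder builder;
//     builder.addMemoryBind();                 // resource chunk 0 <- allocation 0, memory chunk 0
//     builder.addResourceHole();               // resource chunk 1 left unbound (needs sparseResidencyBuffer)
//     builder.addMemoryAllocation()            // subsequent binds draw from allocation 1
//            .addMemoryHole()                  // skip one memory chunk of allocation 1
//            .addMemoryBind();                 // resource chunk 2 <- allocation 1, memory chunk 1
//     builder.addAliasedMemoryBind(0u, 0u);    // resource chunk 3 aliases allocation 0, memory chunk 0
//
//     MovePtr<SparseAllocation> allocation = builder.build(
//         instanceInterface, physicalDevice, vk, device, allocator, bufferCreateInfo, minChunkSize);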

SparseAllocationBuilder::SparseAllocationBuilder (void)
    : m_allocationNdx    (0)
    , m_resourceChunkNdx (0)
    , m_memoryChunkNdx   (0)
{
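    // Allocation index 0 exists implicitly; addMemoryAllocation() appends further ones.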
    m_chunksPerAllocation.push_back(0);
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryHole (const deUint32 numChunks)
{
    m_memoryChunkNdx                       += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addResourceHole (const deUint32 numChunks)
{
    m_resourceChunkNdx += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryAllocation (void)
{
    DE_ASSERT(m_memoryChunkNdx != 0);   // doesn't make sense to have an empty allocation

    m_allocationNdx  += 1;
    m_memoryChunkNdx  = 0;
    m_chunksPerAllocation.push_back(0);

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addMemoryBind (const deUint32 numChunks)
{
    const MemoryBind memoryBind =
    {
        m_allocationNdx,
        m_resourceChunkNdx,
        m_memoryChunkNdx,
        numChunks
    };
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx                     += numChunks;
    m_memoryChunkNdx                       += numChunks;
    m_chunksPerAllocation[m_allocationNdx] += numChunks;

    return *this;
}

SparseAllocationBuilder& SparseAllocationBuilder::addAliasedMemoryBind (const deUint32 allocationNdx, const deUint32 chunkOffset, const deUint32 numChunks)
{
    DE_ASSERT(allocationNdx <= m_allocationNdx);

    const MemoryBind memoryBind =
    {
        allocationNdx,
        m_resourceChunkNdx,
        chunkOffset,
        numChunks
    };
    m_memoryBinds.push_back(memoryBind);

    m_resourceChunkNdx += numChunks;

    return *this;
}

MovePtr<SparseAllocation> SparseAllocationBuilder::build (const InstanceInterface& instanceInterface,
                                                          const VkPhysicalDevice   physicalDevice,
                                                          const DeviceInterface&   vk,
                                                          const VkDevice           device,
                                                          Allocator&               allocator,
                                                          VkBufferCreateInfo       referenceCreateInfo,
                                                          const VkDeviceSize       minChunkSize) const
{
    MovePtr<SparseAllocation> sparseAllocation (new SparseAllocation());

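    // Create a minimal (4-byte) reference buffer just to query alignment and memory type requirements;
    // the actual resource size is computed from the binds below.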
    referenceCreateInfo.size = sizeof(deUint32);
    const Unique<VkBuffer>     refBuffer          (createBuffer(vk, device, &referenceCreateInfo));
    const VkMemoryRequirements memoryRequirements = getBufferMemoryRequirements(vk, device, *refBuffer);
    const VkDeviceSize         chunkSize          = std::max(memoryRequirements.alignment, static_cast<VkDeviceSize>(deAlign64(minChunkSize, memoryRequirements.alignment)));
    const deUint32             memoryTypeNdx      = findMatchingMemoryType(instanceInterface, physicalDevice, memoryRequirements, MemoryRequirement::Any);
    VkMemoryAllocateInfo       allocInfo          =
    {
        VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,     // VkStructureType    sType;
        DE_NULL,                                    // const void*        pNext;
        memoryRequirements.size,                    // VkDeviceSize       allocationSize;
        memoryTypeNdx,                              // deUint32           memoryTypeIndex;
    };

    for (std::vector<deUint32>::const_iterator numChunksIter = m_chunksPerAllocation.begin(); numChunksIter != m_chunksPerAllocation.end(); ++numChunksIter)
    {
        allocInfo.allocationSize = *numChunksIter * chunkSize;
        sparseAllocation->allocations.push_back(makeDeSharedPtr(allocator.allocate(allocInfo, (VkDeviceSize)0)));
    }

    for (std::vector<MemoryBind>::const_iterator memBindIter = m_memoryBinds.begin(); memBindIter != m_memoryBinds.end(); ++memBindIter)
    {
        const Allocation&        alloc = **sparseAllocation->allocations[memBindIter->allocationNdx];
        const VkSparseMemoryBind bind  =
        {
            memBindIter->resourceChunkNdx * chunkSize,                      // VkDeviceSize               resourceOffset;
            memBindIter->numChunks * chunkSize,                             // VkDeviceSize               size;
            alloc.getMemory(),                                              // VkDeviceMemory             memory;
            alloc.getOffset() + memBindIter->memoryChunkNdx * chunkSize,    // VkDeviceSize               memoryOffset;
            (VkSparseMemoryBindFlags)0,                                     // VkSparseMemoryBindFlags    flags;
        };
        sparseAllocation->memoryBinds.push_back(bind);
        referenceCreateInfo.size = std::max(referenceCreateInfo.size, bind.resourceOffset + bind.size);
    }

    sparseAllocation->resourceSize      = referenceCreateInfo.size;
    sparseAllocation->numResourceChunks = m_resourceChunkNdx;
    sparseAllocation->memoryType        = memoryTypeNdx;
    sparseAllocation->heapIndex         = getHeapIndexForMemoryType(instanceInterface, physicalDevice, memoryTypeNdx);

    return sparseAllocation;
}

VkImageCreateInfo makeImageCreateInfo (const VkFormat format, const IVec2& size, const VkImageUsageFlags usage)
{
    const VkImageCreateInfo imageParams =
    {
        VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,    // VkStructureType          sType;
        DE_NULL,                                // const void*              pNext;
        (VkImageCreateFlags)0,                  // VkImageCreateFlags       flags;
        VK_IMAGE_TYPE_2D,                       // VkImageType              imageType;
        format,                                 // VkFormat                 format;
        makeExtent3D(size.x(), size.y(), 1),    // VkExtent3D               extent;
        1u,                                     // deUint32                 mipLevels;
        1u,                                     // deUint32                 arrayLayers;
        VK_SAMPLE_COUNT_1_BIT,                  // VkSampleCountFlagBits    samples;
        VK_IMAGE_TILING_OPTIMAL,                // VkImageTiling            tiling;
        usage,                                  // VkImageUsageFlags        usage;
        VK_SHARING_MODE_EXCLUSIVE,              // VkSharingMode            sharingMode;
        0u,                                     // deUint32                 queueFamilyIndexCount;
        DE_NULL,                                // const deUint32*          pQueueFamilyIndices;
        VK_IMAGE_LAYOUT_UNDEFINED,              // VkImageLayout            initialLayout;
    };
    return imageParams;
}

Move<VkPipeline> makeGraphicsPipeline (const DeviceInterface&                 vk,
                                       const VkDevice                         device,
                                       const VkPipelineLayout                 pipelineLayout,
                                       const VkRenderPass                     renderPass,
                                       const IVec2                            renderSize,
                                       const VkPrimitiveTopology              topology,
                                       const deUint32                         stageCount,
                                       const VkPipelineShaderStageCreateInfo* pStages)
{
    const VkVertexInputBindingDescription vertexInputBindingDescription =
    {
        0u,                             // uint32_t             binding;
        sizeof(Vec4),                   // uint32_t             stride;
        VK_VERTEX_INPUT_RATE_VERTEX,    // VkVertexInputRate    inputRate;
    };

    const VkVertexInputAttributeDescription vertexInputAttributeDescription =
    {
        0u,                                 // uint32_t    location;
        0u,                                 // uint32_t    binding;
        VK_FORMAT_R32G32B32A32_SFLOAT,      // VkFormat    format;
        0u,                                 // uint32_t    offset;
    };

    const VkPipelineVertexInputStateCreateInfo vertexInputStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,  // VkStructureType                             sType;
        DE_NULL,                                                    // const void*                                 pNext;
        (VkPipelineVertexInputStateCreateFlags)0,                   // VkPipelineVertexInputStateCreateFlags       flags;
        1u,                                                         // uint32_t                                    vertexBindingDescriptionCount;
        &vertexInputBindingDescription,                             // const VkVertexInputBindingDescription*      pVertexBindingDescriptions;
        1u,                                                         // uint32_t                                    vertexAttributeDescriptionCount;
        &vertexInputAttributeDescription,                           // const VkVertexInputAttributeDescription*    pVertexAttributeDescriptions;
    };

    const VkPipelineInputAssemblyStateCreateInfo pipelineInputAssemblyStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,    // VkStructureType                            sType;
        DE_NULL,                                                        // const void*                                pNext;
        (VkPipelineInputAssemblyStateCreateFlags)0,                     // VkPipelineInputAssemblyStateCreateFlags    flags;
        topology,                                                       // VkPrimitiveTopology                        topology;
        VK_FALSE,                                                       // VkBool32                                   primitiveRestartEnable;
    };

    const VkViewport viewport = makeViewport(renderSize);
    const VkRect2D   scissor  = makeRect2D(renderSize);

    const VkPipelineViewportStateCreateInfo pipelineViewportStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,  // VkStructureType                       sType;
        DE_NULL,                                                // const void*                           pNext;
        (VkPipelineViewportStateCreateFlags)0,                  // VkPipelineViewportStateCreateFlags    flags;
        1u,                                                     // uint32_t                              viewportCount;
        &viewport,                                              // const VkViewport*                     pViewports;
        1u,                                                     // uint32_t                              scissorCount;
        &scissor,                                               // const VkRect2D*                       pScissors;
    };

    const VkPipelineRasterizationStateCreateInfo pipelineRasterizationStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,     // VkStructureType                            sType;
        DE_NULL,                                                        // const void*                                pNext;
        (VkPipelineRasterizationStateCreateFlags)0,                     // VkPipelineRasterizationStateCreateFlags    flags;
        VK_FALSE,                                                       // VkBool32                                   depthClampEnable;
        VK_FALSE,                                                       // VkBool32                                   rasterizerDiscardEnable;
        VK_POLYGON_MODE_FILL,                                           // VkPolygonMode                              polygonMode;
        VK_CULL_MODE_NONE,                                              // VkCullModeFlags                            cullMode;
        VK_FRONT_FACE_COUNTER_CLOCKWISE,                                // VkFrontFace                                frontFace;
        VK_FALSE,                                                       // VkBool32                                   depthBiasEnable;
        0.0f,                                                           // float                                      depthBiasConstantFactor;
        0.0f,                                                           // float                                      depthBiasClamp;
        0.0f,                                                           // float                                      depthBiasSlopeFactor;
        1.0f,                                                           // float                                      lineWidth;
    };

    const VkPipelineMultisampleStateCreateInfo pipelineMultisampleStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,   // VkStructureType                          sType;
        DE_NULL,                                                    // const void*                              pNext;
        (VkPipelineMultisampleStateCreateFlags)0,                   // VkPipelineMultisampleStateCreateFlags    flags;
        VK_SAMPLE_COUNT_1_BIT,                                      // VkSampleCountFlagBits                    rasterizationSamples;
        VK_FALSE,                                                   // VkBool32                                 sampleShadingEnable;
        0.0f,                                                       // float                                    minSampleShading;
        DE_NULL,                                                    // const VkSampleMask*                      pSampleMask;
        VK_FALSE,                                                   // VkBool32                                 alphaToCoverageEnable;
        VK_FALSE                                                    // VkBool32                                 alphaToOneEnable;
    };

    const VkStencilOpState stencilOpState = makeStencilOpState(
        VK_STENCIL_OP_KEEP,     // stencil fail
        VK_STENCIL_OP_KEEP,     // depth & stencil pass
        VK_STENCIL_OP_KEEP,     // depth only fail
        VK_COMPARE_OP_ALWAYS,   // compare op
        0u,                     // compare mask
        0u,                     // write mask
        0u);                    // reference

    VkPipelineDepthStencilStateCreateInfo pipelineDepthStencilStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,     // VkStructureType                            sType;
        DE_NULL,                                                        // const void*                                pNext;
        (VkPipelineDepthStencilStateCreateFlags)0,                      // VkPipelineDepthStencilStateCreateFlags     flags;
        VK_FALSE,                                                       // VkBool32                                   depthTestEnable;
        VK_FALSE,                                                       // VkBool32                                   depthWriteEnable;
        VK_COMPARE_OP_LESS,                                             // VkCompareOp                                depthCompareOp;
        VK_FALSE,                                                       // VkBool32                                   depthBoundsTestEnable;
        VK_FALSE,                                                       // VkBool32                                   stencilTestEnable;
        stencilOpState,                                                 // VkStencilOpState                           front;
        stencilOpState,                                                 // VkStencilOpState                           back;
        0.0f,                                                           // float                                      minDepthBounds;
        1.0f,                                                           // float                                      maxDepthBounds;
    };

    const VkColorComponentFlags colorComponentsAll = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    const VkPipelineColorBlendAttachmentState pipelineColorBlendAttachmentState =
    {
        VK_FALSE,                   // VkBool32                 blendEnable;
        VK_BLEND_FACTOR_ONE,        // VkBlendFactor            srcColorBlendFactor;
        VK_BLEND_FACTOR_ZERO,       // VkBlendFactor            dstColorBlendFactor;
        VK_BLEND_OP_ADD,            // VkBlendOp                colorBlendOp;
        VK_BLEND_FACTOR_ONE,        // VkBlendFactor            srcAlphaBlendFactor;
        VK_BLEND_FACTOR_ZERO,       // VkBlendFactor            dstAlphaBlendFactor;
        VK_BLEND_OP_ADD,            // VkBlendOp                alphaBlendOp;
        colorComponentsAll,         // VkColorComponentFlags    colorWriteMask;
    };

    const VkPipelineColorBlendStateCreateInfo pipelineColorBlendStateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,   // VkStructureType                               sType;
        DE_NULL,                                                    // const void*                                   pNext;
        (VkPipelineColorBlendStateCreateFlags)0,                    // VkPipelineColorBlendStateCreateFlags          flags;
        VK_FALSE,                                                   // VkBool32                                      logicOpEnable;
        VK_LOGIC_OP_COPY,                                           // VkLogicOp                                     logicOp;
        1u,                                                         // deUint32                                      attachmentCount;
        &pipelineColorBlendAttachmentState,                         // const VkPipelineColorBlendAttachmentState*    pAttachments;
        { 0.0f, 0.0f, 0.0f, 0.0f },                                 // float                                         blendConstants[4];
    };

    const VkGraphicsPipelineCreateInfo graphicsPipelineInfo =
    {
        VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,    // VkStructureType                                  sType;
        DE_NULL,                                            // const void*                                      pNext;
        (VkPipelineCreateFlags)0,                           // VkPipelineCreateFlags                            flags;
        stageCount,                                         // deUint32                                         stageCount;
        pStages,                                            // const VkPipelineShaderStageCreateInfo*           pStages;
        &vertexInputStateInfo,                              // const VkPipelineVertexInputStateCreateInfo*      pVertexInputState;
        &pipelineInputAssemblyStateInfo,                    // const VkPipelineInputAssemblyStateCreateInfo*    pInputAssemblyState;
        DE_NULL,                                            // const VkPipelineTessellationStateCreateInfo*     pTessellationState;
        &pipelineViewportStateInfo,                         // const VkPipelineViewportStateCreateInfo*         pViewportState;
        &pipelineRasterizationStateInfo,                    // const VkPipelineRasterizationStateCreateInfo*    pRasterizationState;
        &pipelineMultisampleStateInfo,                      // const VkPipelineMultisampleStateCreateInfo*      pMultisampleState;
        &pipelineDepthStencilStateInfo,                     // const VkPipelineDepthStencilStateCreateInfo*     pDepthStencilState;
        &pipelineColorBlendStateInfo,                       // const VkPipelineColorBlendStateCreateInfo*       pColorBlendState;
        DE_NULL,                                            // const VkPipelineDynamicStateCreateInfo*          pDynamicState;
        pipelineLayout,                                     // VkPipelineLayout                                 layout;
        renderPass,                                         // VkRenderPass                                     renderPass;
        0u,                                                 // deUint32                                         subpass;
        DE_NULL,                                            // VkPipeline                                       basePipelineHandle;
        0,                                                  // deInt32                                          basePipelineIndex;
    };

    return createGraphicsPipeline(vk, device, DE_NULL, &graphicsPipelineInfo);
}

//! Return true if there are any red (or all zero) pixels in the image
bool imageHasErrorPixels (const tcu::ConstPixelBufferAccess image)
{
    const Vec4 errorColor = Vec4(1.0f, 0.0f, 0.0f, 1.0f);
    const Vec4 blankColor = Vec4();

    for (int y = 0; y < image.getHeight(); ++y)
    for (int x = 0; x < image.getWidth(); ++x)
    {
        const Vec4 color = image.getPixel(x, y);
        if (color == errorColor || color == blankColor)
            return true;
    }

    return false;
}
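
// Note: SparseBufferTestInstance::draw() below clears the framebuffer to the error color (red),
// so pixels never covered by the test geometry are reported as errors as well.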

class Renderer
{
public:
    typedef std::map<VkShaderStageFlagBits, const VkSpecializationInfo*> SpecializationMap;

    //! Use the delegate to bind descriptor sets, vertex buffers, etc. and make a draw call
    struct Delegate
    {
        virtual      ~Delegate     (void) {}
        virtual void rendererDraw  (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const = 0;
    };

    Renderer (const DeviceInterface&      vk,
              const VkDevice              device,
              Allocator&                  allocator,
              const deUint32              queueFamilyIndex,
              const VkDescriptorSetLayout descriptorSetLayout,  //!< may be NULL, if no descriptors are used
              BinaryCollection&           binaryCollection,
              const std::string&          vertexName,
              const std::string&          fragmentName,
              const VkBuffer              colorBuffer,
              const IVec2&                renderSize,
              const VkFormat              colorFormat,
              const Vec4&                 clearColor,
              const VkPrimitiveTopology   topology,
              SpecializationMap           specMap = SpecializationMap())
        : m_colorBuffer           (colorBuffer)
        , m_renderSize            (renderSize)
        , m_colorFormat           (colorFormat)
        , m_colorSubresourceRange (makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u))
        , m_clearColor            (clearColor)
        , m_topology              (topology)
        , m_descriptorSetLayout   (descriptorSetLayout)
    {
        m_colorImage      = makeImage(vk, device, makeImageCreateInfo(m_colorFormat, m_renderSize, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT));
        m_colorImageAlloc = bindImage(vk, device, allocator, *m_colorImage, MemoryRequirement::Any);
        m_colorAttachment = makeImageView(vk, device, *m_colorImage, VK_IMAGE_VIEW_TYPE_2D, m_colorFormat, m_colorSubresourceRange);

        m_vertexModule   = createShaderModule(vk, device, binaryCollection.get(vertexName), 0u);
        m_fragmentModule = createShaderModule(vk, device, binaryCollection.get(fragmentName), 0u);

        const VkPipelineShaderStageCreateInfo pShaderStages[] =
        {
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                     sType;
                DE_NULL,                                                // const void*                         pNext;
                (VkPipelineShaderStageCreateFlags)0,                    // VkPipelineShaderStageCreateFlags    flags;
                VK_SHADER_STAGE_VERTEX_BIT,                             // VkShaderStageFlagBits               stage;
                *m_vertexModule,                                        // VkShaderModule                      module;
                "main",                                                 // const char*                         pName;
                specMap[VK_SHADER_STAGE_VERTEX_BIT],                    // const VkSpecializationInfo*         pSpecializationInfo;
            },
            {
                VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,    // VkStructureType                     sType;
                DE_NULL,                                                // const void*                         pNext;
                (VkPipelineShaderStageCreateFlags)0,                    // VkPipelineShaderStageCreateFlags    flags;
                VK_SHADER_STAGE_FRAGMENT_BIT,                           // VkShaderStageFlagBits               stage;
                *m_fragmentModule,                                      // VkShaderModule                      module;
                "main",                                                 // const char*                         pName;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT],                  // const VkSpecializationInfo*         pSpecializationInfo;
            }
        };

        m_renderPass     = makeRenderPass(vk, device, m_colorFormat);
        m_framebuffer    = makeFramebuffer(vk, device, *m_renderPass, 1u, &m_colorAttachment.get(),
                                           static_cast<deUint32>(m_renderSize.x()), static_cast<deUint32>(m_renderSize.y()));
        m_pipelineLayout = makePipelineLayout(vk, device, m_descriptorSetLayout);
        m_pipeline       = makeGraphicsPipeline(vk, device, *m_pipelineLayout, *m_renderPass, m_renderSize, m_topology, DE_LENGTH_OF_ARRAY(pShaderStages), pShaderStages);
        m_cmdPool        = makeCommandPool(vk, device, queueFamilyIndex);
        m_cmdBuffer      = allocateCommandBuffer(vk, device, *m_cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
    }

    void draw (const DeviceInterface& vk,
               const VkDevice         device,
               const VkQueue          queue,
               const Delegate&        drawDelegate,
               const bool             useDeviceGroups,
               const deUint32         deviceID) const
    {
        beginCommandBuffer(vk, *m_cmdBuffer);

        beginRenderPass(vk, *m_cmdBuffer, *m_renderPass, *m_framebuffer, makeRect2D(0, 0, m_renderSize.x(), m_renderSize.y()), m_clearColor);

        vk.cmdBindPipeline(*m_cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *m_pipeline);
        drawDelegate.rendererDraw(*m_pipelineLayout, *m_cmdBuffer);

        endRenderPass(vk, *m_cmdBuffer);

        copyImageToBuffer(vk, *m_cmdBuffer, *m_colorImage, m_colorBuffer, m_renderSize);

        endCommandBuffer(vk, *m_cmdBuffer);
        submitCommandsAndWait(vk, device, queue, *m_cmdBuffer, 0U, DE_NULL, DE_NULL, 0U, DE_NULL, useDeviceGroups, deviceID);
    }

private:
    const VkBuffer                  m_colorBuffer;
    const IVec2                     m_renderSize;
    const VkFormat                  m_colorFormat;
    const VkImageSubresourceRange   m_colorSubresourceRange;
    const Vec4                      m_clearColor;
    const VkPrimitiveTopology       m_topology;
    const VkDescriptorSetLayout     m_descriptorSetLayout;

    Move<VkImage>                   m_colorImage;
    MovePtr<Allocation>             m_colorImageAlloc;
    Move<VkImageView>               m_colorAttachment;
    Move<VkShaderModule>            m_vertexModule;
    Move<VkShaderModule>            m_fragmentModule;
    Move<VkRenderPass>              m_renderPass;
    Move<VkFramebuffer>             m_framebuffer;
    Move<VkPipelineLayout>          m_pipelineLayout;
    Move<VkPipeline>                m_pipeline;
    Move<VkCommandPool>             m_cmdPool;
    Move<VkCommandBuffer>           m_cmdBuffer;

    // "deleted"
    Renderer            (const Renderer&);
    Renderer& operator= (const Renderer&);
};

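//! Bind the given memory layout to a sparse buffer on the sparse queue. The bind waits on a fence,
//! so the binding has completed before this function returns (no semaphores are signaled here).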
void bindSparseBuffer (const DeviceInterface& vk, const VkDevice device, const VkQueue sparseQueue, const VkBuffer buffer, const SparseAllocation& sparseAllocation,
                       const bool useDeviceGroups, deUint32 resourceDevId, deUint32 memoryDeviceId)
{
    const VkSparseBufferMemoryBindInfo sparseBufferMemoryBindInfo =
    {
        buffer,                                                     // VkBuffer                     buffer;
        static_cast<deUint32>(sparseAllocation.memoryBinds.size()), // uint32_t                     bindCount;
        &sparseAllocation.memoryBinds[0],                           // const VkSparseMemoryBind*    pBinds;
    };

    const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
    {
        VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,        // VkStructureType    sType;
        DE_NULL,                                                    // const void*        pNext;
        resourceDevId,                                              // deUint32           resourceDeviceIndex;
        memoryDeviceId,                                             // deUint32           memoryDeviceIndex;
    };

    const VkBindSparseInfo bindInfo =
    {
        VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,                         // VkStructureType                             sType;
        useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,        // const void*                                 pNext;
        0u,                                                         // uint32_t                                    waitSemaphoreCount;
        DE_NULL,                                                    // const VkSemaphore*                          pWaitSemaphores;
        1u,                                                         // uint32_t                                    bufferBindCount;
        &sparseBufferMemoryBindInfo,                                // const VkSparseBufferMemoryBindInfo*         pBufferBinds;
        0u,                                                         // uint32_t                                    imageOpaqueBindCount;
        DE_NULL,                                                    // const VkSparseImageOpaqueMemoryBindInfo*    pImageOpaqueBinds;
        0u,                                                         // uint32_t                                    imageBindCount;
        DE_NULL,                                                    // const VkSparseImageMemoryBindInfo*          pImageBinds;
        0u,                                                         // uint32_t                                    signalSemaphoreCount;
        DE_NULL,                                                    // const VkSemaphore*                          pSignalSemaphores;
    };

    const Unique<VkFence> fence (createFence(vk, device));

    VK_CHECK(vk.queueBindSparse(sparseQueue, 1u, &bindInfo, *fence));
    VK_CHECK(vk.waitForFences(device, 1u, &fence.get(), VK_TRUE, ~0ull));
}

class SparseBufferTestInstance : public SparseResourcesBaseInstance, Renderer::Delegate
{
public:
    SparseBufferTestInstance (Context& context, const TestFlags flags)
        : SparseResourcesBaseInstance (context, (flags & TEST_FLAG_ENABLE_DEVICE_GROUPS) != 0)
        , m_aliased                   ((flags & TEST_FLAG_ALIASED) != 0)
        , m_residency                 ((flags & TEST_FLAG_RESIDENCY) != 0)
        , m_nonResidentStrict         ((flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0)
        , m_renderSize                (RENDER_SIZE, RENDER_SIZE)
        , m_colorFormat               (VK_FORMAT_R8G8B8A8_UNORM)
        , m_colorBufferSize           (m_renderSize.x() * m_renderSize.y() * tcu::getPixelSize(mapVkFormat(m_colorFormat)))
    {
        {
            QueueRequirementsVec requirements;
            requirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
            requirements.push_back(QueueRequirements(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 1u));

            createDeviceSupportingQueues(requirements);
        }
        const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(m_context.getInstanceInterface(), getPhysicalDevice());

        if (!features.sparseBinding)
            TCU_THROW(NotSupportedError, "Missing feature: sparseBinding");

        if (m_residency && !features.sparseResidencyBuffer)
            TCU_THROW(NotSupportedError, "Missing feature: sparseResidencyBuffer");

        if (m_aliased && !features.sparseResidencyAliased)
            TCU_THROW(NotSupportedError, "Missing feature: sparseResidencyAliased");

        if (m_nonResidentStrict && !m_context.getDeviceProperties().sparseProperties.residencyNonResidentStrict)
            TCU_THROW(NotSupportedError, "Missing sparse property: residencyNonResidentStrict");

        const DeviceInterface& vk = getDeviceInterface();
        m_sparseQueue    = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0u);
        m_universalQueue = getQueue(VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT, 0u);

        m_sharedQueueFamilyIndices[0] = m_sparseQueue.queueFamilyIndex;
        m_sharedQueueFamilyIndices[1] = m_universalQueue.queueFamilyIndex;

        m_colorBuffer      = makeBuffer(vk, getDevice(), makeBufferCreateInfo(m_colorBufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT));
        m_colorBufferAlloc = bindBuffer(vk, getDevice(), getAllocator(), *m_colorBuffer, MemoryRequirement::HostVisible);

        deMemset(m_colorBufferAlloc->getHostPtr(), 0, static_cast<std::size_t>(m_colorBufferSize));
        flushAlloc(vk, getDevice(), *m_colorBufferAlloc);
    }

protected:
    VkBufferCreateInfo getSparseBufferCreateInfo (const VkBufferUsageFlags usage) const
    {
        VkBufferCreateFlags flags = VK_BUFFER_CREATE_SPARSE_BINDING_BIT;
        if (m_residency)
            flags |= VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT;
        if (m_aliased)
            flags |= VK_BUFFER_CREATE_SPARSE_ALIASED_BIT;

        VkBufferCreateInfo referenceBufferCreateInfo =
        {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,       // VkStructureType        sType;
            DE_NULL,                                    // const void*            pNext;
            flags,                                      // VkBufferCreateFlags    flags;
            0u, // override later                       // VkDeviceSize           size;
            VK_BUFFER_USAGE_TRANSFER_DST_BIT | usage,   // VkBufferUsageFlags     usage;
            VK_SHARING_MODE_EXCLUSIVE,                  // VkSharingMode          sharingMode;
            0u,                                         // uint32_t               queueFamilyIndexCount;
            DE_NULL,                                    // const uint32_t*        pQueueFamilyIndices;
        };

        if (m_sparseQueue.queueFamilyIndex != m_universalQueue.queueFamilyIndex)
        {
            referenceBufferCreateInfo.sharingMode           = VK_SHARING_MODE_CONCURRENT;
            referenceBufferCreateInfo.queueFamilyIndexCount = DE_LENGTH_OF_ARRAY(m_sharedQueueFamilyIndices);
            referenceBufferCreateInfo.pQueueFamilyIndices   = m_sharedQueueFamilyIndices;
        }

        return referenceBufferCreateInfo;
    }

    void draw (const VkPrimitiveTopology   topology,
               const VkDescriptorSetLayout descriptorSetLayout = DE_NULL,
               Renderer::SpecializationMap specMap             = Renderer::SpecializationMap(),
               bool                        useDeviceGroups     = false,
               deUint32                    deviceID            = 0)
    {
        const UniquePtr<Renderer> renderer(new Renderer(
            getDeviceInterface(), getDevice(), getAllocator(), m_universalQueue.queueFamilyIndex, descriptorSetLayout,
            m_context.getBinaryCollection(), "vert", "frag", *m_colorBuffer, m_renderSize, m_colorFormat, Vec4(1.0f, 0.0f, 0.0f, 1.0f), topology, specMap));

        renderer->draw(getDeviceInterface(), getDevice(), m_universalQueue.queueHandle, *this, useDeviceGroups, deviceID);
    }

    bool isResultImageCorrect (void) const
    {
        invalidateAlloc(getDeviceInterface(), getDevice(), *m_colorBufferAlloc);

        const tcu::ConstPixelBufferAccess resultImage (mapVkFormat(m_colorFormat), m_renderSize.x(), m_renderSize.y(), 1u, m_colorBufferAlloc->getHostPtr());

        m_context.getTestContext().getLog()
            << tcu::LogImageSet("Result", "Result") << tcu::LogImage("color0", "", resultImage) << tcu::TestLog::EndImageSet;

        return !imageHasErrorPixels(resultImage);
    }

    const bool          m_aliased;
    const bool          m_residency;
    const bool          m_nonResidentStrict;

    Queue               m_sparseQueue;
    Queue               m_universalQueue;

private:
    const IVec2         m_renderSize;
    const VkFormat      m_colorFormat;
    const VkDeviceSize  m_colorBufferSize;

    Move<VkBuffer>      m_colorBuffer;
    MovePtr<Allocation> m_colorBufferAlloc;

    deUint32            m_sharedQueueFamilyIndices[2];
};

void initProgramsDrawWithUBO (vk::SourceCollections& programCollection, const TestFlags flags)
{
    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in vec4 in_position;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        const bool        aliased           = (flags & TEST_FLAG_ALIASED) != 0;
        const bool        residency         = (flags & TEST_FLAG_RESIDENCY) != 0;
        const bool        nonResidentStrict = (flags & TEST_FLAG_NON_RESIDENT_STRICT) != 0;
        const std::string valueExpr         = (aliased ? "ivec4(3*(ndx % nonAliasedSize) ^ 127, 0, 0, 0)" : "ivec4(3*ndx ^ 127, 0, 0, 0)");

        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) out vec4 o_color;\n"
            << "\n"
            << "layout(constant_id = 1) const int dataSize  = 1;\n"
            << "layout(constant_id = 2) const int chunkSize = 1;\n"
            << "\n"
            << "layout(set = 0, binding = 0, std140) uniform SparseBuffer {\n"
            << "    ivec4 data[dataSize];\n"
            << "} ubo;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    const int fragNdx   = int(gl_FragCoord.x) + " << RENDER_SIZE << " * int(gl_FragCoord.y);\n"
            << "    const int pageSize  = " << RENDER_SIZE << " * " << RENDER_SIZE << ";\n"
            << "    const int numChunks = dataSize / chunkSize;\n";

        if (aliased)
            src << "    const int nonAliasedSize = (numChunks > 1 ? dataSize - chunkSize : dataSize);\n";

        src << "    bool ok = true;\n"
            << "\n"
            << "    for (int ndx = fragNdx; ndx < dataSize; ndx += pageSize)\n"
            << "    {\n";

        if (residency && nonResidentStrict)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            ok = ok && (ubo.data[ndx] == ivec4(0));\n"
                << "        else\n"
                << "            ok = ok && (ubo.data[ndx] == " + valueExpr + ");\n";
        }
        else if (residency)
        {
            src << "        if (ndx >= chunkSize && ndx < 2*chunkSize)\n"
                << "            continue;\n"
                << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";
        }
        else
            src << "        ok = ok && (ubo.data[ndx] == " << valueExpr << ");\n";

        src << "    }\n"
            << "\n"
            << "    if (ok)\n"
            << "        o_color = vec4(0.0, 1.0, 0.0, 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(1.0, 0.0, 0.0, 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}
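
// Note: the ivec4(3*ndx ^ 127, ...) pattern checked in the fragment shader above must stay in sync with
// the host-side fill in UBOTestInstance::iterate() below, which writes IVec4(3*i ^ 127, 0, 0, 0) per element.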

//! Sparse buffer backing a UBO
class UBOTestInstance : public SparseBufferTestInstance
{
public:
    UBOTestInstance (Context& context, const TestFlags flags)
        : SparseBufferTestInstance (context, flags)
    {
    }

    void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
    {
        const DeviceInterface& vk           = getDeviceInterface();
        const VkDeviceSize     vertexOffset = 0ull;

        vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
        vk.cmdBindDescriptorSets(cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);
        vk.cmdDraw              (cmdBuffer, 4u, 1u, 0u, 0u);
    }

    tcu::TestStatus iterate (void)
    {
        const InstanceInterface&  instance = m_context.getInstanceInterface();
        const DeviceInterface&    vk       = getDeviceInterface();
        MovePtr<SparseAllocation> sparseAllocation;
        Move<VkBuffer>            sparseBuffer;
        Move<VkBuffer>            sparseBufferAliased;
        bool                      setupDescriptors = true;

        // Go through all physical devices
        for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
        {
            const deUint32 firstDeviceID  = physDevID;
            const deUint32 secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;

            // Set up the sparse buffer
            {
                VkBufferCreateInfo referenceBufferCreateInfo = getSparseBufferCreateInfo(VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
                const VkDeviceSize minChunkSize              = 512u;  // make sure the smallest allocation is at least this big
                deUint32           numMaxChunks              = 0u;

                // Check how many chunks we can allocate given the alignment and size requirements of UBOs
                {
                    const UniquePtr<SparseAllocation> minAllocation(SparseAllocationBuilder()
                        .addMemoryBind()
                        .build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize));

                    numMaxChunks = deMaxu32(static_cast<deUint32>(m_context.getDeviceProperties().limits.maxUniformBufferRange / minAllocation->resourceSize), 1u);
                }

                if (numMaxChunks < 4)
                {
                    sparseAllocation = SparseAllocationBuilder()
                        .addMemoryBind()
                        .build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
                }
                else
                {
                    // Try to use a non-trivial memory allocation scheme to make it different from a non-sparse binding
                    SparseAllocationBuilder builder;
                    builder.addMemoryBind();

                    if (m_residency)
                        builder.addResourceHole();

                    builder
                        .addMemoryAllocation()
                        .addMemoryHole()
                        .addMemoryBind();

                    if (m_aliased)
                        builder.addAliasedMemoryBind(0u, 0u);

                    sparseAllocation = builder.build(instance, getPhysicalDevice(secondDeviceID), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, minChunkSize);
                    DE_ASSERT(sparseAllocation->resourceSize <= m_context.getDeviceProperties().limits.maxUniformBufferRange);
                }

                if (firstDeviceID != secondDeviceID)
                {
                    VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
                    vk.getDeviceGroupPeerMemoryFeatures(getDevice(), sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);

                    if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
                        ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
                    {
                        TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
                    }
                }

                // Create the buffer
                referenceBufferCreateInfo.size = sparseAllocation->resourceSize;
                sparseBuffer = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBuffer, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);

                if (m_aliased)
                {
                    sparseBufferAliased = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
                    bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *sparseBufferAliased, *sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
                }
            }

            // Set uniform data
            {
                const bool         hasAliasedChunk   = (m_aliased && sparseAllocation->memoryBinds.size() > 1u);
                const VkDeviceSize chunkSize         = sparseAllocation->resourceSize / sparseAllocation->numResourceChunks;
                const VkDeviceSize stagingBufferSize = sparseAllocation->resourceSize - (hasAliasedChunk ? chunkSize : 0);
                const deUint32     numBufferEntries  = static_cast<deUint32>(stagingBufferSize / sizeof(IVec4));

                const Unique<VkBuffer>      stagingBuffer      (makeBuffer(vk, getDevice(), makeBufferCreateInfo(stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT)));
                const UniquePtr<Allocation> stagingBufferAlloc (bindBuffer(vk, getDevice(), getAllocator(), *stagingBuffer, MemoryRequirement::HostVisible));

                {
                    // If aliased chunk is used, the staging buffer is smaller than the sparse buffer and we don't overwrite the last chunk
                    IVec4* const pData = static_cast<IVec4*>(stagingBufferAlloc->getHostPtr());
                    for (deUint32 i = 0; i < numBufferEntries; ++i)
                        pData[i] = IVec4(3*i ^ 127, 0, 0, 0);

                    flushAlloc(vk, getDevice(), *stagingBufferAlloc);

                    const VkBufferCopy copyRegion =
                    {
                        0ull,               // VkDeviceSize    srcOffset;
                        0ull,               // VkDeviceSize    dstOffset;
                        stagingBufferSize,  // VkDeviceSize    size;
                    };

                    const Unique<VkCommandPool>   cmdPool   (makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
                    const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

                    beginCommandBuffer (vk, *cmdBuffer);
                    vk.cmdCopyBuffer   (*cmdBuffer, *stagingBuffer, *sparseBuffer, 1u, &copyRegion);
                    endCommandBuffer   (vk, *cmdBuffer);

                    submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
                    // Once the fence is signaled, the write is also available to the aliasing buffer.
                }
            }

            // Make sure that we don't try to access a larger range than is allowed. This only applies to a single chunk case.
            const deUint32 maxBufferRange = deMinu32(static_cast<deUint32>(sparseAllocation->resourceSize), m_context.getDeviceProperties().limits.maxUniformBufferRange);

            // Descriptor sets
            {
                // Setup only once
                if (setupDescriptors)
                {
                    m_descriptorSetLayout = DescriptorSetLayoutBuilder()
                        .addSingleBinding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_FRAGMENT_BIT)
                        .build(vk, getDevice());

                    m_descriptorPool = DescriptorPoolBuilder()
                        .addType(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER)
                        .build(vk, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

                    m_descriptorSet  = makeDescriptorSet(vk, getDevice(), *m_descriptorPool, *m_descriptorSetLayout);
                    setupDescriptors = false;
                }

                const VkBuffer               buffer           = (m_aliased ? *sparseBufferAliased : *sparseBuffer);
                const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(buffer, 0ull, maxBufferRange);

                DescriptorSetUpdateBuilder()
                    .writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, &sparseBufferInfo)
                    .update(vk, getDevice());
            }

            // Vertex data
            {
                const Vec4 vertexData[] =
                {
                    Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
                    Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
                    Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
                    Vec4( 1.0f,  1.0f, 0.0f, 1.0f),
                };

                const VkDeviceSize vertexBufferSize = sizeof(vertexData);

                m_vertexBuffer      = makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
                m_vertexBufferAlloc = bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

                deMemcpy(m_vertexBufferAlloc->getHostPtr(), &vertexData[0], vertexBufferSize);
                flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
            }

            // Draw
            {
                std::vector<deInt32> specializationData;
                {
                    const deUint32 numBufferEntries   = maxBufferRange / static_cast<deUint32>(sizeof(IVec4));
                    const deUint32 numEntriesPerChunk = numBufferEntries / sparseAllocation->numResourceChunks;

                    specializationData.push_back(numBufferEntries);
                    specializationData.push_back(numEntriesPerChunk);
                }

                const VkSpecializationMapEntry specMapEntries[] =
                {
                    {
                        1u,                 // uint32_t    constantID;
                        0u,                 // uint32_t    offset;
                        sizeof(deInt32),    // size_t      size;
                    },
                    {
                        2u,                 // uint32_t    constantID;
                        sizeof(deInt32),    // uint32_t    offset;
                        sizeof(deInt32),    // size_t      size;
                    },
                };

                const VkSpecializationInfo specInfo =
                {
                    DE_LENGTH_OF_ARRAY(specMapEntries),     // uint32_t                           mapEntryCount;
                    specMapEntries,                         // const VkSpecializationMapEntry*    pMapEntries;
                    sizeInBytes(specializationData),        // size_t                             dataSize;
                    getDataOrNullptr(specializationData),   // const void*                        pData;
                };

                Renderer::SpecializationMap specMap;
                specMap[VK_SHADER_STAGE_FRAGMENT_BIT] = &specInfo;

                draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, *m_descriptorSetLayout, specMap, usingDeviceGroups(), firstDeviceID);
            }

            if (!isResultImageCorrect())
                return tcu::TestStatus::fail("Some buffer values were incorrect");
        }
        return tcu::TestStatus::pass("Pass");
    }

private:
    Move<VkBuffer>              m_vertexBuffer;
    MovePtr<Allocation>         m_vertexBufferAlloc;

    Move<VkDescriptorSetLayout> m_descriptorSetLayout;
    Move<VkDescriptorPool>      m_descriptorPool;
    Move<VkDescriptorSet>       m_descriptorSet;
};

void initProgramsDrawGrid (vk::SourceCollections& programCollection, const TestFlags flags)
{
    DE_UNREF(flags);

    // Vertex shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) in  vec4 in_position;\n"
            << "layout(location = 0) out int  out_ndx;\n"
            << "\n"
            << "out gl_PerVertex {\n"
            << "    vec4 gl_Position;\n"
            << "};\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    gl_Position = in_position;\n"
            << "    out_ndx     = gl_VertexIndex;\n"
            << "}\n";

        programCollection.glslSources.add("vert") << glu::VertexSource(src.str());
    }

    // Fragment shader
    {
        std::ostringstream src;
        src << glu::getGLSLVersionDeclaration(glu::GLSL_VERSION_450) << "\n"
            << "\n"
            << "layout(location = 0) flat in  int  in_ndx;\n"
            << "layout(location = 0)      out vec4 o_color;\n"
            << "\n"
            << "void main(void)\n"
            << "{\n"
            << "    if (in_ndx % 2 == 0)\n"
            << "        o_color = vec4(vec3(1.0), 1.0);\n"
            << "    else\n"
            << "        o_color = vec4(vec3(0.75), 1.0);\n"
            << "}\n";

        programCollection.glslSources.add("frag") << glu::FragmentSource(src.str());
    }
}

//! Generate vertex positions for a grid of tiles composed of two triangles each (6 vertices)
void generateGrid (void* pRawData, const float step, const float ox, const float oy, const deUint32 numX, const deUint32 numY, const float z = 0.0f)
{
    typedef Vec4 (*TilePtr)[6];

    TilePtr const pData = static_cast<TilePtr>(pRawData);
    {
        for (deUint32 iy = 0; iy < numY; ++iy)
        for (deUint32 ix = 0; ix < numX; ++ix)
        {
            const deUint32 ndx = ix + numX * iy;
            const float    x   = ox + step * static_cast<float>(ix);
            const float    y   = oy + step * static_cast<float>(iy);

            pData[ndx][0] = Vec4(x + step, y,        z, 1.0f);
            pData[ndx][1] = Vec4(x,        y,        z, 1.0f);
            pData[ndx][2] = Vec4(x,        y + step, z, 1.0f);

            pData[ndx][3] = Vec4(x,        y + step, z, 1.0f);
            pData[ndx][4] = Vec4(x + step, y + step, z, 1.0f);
            pData[ndx][5] = Vec4(x + step, y,        z, 1.0f);
        }
    }
}
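
// For example, filling a host-visible buffer with a full-viewport grid (as the index buffer test below does):
//
//     generateGrid(hostPtr, 2.0f / static_cast<float>(GRID_SIZE), -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);
//
// covers clip space [-1,1] x [-1,1] with GRID_SIZE x GRID_SIZE tiles of 6 vertices each.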
1135
1136 //! Base test for a sparse buffer backing a vertex/index buffer
1137 class DrawGridTestInstance : public SparseBufferTestInstance
1138 {
1139 public:
DrawGridTestInstance(Context & context,const TestFlags flags,const VkBufferUsageFlags usage,const VkDeviceSize minChunkSize)1140 DrawGridTestInstance (Context& context, const TestFlags flags, const VkBufferUsageFlags usage, const VkDeviceSize minChunkSize)
1141 : SparseBufferTestInstance (context, flags)
1142 , m_bufferUsage (usage)
1143 , m_minChunkSize (minChunkSize)
1144 {
1145 }
1146
createResources(deUint32 memoryDeviceIndex)1147 void createResources (deUint32 memoryDeviceIndex)
1148 {
1149 const InstanceInterface& instance = m_context.getInstanceInterface();
1150 const DeviceInterface& vk = getDeviceInterface();
1151 VkBufferCreateInfo referenceBufferCreateInfo = getSparseBufferCreateInfo(m_bufferUsage);
1152
1153 {
1154 // Allocate two chunks, each covering half of the viewport
1155 SparseAllocationBuilder builder;
1156 builder.addMemoryBind();
1157
1158 if (m_residency)
1159 builder.addResourceHole();
1160
1161 builder
1162 .addMemoryAllocation()
1163 .addMemoryHole()
1164 .addMemoryBind();
1165
1166 if (m_aliased)
1167 builder.addAliasedMemoryBind(0u, 0u);
1168
1169 m_sparseAllocation = builder.build(instance, getPhysicalDevice(memoryDeviceIndex), vk, getDevice(), getAllocator(), referenceBufferCreateInfo, m_minChunkSize);
1170 }
1171
1172 // Create the buffer
1173 referenceBufferCreateInfo.size = m_sparseAllocation->resourceSize;
1174 m_sparseBuffer = makeBuffer(vk, getDevice(), referenceBufferCreateInfo);
1175
1176 m_perDrawBufferOffset = m_sparseAllocation->resourceSize / m_sparseAllocation->numResourceChunks;
1177 m_stagingBufferSize = 2 * m_perDrawBufferOffset;
1178 m_stagingBuffer = makeBuffer(vk, getDevice(), makeBufferCreateInfo(m_stagingBufferSize, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
1179 m_stagingBufferAlloc = bindBuffer(vk, getDevice(), getAllocator(), *m_stagingBuffer, MemoryRequirement::HostVisible);
1180 }
1181
iterate(void)1182 tcu::TestStatus iterate (void)
1183 {
1184 const DeviceInterface& vk = getDeviceInterface();
1185
1186 for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
1187 {
1188 const deUint32 firstDeviceID = physDevID;
1189 const deUint32 secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;
1190
1191 createResources(secondDeviceID);
1192
1193 if (firstDeviceID != secondDeviceID)
1194 {
1195 VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
1196 vk.getDeviceGroupPeerMemoryFeatures(getDevice(), m_sparseAllocation->heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);
1197
1198 if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_DST_BIT) == 0) ||
1199 ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT) == 0))
1200 {
1201 TCU_THROW(NotSupportedError, "Peer memory does not support COPY_DST and GENERIC_SRC");
1202 }
1203 }
1204
1205 // Bind the memory
1206 bindSparseBuffer(vk, getDevice(), m_sparseQueue.queueHandle, *m_sparseBuffer, *m_sparseAllocation, usingDeviceGroups(), firstDeviceID, secondDeviceID);
1207
1208 initializeBuffers();
1209
1210 // Upload to the sparse buffer
1211 {
1212 flushAlloc(vk, getDevice(), *m_stagingBufferAlloc);
1213
1214 VkDeviceSize firstChunkOffset = 0ull;
1215 VkDeviceSize secondChunkOffset = m_perDrawBufferOffset;
1216
1217 if (m_residency)
1218 secondChunkOffset += m_perDrawBufferOffset;
1219
1220 if (m_aliased)
1221 firstChunkOffset = secondChunkOffset + m_perDrawBufferOffset;
1222
1223 const VkBufferCopy copyRegions[] =
1224 {
1225 {
1226 0ull, // VkDeviceSize srcOffset;
1227 firstChunkOffset, // VkDeviceSize dstOffset;
1228 m_perDrawBufferOffset, // VkDeviceSize size;
1229 },
1230 {
1231 m_perDrawBufferOffset, // VkDeviceSize srcOffset;
1232 secondChunkOffset, // VkDeviceSize dstOffset;
1233 m_perDrawBufferOffset, // VkDeviceSize size;
1234 },
1235 };
1236
1237 const Unique<VkCommandPool> cmdPool (makeCommandPool(vk, getDevice(), m_universalQueue.queueFamilyIndex));
1238 const Unique<VkCommandBuffer> cmdBuffer (allocateCommandBuffer(vk, getDevice(), *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
1239
1240 beginCommandBuffer (vk, *cmdBuffer);
1241 vk.cmdCopyBuffer (*cmdBuffer, *m_stagingBuffer, *m_sparseBuffer, DE_LENGTH_OF_ARRAY(copyRegions), copyRegions);
1242 endCommandBuffer (vk, *cmdBuffer);
1243
1244 submitCommandsAndWait(vk, getDevice(), m_universalQueue.queueHandle, *cmdBuffer, 0u, DE_NULL, DE_NULL, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
1245 }
1246
1247
1248 Renderer::SpecializationMap specMap;
1249 draw(VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST, DE_NULL, specMap, usingDeviceGroups(), firstDeviceID);
1250
1251 if(!isResultImageCorrect())
1252 return tcu::TestStatus::fail("Some buffer values were incorrect");
1253 }
1254 return tcu::TestStatus::pass("Pass");
1255 }
1256
1257 protected:
1258 virtual void initializeBuffers (void) = 0;
1259
1260 const VkBufferUsageFlags m_bufferUsage;
1261 const VkDeviceSize m_minChunkSize;
1262
1263 VkDeviceSize m_perDrawBufferOffset;
1264
1265 VkDeviceSize m_stagingBufferSize;
1266 Move<VkBuffer> m_stagingBuffer;
1267 MovePtr<Allocation> m_stagingBufferAlloc;
1268
1269 MovePtr<SparseAllocation> m_sparseAllocation;
1270 Move<VkBuffer> m_sparseBuffer;
1271 };
1272
1273 //! Sparse buffer backing a vertex input buffer
1274 class VertexBufferTestInstance : public DrawGridTestInstance
1275 {
1276 public:
VertexBufferTestInstance(Context & context,const TestFlags flags)1277 VertexBufferTestInstance (Context& context, const TestFlags flags)
1278 : DrawGridTestInstance (context,
1279 flags,
1280 VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,
1281 GRID_SIZE * GRID_SIZE * 6 * sizeof(Vec4))
1282 {
1283 }
1284
rendererDraw(const VkPipelineLayout pipelineLayout,const VkCommandBuffer cmdBuffer) const1285 void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
1286 {
1287 DE_UNREF(pipelineLayout);
1288
1289 m_context.getTestContext().getLog()
1290 << tcu::TestLog::Message << "Drawing a grid of triangles backed by a sparse vertex buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;
1291
1292 const DeviceInterface& vk = getDeviceInterface();
1293 const deUint32 vertexCount = 6 * (GRID_SIZE * GRID_SIZE) / 2;
1294 VkDeviceSize vertexOffset = 0ull;
1295
1296 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1297 vk.cmdDraw (cmdBuffer, vertexCount, 1u, 0u, 0u);
1298
1299 vertexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);
1300
1301 vk.cmdBindVertexBuffers (cmdBuffer, 0u, 1u, &m_sparseBuffer.get(), &vertexOffset);
1302 vk.cmdDraw (cmdBuffer, vertexCount, 1u, 0u, 0u);
1303 }
1304
initializeBuffers(void)1305 void initializeBuffers (void)
1306 {
1307 deUint8* pData = static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr());
1308 const float step = 2.0f / static_cast<float>(GRID_SIZE);
1309
1310 // Prepare data for two draw calls
		generateGrid(pData, step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE/2);
		generateGrid(pData + m_perDrawBufferOffset, step, -1.0f, 0.0f, GRID_SIZE, GRID_SIZE/2);
	}
};

//! Sparse buffer backing an index buffer
class IndexBufferTestInstance : public DrawGridTestInstance
{
public:
	IndexBufferTestInstance (Context& context, const TestFlags flags)
		: DrawGridTestInstance	(context,
								 flags,
								 VK_BUFFER_USAGE_INDEX_BUFFER_BIT,
								 GRID_SIZE * GRID_SIZE * 6 * sizeof(deUint32))
		, m_halfVertexCount		(6 * (GRID_SIZE * GRID_SIZE) / 2)
	{
	}

	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
	{
		DE_UNREF(pipelineLayout);

		m_context.getTestContext().getLog()
			<< tcu::TestLog::Message << "Drawing a grid of triangles from a sparse index buffer. There should be no red pixels visible." << tcu::TestLog::EndMessage;

		const DeviceInterface&	vk				= getDeviceInterface();
		const VkDeviceSize		vertexOffset	= 0ull;
		VkDeviceSize			indexOffset		= 0ull;

		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);

		vk.cmdBindIndexBuffer	(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
		vk.cmdDrawIndexed		(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);

		indexOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

		vk.cmdBindIndexBuffer	(cmdBuffer, *m_sparseBuffer, indexOffset, VK_INDEX_TYPE_UINT32);
		vk.cmdDrawIndexed		(cmdBuffer, m_halfVertexCount, 1u, 0u, 0, 0u);
	}

	void initializeBuffers (void)
	{
		// Vertex buffer
		const DeviceInterface&	vk					= getDeviceInterface();
		const VkDeviceSize		vertexBufferSize	= 2 * m_halfVertexCount * sizeof(Vec4);
		m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
		m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

		{
			const float	step = 2.0f / static_cast<float>(GRID_SIZE);

			generateGrid(m_vertexBufferAlloc->getHostPtr(), step, -1.0f, -1.0f, GRID_SIZE, GRID_SIZE);

			flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
		}

		// Sparse index buffer
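		// Fill each chunk with plain sequential indices: the first chunk indexes
		// the first half of the vertex grid, the second chunk the second half.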
		for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
		{
			deUint8* const	pData		= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
			deUint32* const	pIndexData	= reinterpret_cast<deUint32*>(pData);
			const deUint32	ndxBase		= chunkNdx * m_halfVertexCount;

			for (deUint32 i = 0u; i < m_halfVertexCount; ++i)
				pIndexData[i] = ndxBase + i;
		}
	}

private:
	const deUint32		m_halfVertexCount;
	Move<VkBuffer>		m_vertexBuffer;
	MovePtr<Allocation>	m_vertexBufferAlloc;
};

//! Draw from a sparse indirect buffer
class IndirectBufferTestInstance : public DrawGridTestInstance
{
public:
	IndirectBufferTestInstance (Context& context, const TestFlags flags)
		: DrawGridTestInstance	(context,
								 flags,
								 VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT,
								 sizeof(VkDrawIndirectCommand))
	{
	}

	void rendererDraw (const VkPipelineLayout pipelineLayout, const VkCommandBuffer cmdBuffer) const
	{
		DE_UNREF(pipelineLayout);

		m_context.getTestContext().getLog()
			<< tcu::TestLog::Message << "Drawing two triangles covering the whole viewport. There should be no red pixels visible." << tcu::TestLog::EndMessage;

		const DeviceInterface&	vk				= getDeviceInterface();
		const VkDeviceSize		vertexOffset	= 0ull;
		VkDeviceSize			indirectOffset	= 0ull;

		vk.cmdBindVertexBuffers	(cmdBuffer, 0u, 1u, &m_vertexBuffer.get(), &vertexOffset);
		vk.cmdDrawIndirect		(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);

		indirectOffset += m_perDrawBufferOffset * (m_residency ? 2 : 1);

		vk.cmdDrawIndirect		(cmdBuffer, *m_sparseBuffer, indirectOffset, 1u, 0u);
	}

	void initializeBuffers (void)
	{
		// Vertex buffer
		const DeviceInterface&	vk					= getDeviceInterface();
		const VkDeviceSize		vertexBufferSize	= 2 * 3 * sizeof(Vec4);
		m_vertexBuffer		= makeBuffer(vk, getDevice(), makeBufferCreateInfo(vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
		m_vertexBufferAlloc	= bindBuffer(vk, getDevice(), getAllocator(), *m_vertexBuffer, MemoryRequirement::HostVisible);

		{
			generateGrid(m_vertexBufferAlloc->getHostPtr(), 2.0f, -1.0f, -1.0f, 1, 1);
			flushAlloc(vk, getDevice(), *m_vertexBufferAlloc);
		}

		// Indirect buffer
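		// Each chunk holds a single VkDrawIndirectCommand drawing one triangle;
		// the second command starts at vertex 3, so together the two draws cover
		// the fullscreen quad generated above.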
		for (deUint32 chunkNdx = 0u; chunkNdx < 2; ++chunkNdx)
		{
			deUint8* const					pData		= static_cast<deUint8*>(m_stagingBufferAlloc->getHostPtr()) + chunkNdx * m_perDrawBufferOffset;
			VkDrawIndirectCommand* const	pCmdData	= reinterpret_cast<VkDrawIndirectCommand*>(pData);

			pCmdData->firstVertex	= 3u * chunkNdx;
			pCmdData->firstInstance	= 0u;
			pCmdData->vertexCount	= 3u;
			pCmdData->instanceCount	= 1u;
		}
	}

private:
	Move<VkBuffer>		m_vertexBuffer;
	MovePtr<Allocation>	m_vertexBufferAlloc;
};

//! Similar to the class in vktTestCaseUtil.hpp, but uses Arg0 directly rather than through an InstanceFunction1
template<typename Arg0>
class FunctionProgramsSimple1
{
public:
	typedef void	(*Function)	(vk::SourceCollections& dst, Arg0 arg0);
	FunctionProgramsSimple1 (Function func) : m_func(func) {}
	void init (vk::SourceCollections& dst, const Arg0& arg0) const { m_func(dst, arg0); }

private:
	const Function	m_func;
};

//! Convenience function to create a TestCase based on a freestanding initPrograms and a TestInstance implementation
template<typename TestInstanceT, typename Arg0>
TestCase* createTestInstanceWithPrograms (tcu::TestContext&									testCtx,
										  const std::string&								name,
										  const std::string&								desc,
										  typename FunctionProgramsSimple1<Arg0>::Function	initPrograms,
										  Arg0												arg0)
{
	return new InstanceFactory1<TestInstanceT, Arg0, FunctionProgramsSimple1<Arg0> >(
		testCtx, tcu::NODETYPE_SELF_VALIDATE, name, desc, FunctionProgramsSimple1<Arg0>(initPrograms), arg0);
}
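// Typical usage, as in populateTestGroup() below:
//   createTestInstanceWithPrograms<UBOTestInstance>(testCtx, "sparse_binding", "", initProgramsDrawWithUBO, flags);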

void populateTestGroup (tcu::TestCaseGroup* parentGroup)
{
	const struct
	{
		std::string	name;
		TestFlags	flags;
	} groups[] =
	{
		{ "sparse_binding",							0u,														},
		{ "sparse_binding_aliased",					TEST_FLAG_ALIASED,										},
		{ "sparse_residency",						TEST_FLAG_RESIDENCY,									},
		{ "sparse_residency_aliased",				TEST_FLAG_RESIDENCY | TEST_FLAG_ALIASED,				},
		{ "sparse_residency_non_resident_strict",	TEST_FLAG_RESIDENCY | TEST_FLAG_NON_RESIDENT_STRICT,	},
	};

	const int	numGroupsIncludingNonResidentStrict	= DE_LENGTH_OF_ARRAY(groups);
	const int	numGroupsDefaultList				= numGroupsIncludingNonResidentStrict - 1;
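	// Note: only the UBO tests below include the non_resident_strict variant;
	// the vertex, index, and indirect buffer groups use the shorter default list.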
	std::string	devGroupPrefix = "device_group_";

	// Transfer
	{
		MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "transfer", ""));
		{
			MovePtr<tcu::TestCaseGroup>	subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding", ""));
			addBufferSparseBindingTests(subGroup.get(), false);
			group->addChild(subGroup.release());

			MovePtr<tcu::TestCaseGroup>	subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding", ""));
			addBufferSparseBindingTests(subGroupDeviceGroups.get(), true);
			group->addChild(subGroupDeviceGroups.release());
		}
		parentGroup->addChild(group.release());
	}

	// SSBO
	{
		MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ssbo", ""));
		{
			MovePtr<tcu::TestCaseGroup>	subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_binding_aliased", ""));
			addBufferSparseMemoryAliasingTests(subGroup.get(), false);
			group->addChild(subGroup.release());

			MovePtr<tcu::TestCaseGroup>	subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_binding_aliased", ""));
			addBufferSparseMemoryAliasingTests(subGroupDeviceGroups.get(), true);
			group->addChild(subGroupDeviceGroups.release());
		}
		{
			MovePtr<tcu::TestCaseGroup>	subGroup(new tcu::TestCaseGroup(parentGroup->getTestContext(), "sparse_residency", ""));
			addBufferSparseResidencyTests(subGroup.get(), false);
			group->addChild(subGroup.release());

			MovePtr<tcu::TestCaseGroup>	subGroupDeviceGroups(new tcu::TestCaseGroup(parentGroup->getTestContext(), "device_group_sparse_residency", ""));
			addBufferSparseResidencyTests(subGroupDeviceGroups.get(), true);
			group->addChild(subGroupDeviceGroups.release());
		}
		parentGroup->addChild(group.release());
	}

	// UBO
	{
		MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "ubo", ""));

		for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags));
		}
		for (int groupNdx = 0u; groupNdx < numGroupsIncludingNonResidentStrict; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<UBOTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawWithUBO, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
		}
		parentGroup->addChild(group.release());
	}

	// Vertex buffer
	{
		MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "vertex_buffer", ""));

		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
		}
		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<VertexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
		}

		parentGroup->addChild(group.release());
	}

	// Index buffer
	{
		MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "index_buffer", ""));

		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
		}
		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<IndexBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
		}

		parentGroup->addChild(group.release());
	}

	// Indirect buffer
	{
		MovePtr<tcu::TestCaseGroup>	group(new tcu::TestCaseGroup(parentGroup->getTestContext(), "indirect_buffer", ""));

		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), groups[groupNdx].name.c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags));
		}
		for (int groupNdx = 0u; groupNdx < numGroupsDefaultList; ++groupNdx)
		{
			group->addChild(createTestInstanceWithPrograms<IndirectBufferTestInstance>(group->getTestContext(), (devGroupPrefix + groups[groupNdx].name).c_str(), "", initProgramsDrawGrid, groups[groupNdx].flags | TEST_FLAG_ENABLE_DEVICE_GROUPS));
		}

		parentGroup->addChild(group.release());
	}
}

} // anonymous ns

tcu::TestCaseGroup* createSparseBufferTests (tcu::TestContext& testCtx)
{
	return createTestGroup(testCtx, "buffer", "Sparse buffer usage tests", populateTestGroup);
}

} // sparse
} // vkt