1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktSparseResourcesBufferSparseResidency.cpp
21 * \brief Sparse partially resident buffers tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktSparseResourcesBufferSparseResidency.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkQueryUtil.hpp"
37 #include "vkBuilderUtil.hpp"
38 #include "vkTypeUtil.hpp"
39
40 #include "deStringUtil.hpp"
41 #include "deUniquePtr.hpp"
42
43 #include <string>
44 #include <vector>
45
46 using namespace vk;
47
48 namespace vkt
49 {
50 namespace sparse
51 {
52 namespace
53 {
54
//! Constants shared between the host code and the generated compute shader.
enum ShaderParameters
{
	//! Size in bytes of one element of the shader's uint arrays; used to
	//! convert the buffer size in bytes into a shader loop iteration count.
	SIZE_OF_UINT_IN_SHADER = 4u
};
59
60 class BufferSparseResidencyCase : public TestCase
61 {
62 public:
63 BufferSparseResidencyCase (tcu::TestContext& testCtx,
64 const std::string& name,
65 const std::string& description,
66 const deUint32 bufferSize,
67 const glu::GLSLVersion glslVersion);
68
69 void initPrograms (SourceCollections& sourceCollections) const;
70 TestInstance* createInstance (Context& context) const;
71
72 private:
73 const deUint32 m_bufferSize;
74 const glu::GLSLVersion m_glslVersion;
75 };
76
BufferSparseResidencyCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 bufferSize,const glu::GLSLVersion glslVersion)77 BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext& testCtx,
78 const std::string& name,
79 const std::string& description,
80 const deUint32 bufferSize,
81 const glu::GLSLVersion glslVersion)
82 : TestCase (testCtx, name, description)
83 , m_bufferSize (bufferSize)
84 , m_glslVersion (glslVersion)
85 {
86 }
87
initPrograms(SourceCollections & sourceCollections) const88 void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
89 {
90 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
91 const deUint32 iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER;
92
93 std::ostringstream src;
94
95 src << versionDecl << "\n"
96 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
97 << "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
98 << "{\n"
99 << " uint data[];\n"
100 << "} sb_in;\n"
101 << "\n"
102 << "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
103 << "{\n"
104 << " uint result[];\n"
105 << "} sb_out;\n"
106 << "\n"
107 << "void main (void)\n"
108 << "{\n"
109 << " for(int i=0; i<" << iterationsCount << "; ++i) \n"
110 << " {\n"
111 << " sb_out.result[i] = sb_in.data[i];"
112 << " }\n"
113 << "}\n";
114
115 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
116 }
117
118 class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
119 {
120 public:
121 BufferSparseResidencyInstance (Context& context,
122 const deUint32 bufferSize);
123
124 tcu::TestStatus iterate (void);
125
126 private:
127 const deUint32 m_bufferSize;
128 };
129
BufferSparseResidencyInstance(Context & context,const deUint32 bufferSize)130 BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context& context,
131 const deUint32 bufferSize)
132 : SparseResourcesBaseInstance (context)
133 , m_bufferSize (bufferSize)
134 {
135 }
136
iterate(void)137 tcu::TestStatus BufferSparseResidencyInstance::iterate (void)
138 {
139 const InstanceInterface& instance = m_context.getInstanceInterface();
140 const DeviceInterface& deviceInterface = m_context.getDeviceInterface();
141 const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
142
143 VkPhysicalDeviceFeatures deviceFeatures;
144 instance.getPhysicalDeviceFeatures(physicalDevice, &deviceFeatures);
145
146 if (deviceFeatures.sparseResidencyBuffer == false)
147 {
148 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Sparse partially resident buffers not supported");
149 }
150
151 VkPhysicalDeviceProperties deviceProperties;
152 instance.getPhysicalDeviceProperties(physicalDevice, &deviceProperties);
153
154 QueueRequirementsVec queueRequirements;
155 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
156 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
157
158 // Create logical device supporting both sparse and compute oprations
159 if (!createDeviceSupportingQueues(queueRequirements))
160 {
161 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "Could not create device supporting sparse and compute queue");
162 }
163
164 const VkPhysicalDeviceMemoryProperties deviceMemoryProperties = getPhysicalDeviceMemoryProperties(instance, physicalDevice);
165
166 // Create memory allocator for device
167 const de::UniquePtr<Allocator> allocator(new SimpleAllocator(deviceInterface, *m_logicalDevice, deviceMemoryProperties));
168
169 // Create queue supporting sparse binding operations
170 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
171
172 // Create queue supporting compute and transfer operations
173 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
174
175 VkBufferCreateInfo bufferCreateInfo =
176 {
177 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
178 DE_NULL, // const void* pNext;
179 VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT, // VkBufferCreateFlags flags;
180 m_bufferSize, // VkDeviceSize size;
181 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
182 VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // VkBufferUsageFlags usage;
183 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
184 0u, // deUint32 queueFamilyIndexCount;
185 DE_NULL // const deUint32* pQueueFamilyIndices;
186 };
187
188 const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
189
190 if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
191 {
192 bufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
193 bufferCreateInfo.queueFamilyIndexCount = 2u;
194 bufferCreateInfo.pQueueFamilyIndices = queueFamilyIndices;
195 }
196
197 // Create sparse buffer
198 const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, *m_logicalDevice, &bufferCreateInfo));
199
200 const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, *m_logicalDevice, *sparseBuffer);
201
202 if (bufferMemRequirements.size > deviceProperties.limits.sparseAddressSpaceSize)
203 {
204 return tcu::TestStatus(QP_TEST_RESULT_NOT_SUPPORTED, "Required memory size for sparse resources exceeds device limits");
205 }
206
207 DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
208
209 typedef de::SharedPtr< Unique<VkDeviceMemory> > DeviceMemoryUniquePtr;
210
211 std::vector<VkSparseMemoryBind> sparseMemoryBinds;
212 std::vector<DeviceMemoryUniquePtr> deviceMemUniquePtrVec;
213 const deUint32 numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment);
214 const deUint32 memoryType = findMatchingMemoryType(deviceMemoryProperties, bufferMemRequirements, MemoryRequirement::Any);
215
216 if (memoryType == NO_MATCH_FOUND)
217 {
218 return tcu::TestStatus(QP_TEST_RESULT_FAIL, "No matching memory type found");
219 }
220
221 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
222 {
223 const VkMemoryAllocateInfo allocInfo =
224 {
225 VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, // VkStructureType sType;
226 DE_NULL, // const void* pNext;
227 bufferMemRequirements.alignment, // VkDeviceSize allocationSize;
228 memoryType, // deUint32 memoryTypeIndex;
229 };
230
231 VkDeviceMemory deviceMemory = 0;
232 VK_CHECK(deviceInterface.allocateMemory(*m_logicalDevice, &allocInfo, DE_NULL, &deviceMemory));
233
234 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(deviceMemory), Deleter<VkDeviceMemory>(deviceInterface, *m_logicalDevice, DE_NULL))));
235
236 const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind
237 (
238 bufferMemRequirements.alignment * sparseBindNdx, //VkDeviceSize resourceOffset
239 bufferMemRequirements.alignment, //VkDeviceSize size
240 deviceMemory, //VkDeviceMemory memory
241 0u, //VkDeviceSize memoryOffset
242 0u //VkSparseMemoryBindFlags flags
243 );
244
245 sparseMemoryBinds.push_back(sparseMemoryBind);
246 }
247
248 const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo
249 (
250 *sparseBuffer, //VkBuffer buffer;
251 static_cast<deUint32>(sparseMemoryBinds.size()), //deUint32 bindCount;
252 &sparseMemoryBinds[0] //const VkSparseMemoryBind* Binds;
253 );
254
255 const Unique<VkSemaphore> bufferMemoryBindSemaphore(makeSemaphore(deviceInterface, *m_logicalDevice));
256
257 const VkBindSparseInfo bindSparseInfo =
258 {
259 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
260 DE_NULL, //const void* pNext;
261 0u, //deUint32 waitSemaphoreCount;
262 DE_NULL, //const VkSemaphore* pWaitSemaphores;
263 1u, //deUint32 bufferBindCount;
264 &sparseBufferBindInfo, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
265 0u, //deUint32 imageOpaqueBindCount;
266 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
267 0u, //deUint32 imageBindCount;
268 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
269 1u, //deUint32 signalSemaphoreCount;
270 &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
271 };
272
273 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
274
275 // Create input buffer
276 const VkBufferCreateInfo inputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
277 de::UniquePtr<Buffer> inputBuffer(new Buffer(deviceInterface, *m_logicalDevice, *allocator, inputBufferCreateInfo, MemoryRequirement::HostVisible));
278
279 std::vector<deUint8> referenceData;
280 referenceData.resize(m_bufferSize);
281
282 for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
283 {
284 referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u);
285 }
286
287 deMemcpy(inputBuffer->getAllocation().getHostPtr(), &referenceData[0], m_bufferSize);
288
289 flushMappedMemoryRange(deviceInterface, *m_logicalDevice, inputBuffer->getAllocation().getMemory(), inputBuffer->getAllocation().getOffset(), m_bufferSize);
290
291 // Create output buffer
292 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
293 de::UniquePtr<Buffer> outputBuffer(new Buffer(deviceInterface, *m_logicalDevice, *allocator, outputBufferCreateInfo, MemoryRequirement::HostVisible));
294
295 // Create command buffer for compute and data transfer oparations
296 const Unique<VkCommandPool> commandPool(makeCommandPool(deviceInterface, *m_logicalDevice, computeQueue.queueFamilyIndex));
297 const Unique<VkCommandBuffer> commandBuffer(makeCommandBuffer(deviceInterface, *m_logicalDevice, *commandPool));
298
299 // Start recording compute and transfer commands
300 beginCommandBuffer(deviceInterface, *commandBuffer);
301
302 // Create descriptor set
303 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
304 DescriptorSetLayoutBuilder()
305 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
306 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
307 .build(deviceInterface, *m_logicalDevice));
308
309 // Create compute pipeline
310 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, *m_logicalDevice, m_context.getBinaryCollection().get("comp"), DE_NULL));
311 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, *m_logicalDevice, *descriptorSetLayout));
312 const Unique<VkPipeline> computePipeline(makeComputePipeline(deviceInterface, *m_logicalDevice, *pipelineLayout, *shaderModule));
313
314 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
315
316 const Unique<VkDescriptorPool> descriptorPool(
317 DescriptorPoolBuilder()
318 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
319 .build(deviceInterface, *m_logicalDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
320
321 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, *m_logicalDevice, *descriptorPool, *descriptorSetLayout));
322
323 const VkDescriptorBufferInfo inputBufferInfo = makeDescriptorBufferInfo(inputBuffer->get(), 0ull, m_bufferSize);
324 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);
325
326 DescriptorSetUpdateBuilder()
327 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
328 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
329 .update(deviceInterface, *m_logicalDevice);
330
331 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
332
333 const VkBufferMemoryBarrier inputBufferBarrier
334 = makeBufferMemoryBarrier( VK_ACCESS_HOST_WRITE_BIT,
335 VK_ACCESS_SHADER_READ_BIT,
336 inputBuffer->get(),
337 0ull,
338 m_bufferSize);
339
340 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
341
342 deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);
343
344 const VkBufferMemoryBarrier sparseBufferBarrier
345 = makeBufferMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
346 VK_ACCESS_TRANSFER_READ_BIT,
347 *sparseBuffer,
348 0ull,
349 m_bufferSize);
350
351 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL);
352
353 const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);
354
355 deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, outputBuffer->get(), 1u, &bufferCopy);
356
357 const VkBufferMemoryBarrier outputBufferBarrier
358 = makeBufferMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
359 VK_ACCESS_HOST_READ_BIT,
360 outputBuffer->get(),
361 0ull,
362 m_bufferSize);
363
364 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
365
366 // End recording compute and transfer commands
367 endCommandBuffer(deviceInterface, *commandBuffer);
368
369 const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT };
370
371 // Submit transfer commands for execution and wait for completion
372 submitCommandsAndWait(deviceInterface, *m_logicalDevice, computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), waitStageBits);
373
374 // Retrieve data from output buffer to host memory
375 const Allocation& allocation = outputBuffer->getAllocation();
376
377 invalidateMappedMemoryRange(deviceInterface, *m_logicalDevice, allocation.getMemory(), allocation.getOffset(), m_bufferSize);
378
379 const deUint8* outputData = static_cast<const deUint8*>(allocation.getHostPtr());
380 tcu::TestStatus testStatus = tcu::TestStatus::pass("Passed");
381
382 // Compare output data with reference data
383 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
384 {
385 const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment);
386 const deUint32 offset = alignment * sparseBindNdx;
387 const deUint32 size = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;
388
389 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
390 {
391 testStatus = tcu::TestStatus::fail("Failed");
392 break;
393 }
394 }
395
396 if (deviceProperties.sparseProperties.residencyNonResidentStrict)
397 {
398 for (deUint32 sparseBindNdx = 1; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
399 {
400 const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment);
401 const deUint32 offset = alignment * sparseBindNdx;
402 const deUint32 size = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;
403
404 deMemset(&referenceData[offset], 0u, size);
405
406 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
407 {
408 testStatus = tcu::TestStatus::fail("Failed");
409 break;
410 }
411 }
412 }
413
414 // Wait for sparse queue to become idle
415 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
416
417 return testStatus;
418 }
419
createInstance(Context & context) const420 TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const
421 {
422 return new BufferSparseResidencyInstance(context, m_bufferSize);
423 }
424
425 } // anonymous ns
426
createBufferSparseResidencyTests(tcu::TestContext & testCtx)427 tcu::TestCaseGroup* createBufferSparseResidencyTests (tcu::TestContext& testCtx)
428 {
429 de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "buffer_sparse_residency", "Buffer Sparse Residency"));
430
431 testGroup->addChild(new BufferSparseResidencyCase(testCtx, "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440));
432 testGroup->addChild(new BufferSparseResidencyCase(testCtx, "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440));
433 testGroup->addChild(new BufferSparseResidencyCase(testCtx, "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440));
434 testGroup->addChild(new BufferSparseResidencyCase(testCtx, "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440));
435 testGroup->addChild(new BufferSparseResidencyCase(testCtx, "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440));
436 testGroup->addChild(new BufferSparseResidencyCase(testCtx, "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440));
437
438 return testGroup.release();
439 }
440
441 } // sparse
442 } // vkt
443