1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2016 The Khronos Group Inc.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file vktSparseResourcesBufferSparseResidency.cpp
21 * \brief Sparse partially resident buffers tests
22 *//*--------------------------------------------------------------------*/
23
24 #include "vktSparseResourcesBufferSparseResidency.hpp"
25 #include "vktSparseResourcesTestsUtil.hpp"
26 #include "vktSparseResourcesBase.hpp"
27 #include "vktTestCaseUtil.hpp"
28
29 #include "vkDefs.hpp"
30 #include "vkRef.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkPlatform.hpp"
33 #include "vkPrograms.hpp"
34 #include "vkRefUtil.hpp"
35 #include "vkMemUtil.hpp"
36 #include "vkBarrierUtil.hpp"
37 #include "vkQueryUtil.hpp"
38 #include "vkBuilderUtil.hpp"
39 #include "vkTypeUtil.hpp"
40 #include "vkCmdUtil.hpp"
41
42 #include "deStringUtil.hpp"
43 #include "deUniquePtr.hpp"
44
45 #include <string>
46 #include <vector>
47
48 using namespace vk;
49
50 namespace vkt
51 {
52 namespace sparse
53 {
54 namespace
55 {
56
// Constants shared between the host code and the generated compute shader.
enum ShaderParameters
{
	// Divisor used to turn the buffer size into the number of uint elements the shader copies.
	SIZE_OF_UINT_IN_SHADER = 4u
};
61
62 class BufferSparseResidencyCase : public TestCase
63 {
64 public:
65 BufferSparseResidencyCase (tcu::TestContext& testCtx,
66 const std::string& name,
67 const std::string& description,
68 const deUint32 bufferSize,
69 const glu::GLSLVersion glslVersion,
70 const bool useDeviceGroups);
71
72
73 void initPrograms (SourceCollections& sourceCollections) const;
74 TestInstance* createInstance (Context& context) const;
75
76 private:
77 const deUint32 m_bufferSize;
78 const glu::GLSLVersion m_glslVersion;
79 const bool m_useDeviceGroups;
80
81 };
82
BufferSparseResidencyCase(tcu::TestContext & testCtx,const std::string & name,const std::string & description,const deUint32 bufferSize,const glu::GLSLVersion glslVersion,const bool useDeviceGroups)83 BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext& testCtx,
84 const std::string& name,
85 const std::string& description,
86 const deUint32 bufferSize,
87 const glu::GLSLVersion glslVersion,
88 const bool useDeviceGroups)
89
90 : TestCase (testCtx, name, description)
91 , m_bufferSize (bufferSize)
92 , m_glslVersion (glslVersion)
93 , m_useDeviceGroups (useDeviceGroups)
94 {
95 }
96
initPrograms(SourceCollections & sourceCollections) const97 void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
98 {
99 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion);
100 const deUint32 iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER;
101
102 std::ostringstream src;
103
104 src << versionDecl << "\n"
105 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
106 << "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
107 << "{\n"
108 << " uint data[];\n"
109 << "} sb_in;\n"
110 << "\n"
111 << "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
112 << "{\n"
113 << " uint result[];\n"
114 << "} sb_out;\n"
115 << "\n"
116 << "void main (void)\n"
117 << "{\n"
118 << " for(int i=0; i<" << iterationsCount << "; ++i) \n"
119 << " {\n"
120 << " sb_out.result[i] = sb_in.data[i];"
121 << " }\n"
122 << "}\n";
123
124 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
125 }
126
127 class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
128 {
129 public:
130 BufferSparseResidencyInstance (Context& context,
131 const deUint32 bufferSize,
132 const bool useDeviceGroups);
133
134 tcu::TestStatus iterate (void);
135
136 private:
137 const deUint32 m_bufferSize;
138 };
139
BufferSparseResidencyInstance(Context & context,const deUint32 bufferSize,const bool useDeviceGroups)140 BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context& context,
141 const deUint32 bufferSize,
142 const bool useDeviceGroups)
143 : SparseResourcesBaseInstance (context, useDeviceGroups)
144 , m_bufferSize (bufferSize)
145 {
146 }
147
iterate(void)148 tcu::TestStatus BufferSparseResidencyInstance::iterate (void)
149 {
150 const InstanceInterface& instance = m_context.getInstanceInterface();
151 {
152 // Create logical device supporting both sparse and compute operations
153 QueueRequirementsVec queueRequirements;
154 queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
155 queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
156
157 createDeviceSupportingQueues(queueRequirements);
158 }
159 const VkPhysicalDevice physicalDevice = getPhysicalDevice();
160 const VkPhysicalDeviceProperties physicalDeviceProperties = getPhysicalDeviceProperties(instance, physicalDevice);
161
162 if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
163 TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");
164
165 const DeviceInterface& deviceInterface = getDeviceInterface();
166 const Queue& sparseQueue = getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
167 const Queue& computeQueue = getQueue(VK_QUEUE_COMPUTE_BIT, 0);
168
169 // Go through all physical devices
170 for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
171 {
172 const deUint32 firstDeviceID = physDevID;
173 const deUint32 secondDeviceID = (firstDeviceID + 1) % m_numPhysicalDevices;
174
175 VkBufferCreateInfo bufferCreateInfo =
176 {
177 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
178 DE_NULL, // const void* pNext;
179 VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
180 VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT, // VkBufferCreateFlags flags;
181 m_bufferSize, // VkDeviceSize size;
182 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
183 VK_BUFFER_USAGE_TRANSFER_SRC_BIT, // VkBufferUsageFlags usage;
184 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
185 0u, // deUint32 queueFamilyIndexCount;
186 DE_NULL // const deUint32* pQueueFamilyIndices;
187 };
188
189 const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
190
191 if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
192 {
193 bufferCreateInfo.sharingMode = VK_SHARING_MODE_CONCURRENT;
194 bufferCreateInfo.queueFamilyIndexCount = 2u;
195 bufferCreateInfo.pQueueFamilyIndices = queueFamilyIndices;
196 }
197
198 // Create sparse buffer
199 const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
200
201 // Create sparse buffer memory bind semaphore
202 const Unique<VkSemaphore> bufferMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
203
204 const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);
205
206 if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
207 TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
208
209 DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
210
211 const deUint32 numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment);
212 std::vector<DeviceMemorySp> deviceMemUniquePtrVec;
213
214 {
215 std::vector<VkSparseMemoryBind> sparseMemoryBinds;
216 const deUint32 memoryType = findMatchingMemoryType(instance, getPhysicalDevice(secondDeviceID), bufferMemRequirements, MemoryRequirement::Any);
217
218 if (memoryType == NO_MATCH_FOUND)
219 return tcu::TestStatus::fail("No matching memory type found");
220
221 if (firstDeviceID != secondDeviceID)
222 {
223 VkPeerMemoryFeatureFlags peerMemoryFeatureFlags = (VkPeerMemoryFeatureFlags)0;
224 const deUint32 heapIndex = getHeapIndexForMemoryType(instance, getPhysicalDevice(secondDeviceID), memoryType);
225 deviceInterface.getDeviceGroupPeerMemoryFeatures(getDevice(), heapIndex, firstDeviceID, secondDeviceID, &peerMemoryFeatureFlags);
226
227 if (((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT) == 0) ||
228 ((peerMemoryFeatureFlags & VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT) == 0))
229 {
230 TCU_THROW(NotSupportedError, "Peer memory does not support COPY_SRC and GENERIC_DST");
231 }
232 }
233
234 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
235 {
236 const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType, bufferMemRequirements.alignment * sparseBindNdx);
237
238 deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
239
240 sparseMemoryBinds.push_back(sparseMemoryBind);
241 }
242
243 const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(*sparseBuffer, static_cast<deUint32>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);
244
245 const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
246 {
247 VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR, //VkStructureType sType;
248 DE_NULL, //const void* pNext;
249 firstDeviceID, //deUint32 resourceDeviceIndex;
250 secondDeviceID, //deUint32 memoryDeviceIndex;
251 };
252 const VkBindSparseInfo bindSparseInfo =
253 {
254 VK_STRUCTURE_TYPE_BIND_SPARSE_INFO, //VkStructureType sType;
255 usingDeviceGroups() ? &devGroupBindSparseInfo : DE_NULL,//const void* pNext;
256 0u, //deUint32 waitSemaphoreCount;
257 DE_NULL, //const VkSemaphore* pWaitSemaphores;
258 1u, //deUint32 bufferBindCount;
259 &sparseBufferBindInfo, //const VkSparseBufferMemoryBindInfo* pBufferBinds;
260 0u, //deUint32 imageOpaqueBindCount;
261 DE_NULL, //const VkSparseImageOpaqueMemoryBindInfo* pImageOpaqueBinds;
262 0u, //deUint32 imageBindCount;
263 DE_NULL, //const VkSparseImageMemoryBindInfo* pImageBinds;
264 1u, //deUint32 signalSemaphoreCount;
265 &bufferMemoryBindSemaphore.get() //const VkSemaphore* pSignalSemaphores;
266 };
267
268 VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
269 }
270
271 // Create input buffer
272 const VkBufferCreateInfo inputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
273 const Unique<VkBuffer> inputBuffer (createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
274 const de::UniquePtr<Allocation> inputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));
275
276
277 std::vector<deUint8> referenceData;
278 referenceData.resize(m_bufferSize);
279
280 for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
281 {
282 referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u);
283 }
284
285 deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);
286
287 flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), m_bufferSize);
288
289 // Create output buffer
290 const VkBufferCreateInfo outputBufferCreateInfo = makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
291 const Unique<VkBuffer> outputBuffer (createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
292 const de::UniquePtr<Allocation> outputBufferAlloc (bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
293
294 // Create command buffer for compute and data transfer oparations
295 const Unique<VkCommandPool> commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
296 const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
297
298 // Start recording compute and transfer commands
299 beginCommandBuffer(deviceInterface, *commandBuffer);
300
301 // Create descriptor set
302 const Unique<VkDescriptorSetLayout> descriptorSetLayout(
303 DescriptorSetLayoutBuilder()
304 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
305 .addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
306 .build(deviceInterface, getDevice()));
307
308 // Create compute pipeline
309 const Unique<VkShaderModule> shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
310 const Unique<VkPipelineLayout> pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
311 const Unique<VkPipeline> computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
312
313 deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
314
315 const Unique<VkDescriptorPool> descriptorPool(
316 DescriptorPoolBuilder()
317 .addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
318 .build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
319
320 const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
321
322 {
323 const VkDescriptorBufferInfo inputBufferInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
324 const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);
325
326 DescriptorSetUpdateBuilder()
327 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
328 .writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
329 .update(deviceInterface, getDevice());
330 }
331
332 deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
333
334 {
335 const VkBufferMemoryBarrier inputBufferBarrier
336 = makeBufferMemoryBarrier( VK_ACCESS_HOST_WRITE_BIT,
337 VK_ACCESS_SHADER_READ_BIT,
338 *inputBuffer,
339 0ull,
340 m_bufferSize);
341
342 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
343 }
344
345 deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);
346
347 {
348 const VkBufferMemoryBarrier sparseBufferBarrier
349 = makeBufferMemoryBarrier( VK_ACCESS_SHADER_WRITE_BIT,
350 VK_ACCESS_TRANSFER_READ_BIT,
351 *sparseBuffer,
352 0ull,
353 m_bufferSize);
354
355 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL);
356 }
357
358 {
359 const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);
360
361 deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
362 }
363
364 {
365 const VkBufferMemoryBarrier outputBufferBarrier
366 = makeBufferMemoryBarrier( VK_ACCESS_TRANSFER_WRITE_BIT,
367 VK_ACCESS_HOST_READ_BIT,
368 *outputBuffer,
369 0ull,
370 m_bufferSize);
371
372 deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
373 }
374
375 // End recording compute and transfer commands
376 endCommandBuffer(deviceInterface, *commandBuffer);
377
378 const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT };
379
380 // Submit transfer commands for execution and wait for completion
381 submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(),
382 waitStageBits, 0, DE_NULL, usingDeviceGroups(), firstDeviceID);
383
384 // Retrieve data from output buffer to host memory
385 invalidateAlloc(deviceInterface, getDevice(), *outputBufferAlloc);
386
387 const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
388
389 // Wait for sparse queue to become idle
390 deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
391
392 // Compare output data with reference data
393 for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
394 {
395 const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment);
396 const deUint32 offset = alignment * sparseBindNdx;
397 const deUint32 size = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;
398
399 if (sparseBindNdx % 2u == 0u)
400 {
401 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
402 return tcu::TestStatus::fail("Failed");
403 }
404 else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
405 {
406 deMemset(&referenceData[offset], 0u, size);
407
408 if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
409 return tcu::TestStatus::fail("Failed");
410 }
411 }
412 }
413
414 return tcu::TestStatus::pass("Passed");
415 }
416
createInstance(Context & context) const417 TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const
418 {
419 return new BufferSparseResidencyInstance(context, m_bufferSize, m_useDeviceGroups);
420 }
421
422 } // anonymous ns
423
addBufferSparseResidencyTests(tcu::TestCaseGroup * group,const bool useDeviceGroups)424 void addBufferSparseResidencyTests(tcu::TestCaseGroup* group, const bool useDeviceGroups)
425 {
426 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440, useDeviceGroups));
427 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440, useDeviceGroups));
428 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440, useDeviceGroups));
429 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440, useDeviceGroups));
430 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440, useDeviceGroups));
431 group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440, useDeviceGroups));
432 }
433
434 } // sparse
435 } // vkt
436