// Copyright 2018 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "VkQueue.hpp"
#include "VkCommandBuffer.hpp"
#include "VkFence.hpp"
#include "VkSemaphore.hpp"
#include "VkStringify.hpp"
#include "VkTimelineSemaphore.hpp"
#include "Device/Renderer.hpp"
#include "WSI/VkSwapchainKHR.hpp"

#include "marl/defer.h"
#include "marl/scheduler.h"
#include "marl/thread.h"
#include "marl/trace.h"

#include <cstring>

namespace {

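// DeepCopySubmitInfo() clones the caller's VkSubmitInfo array into a single
// heap allocation. vkQueueSubmit() may return before the queue's worker
// thread consumes the submission, so every array the submit info points to
// must be copied as well. The block holds the VkSubmitInfo array followed,
// per submit, by its wait semaphores, wait stage masks, signal semaphores,
// command buffers, and any chained VkTimelineSemaphoreSubmitInfo with its
// wait/signal value arrays.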
VkSubmitInfo *DeepCopySubmitInfo(uint32_t submitCount, const VkSubmitInfo *pSubmits)
{
	size_t submitSize = sizeof(VkSubmitInfo) * submitCount;
	size_t totalSize = submitSize;

	// First pass: measure the total allocation, including the arrays
	// referenced by each submit and any recognized pNext structures.
	for(uint32_t i = 0; i < submitCount; i++)
	{
		totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
		totalSize += pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
		totalSize += pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
		totalSize += pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);

		for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
		    extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
		{
			switch(extension->sType)
			{
				case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
				{
					const auto *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);
					totalSize += sizeof(VkTimelineSemaphoreSubmitInfo);
					totalSize += tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
					totalSize += tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
				}
				break;
				default:
					WARN("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str());
					break;
			}
		}
	}

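	// Allocate one block for everything. The VkSubmitInfo array is
	// placement-new'd at the front of the block and then filled in with a
	// shallow copy of the caller's structures.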
	uint8_t *mem = static_cast<uint8_t *>(
	    vk::allocate(totalSize, vk::REQUIRED_MEMORY_ALIGNMENT, vk::DEVICE_MEMORY, vk::Fence::GetAllocationScope()));

	auto submits = new(mem) VkSubmitInfo[submitCount];
	memcpy(mem, pSubmits, submitSize);
	mem += submitSize;

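	// Second pass: copy each submit's arrays into the block and point the
	// cloned VkSubmitInfo at the copies instead of the caller's memory.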
	for(uint32_t i = 0; i < submitCount; i++)
	{
		// The shallow copy above duplicated the caller's pNext pointer;
		// reset it so that only structures deep-copied below are reachable.
		submits[i].pNext = nullptr;

		size_t size = pSubmits[i].waitSemaphoreCount * sizeof(VkSemaphore);
		submits[i].pWaitSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
		memcpy(mem, pSubmits[i].pWaitSemaphores, size);
		mem += size;

		size = pSubmits[i].waitSemaphoreCount * sizeof(VkPipelineStageFlags);
		submits[i].pWaitDstStageMask = reinterpret_cast<const VkPipelineStageFlags *>(mem);
		memcpy(mem, pSubmits[i].pWaitDstStageMask, size);
		mem += size;

		size = pSubmits[i].signalSemaphoreCount * sizeof(VkSemaphore);
		submits[i].pSignalSemaphores = reinterpret_cast<const VkSemaphore *>(mem);
		memcpy(mem, pSubmits[i].pSignalSemaphores, size);
		mem += size;

		size = pSubmits[i].commandBufferCount * sizeof(VkCommandBuffer);
		submits[i].pCommandBuffers = reinterpret_cast<const VkCommandBuffer *>(mem);
		memcpy(mem, pSubmits[i].pCommandBuffers, size);
		mem += size;

		for(const auto *extension = reinterpret_cast<const VkBaseInStructure *>(pSubmits[i].pNext);
		    extension != nullptr; extension = reinterpret_cast<const VkBaseInStructure *>(extension->pNext))
		{
			switch(extension->sType)
			{
				case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
				{
					const auto *tlsSubmitInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(extension);

					size = sizeof(VkTimelineSemaphoreSubmitInfo);
					auto *tlsSubmitInfoCopy = reinterpret_cast<VkTimelineSemaphoreSubmitInfo *>(mem);
					memcpy(mem, extension, size);
					// The copy terminates the chain; don't carry over the original pNext pointer.
					tlsSubmitInfoCopy->pNext = nullptr;
					mem += size;

					size = tlsSubmitInfo->waitSemaphoreValueCount * sizeof(uint64_t);
					tlsSubmitInfoCopy->pWaitSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
					memcpy(mem, tlsSubmitInfo->pWaitSemaphoreValues, size);
					mem += size;

					size = tlsSubmitInfo->signalSemaphoreValueCount * sizeof(uint64_t);
					tlsSubmitInfoCopy->pSignalSemaphoreValues = reinterpret_cast<uint64_t *>(mem);
					memcpy(mem, tlsSubmitInfo->pSignalSemaphoreValues, size);
					mem += size;

					submits[i].pNext = tlsSubmitInfoCopy;
				}
				break;
				default:
					WARN("submitInfo[%d]->pNext sType: %s", i, vk::Stringify(extension->sType).c_str());
					break;
			}
		}
	}

	return submits;
}

}  // anonymous namespace

namespace vk {

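// Each Queue owns a worker thread which executes submitted tasks in FIFO
// order; the constructor only records the device and spawns that thread.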
Queue::Queue(Device *device, marl::Scheduler *scheduler)
    : device(device)
{
	queueThread = std::thread(&Queue::taskLoop, this, scheduler);
}

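// Drain the worker thread with a KILL_THREAD task before joining it, then
// free any deep-copied submit info it retired but never reclaimed.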
Queue::~Queue()
{
	Task task;
	task.type = Task::KILL_THREAD;
	pending.put(task);

	queueThread.join();
	ASSERT_MSG(pending.count() == 0, "queue has work after worker thread shutdown");

	garbageCollect();
}

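// vkQueueSubmit() returns as soon as the task is enqueued, so the submit info
// is deep copied to keep it alive until the worker thread consumes it. If a
// fence was provided, its counted event is incremented here and decremented
// by submitQueue() once the work has finished.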
VkResult Queue::submit(uint32_t submitCount, const VkSubmitInfo *pSubmits, Fence *fence)
{
	garbageCollect();

	Task task;
	task.submitCount = submitCount;
	task.pSubmits = DeepCopySubmitInfo(submitCount, pSubmits);
	if(fence)
	{
		task.events = fence->getCountedEvent();
		task.events->add();
	}

	pending.put(task);

	return VK_SUCCESS;
}

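// Runs on the queue's worker thread. For each submit: locate any chained
// VkTimelineSemaphoreSubmitInfo, wait on the wait semaphores, execute the
// command buffers, then signal the signal semaphores.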
void Queue::submitQueue(const Task &task)
{
	if(renderer == nullptr)
	{
		renderer.reset(new sw::Renderer(device));
	}

	for(uint32_t i = 0; i < task.submitCount; i++)
	{
		VkSubmitInfo &submitInfo = task.pSubmits[i];
		const VkTimelineSemaphoreSubmitInfo *timelineInfo = nullptr;
		for(const auto *nextInfo = reinterpret_cast<const VkBaseInStructure *>(submitInfo.pNext);
		    nextInfo != nullptr; nextInfo = nextInfo->pNext)
		{
			switch(nextInfo->sType)
			{
				case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO:
					// Point at the matched chain node, not submitInfo.pNext, in case
					// the timeline info is not the first structure in the chain.
					timelineInfo = reinterpret_cast<const VkTimelineSemaphoreSubmitInfo *>(nextInfo);
					break;
				default:
					WARN("submitInfo.pNext->sType = %s", vk::Stringify(nextInfo->sType).c_str());
					break;
			}
		}

		for(uint32_t j = 0; j < submitInfo.waitSemaphoreCount; j++)
		{
			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pWaitSemaphores[j]))
			{
				ASSERT_MSG(timelineInfo != nullptr,
				           "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
				sem->wait(timelineInfo->pWaitSemaphoreValues[j]);
			}
			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pWaitSemaphores[j]))
			{
				sem->wait(submitInfo.pWaitDstStageMask[j]);
			}
			else
			{
				UNSUPPORTED("Unknown semaphore type");
			}
		}

		{
			CommandBuffer::ExecutionState executionState;
			executionState.renderer = renderer.get();
			executionState.events = task.events.get();
			for(uint32_t j = 0; j < submitInfo.commandBufferCount; j++)
			{
				Cast(submitInfo.pCommandBuffers[j])->submit(executionState);
			}
		}

		for(uint32_t j = 0; j < submitInfo.signalSemaphoreCount; j++)
		{
			if(auto *sem = DynamicCast<TimelineSemaphore>(submitInfo.pSignalSemaphores[j]))
			{
				ASSERT_MSG(timelineInfo != nullptr,
				           "the pNext chain must include a VkTimelineSemaphoreSubmitInfo if timeline semaphores are used");
				sem->signal(timelineInfo->pSignalSemaphoreValues[j]);
			}
			else if(auto *sem = DynamicCast<BinarySemaphore>(submitInfo.pSignalSemaphores[j]))
			{
				sem->signal();
			}
			else
			{
				UNSUPPORTED("Unknown semaphore type");
			}
		}
	}

	if(task.pSubmits)
	{
		// Retire the deep-copied submit info; it is freed later by garbageCollect().
		toDelete.put(task.pSubmits);
	}

	if(task.events)
	{
		// TODO: fix renderer signaling so that work submitted separately from (but before) a fence
		// is guaranteed complete by the time the fence signals.
		renderer->synchronize();
		task.events->done();
	}
}

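// Worker thread entry point. Binds the marl scheduler to this thread and
// processes tasks in FIFO order until a KILL_THREAD task arrives.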
void Queue::taskLoop(marl::Scheduler *scheduler)
{
	marl::Thread::setName("Queue<%p>", this);
	scheduler->bind();
	defer(scheduler->unbind());

	while(true)
	{
		Task task = pending.take();

		switch(task.type)
		{
			case Task::KILL_THREAD:
				ASSERT_MSG(pending.count() == 0, "queue has remaining work!");
				return;
			case Task::SUBMIT_QUEUE:
				submitQueue(task);
				break;
			default:
				UNREACHABLE("task.type %d", static_cast<int>(task.type));
				break;
		}
	}
}

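// An empty submit task carrying only a counted event acts as a barrier:
// tasks are processed in FIFO order, so by the time submitQueue() signals
// the event, all previously enqueued work has been synchronized.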
VkResult Queue::waitIdle()
{
	// Wait for task queue to flush.
	auto event = std::make_shared<sw::CountedEvent>();
	event->add();  // done() is called at the end of submitQueue()

	Task task;
	task.events = event;
	pending.put(task);

	event->wait();

	garbageCollect();

	return VK_SUCCESS;
}

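// Frees deep-copied submit info that the worker thread has retired to the
// toDelete queue. Entries are only retired after the worker has finished
// reading them, so deallocating here cannot race with their use.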
void Queue::garbageCollect()
{
	while(true)
	{
		auto v = toDelete.tryTake();
		if(!v.second) { break; }
		vk::deallocate(v.first, DEVICE_MEMORY);
	}
}

#ifndef __ANDROID__
VkResult Queue::present(const VkPresentInfoKHR *presentInfo)
{
	// This is a hack to deal with screen tearing for now.
	// Need to correctly implement threading using VkSemaphore
	// to get rid of it. b/132458423
	waitIdle();

	for(uint32_t i = 0; i < presentInfo->waitSemaphoreCount; i++)
	{
		auto *semaphore = vk::DynamicCast<BinarySemaphore>(presentInfo->pWaitSemaphores[i]);
		semaphore->wait();
	}

	VkResult commandResult = VK_SUCCESS;

	for(uint32_t i = 0; i < presentInfo->swapchainCount; i++)
	{
		auto *swapchain = vk::Cast(presentInfo->pSwapchains[i]);
		VkResult perSwapchainResult = swapchain->present(presentInfo->pImageIndices[i]);

		if(presentInfo->pResults)
		{
			presentInfo->pResults[i] = perSwapchainResult;
		}

		// Keep track of the worst result code. VK_SUBOPTIMAL_KHR is a success code,
		// so it must not override a failure code, but a later VK_SUCCESS must not
		// replace it either.
		if(perSwapchainResult != VK_SUCCESS)
		{
			if(commandResult == VK_SUCCESS || commandResult == VK_SUBOPTIMAL_KHR)
			{
				commandResult = perSwapchainResult;
			}
		}
	}

	return commandResult;
}
#endif

void Queue::beginDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo)
{
	// Optional debug label region
}

void Queue::endDebugUtilsLabel()
{
	// Close debug label region opened with beginDebugUtilsLabel()
}

void Queue::insertDebugUtilsLabel(const VkDebugUtilsLabelEXT *pLabelInfo)
{
	// Optional single debug label
}

}  // namespace vk