1 // Copyright 2024 The Android Open Source Project
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "DeviceOpTracker.h"
16 
17 #include <algorithm>
18 #include <type_traits>
19 
20 #include "host-common/GfxstreamFatalError.h"
21 #include "host-common/logging.h"
22 
23 namespace gfxstream {
24 namespace vk {
25 namespace {
26 
27 using emugl::ABORT_REASON_OTHER;
28 using emugl::FatalError;
29 
30 constexpr const size_t kSizeLoggingThreshold = 20;
31 
32 constexpr const auto kTimeThreshold = std::chrono::seconds(5);
33 
34 template <typename T>
35 inline constexpr bool always_false_v = false;
36 
37 }  // namespace
38 
DeviceOpTracker(VkDevice device,VulkanDispatch * deviceDispatch)39 DeviceOpTracker::DeviceOpTracker(VkDevice device, VulkanDispatch* deviceDispatch)
40     : mDevice(device), mDeviceDispatch(deviceDispatch) {}
41 
AddPendingGarbage(DeviceOpWaitable waitable,VkFence fence)42 void DeviceOpTracker::AddPendingGarbage(DeviceOpWaitable waitable, VkFence fence) {
43     std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
44 
45     mPendingGarbage.push_back(PendingGarabage{
46         .waitable = std::move(waitable),
47         .obj = fence,
48         .timepoint = std::chrono::system_clock::now(),
49     });
50 
51     if (mPendingGarbage.size() > kSizeLoggingThreshold) {
52         WARN("VkDevice:%p has %d pending garbage objects.", mDevice, mPendingGarbage.size());
53     }
54 }
55 
AddPendingGarbage(DeviceOpWaitable waitable,VkSemaphore semaphore)56 void DeviceOpTracker::AddPendingGarbage(DeviceOpWaitable waitable, VkSemaphore semaphore) {
57     std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
58 
59     mPendingGarbage.push_back(PendingGarabage{
60         .waitable = std::move(waitable),
61         .obj = semaphore,
62         .timepoint = std::chrono::system_clock::now(),
63     });
64 
65     if (mPendingGarbage.size() > kSizeLoggingThreshold) {
66         WARN("VkDevice:%p has %d pending garbage objects.", mDevice, mPendingGarbage.size());
67     }
68 }
69 
PollAndProcessGarbage()70 void DeviceOpTracker::PollAndProcessGarbage() {
71     {
72         std::lock_guard<std::mutex> lock(mPollFunctionsMutex);
73 
74         // Assuming that polling functions are added to the queue in the roughly the order
75         // they are used, encountering an unsignaled/pending polling functions likely means
76         // that all polling functions after are also still pending. This might not necessarily
77         // always be the case but it is a simple heuristic to try to minimize the amount of
78         // work performed here as it is expected that this function will be called while
79         // processing other guest vulkan functions.
80         auto firstPendingIt = std::find_if(mPollFunctions.begin(), mPollFunctions.end(),
81                                            [](const OpPollingFunction& pollingFunc) {
82                                                DeviceOpStatus status = pollingFunc();
83                                                return status == DeviceOpStatus::kPending;
84                                            });
85         mPollFunctions.erase(mPollFunctions.begin(), firstPendingIt);
86 
87         if (mPollFunctions.size() > kSizeLoggingThreshold) {
88             WARN("VkDevice:%p has %d pending waitables.", mDevice, mPollFunctions.size());
89         }
90     }
91 
92     const auto now = std::chrono::system_clock::now();
93     const auto old = now - kTimeThreshold;
94     {
95         std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
96 
97         // Assuming that pending garbage is added to the queue in the roughly the order
98         // they are used, encountering an unsignaled/pending waitable likely means that
99         // all pending garbage after is also still pending. This might not necessarily
100         // always be the case but it is a simple heuristic to try to minimize the amount
101         // of work performed here as it is expected that this function will be called
102         // while processing other guest vulkan functions.
103         auto firstPendingIt = std::find_if(mPendingGarbage.begin(), mPendingGarbage.end(),
104                                            [&](const PendingGarabage& pendingGarbage) {
105                                                if (pendingGarbage.timepoint < old) {
106                                                    return /*still pending=*/false;
107                                                }
108                                                return !IsDone(pendingGarbage.waitable);
109                                            });
110 
111         for (auto it = mPendingGarbage.begin(); it != firstPendingIt; it++) {
112             PendingGarabage& pendingGarbage = *it;
113 
114             if (pendingGarbage.timepoint < old) {
115                 const auto difference = std::chrono::duration_cast<std::chrono::milliseconds>(
116                     pendingGarbage.timepoint - now);
117                 WARN("VkDevice:%p had a waitable pending for %d milliseconds. Leaking object.",
118                      mDevice, difference.count());
119                 continue;
120             }
121 
122             std::visit(
123                 [this](auto&& arg) {
124                     using T = std::decay_t<decltype(arg)>;
125                     if constexpr (std::is_same_v<T, VkFence>) {
126                         mDeviceDispatch->vkDestroyFence(mDevice, arg, nullptr);
127                     } else if constexpr (std::is_same_v<T, VkSemaphore>) {
128                         mDeviceDispatch->vkDestroySemaphore(mDevice, arg, nullptr);
129                     } else {
130                         static_assert(always_false_v<T>, "non-exhaustive visitor!");
131                     }
132                 },
133                 pendingGarbage.obj);
134         }
135 
136         mPendingGarbage.erase(mPendingGarbage.begin(), firstPendingIt);
137 
138         if (mPendingGarbage.size() > kSizeLoggingThreshold) {
139             WARN("VkDevice:%p has %d pending garbage objects.", mDevice, mPendingGarbage.size());
140         }
141     }
142 }
143 
OnDestroyDevice()144 void DeviceOpTracker::OnDestroyDevice() {
145     mDeviceDispatch->vkDeviceWaitIdle(mDevice);
146 
147     PollAndProcessGarbage();
148 
149     {
150         std::lock_guard<std::mutex> lock(mPendingGarbageMutex);
151         if (!mPendingGarbage.empty()) {
152             WARN("VkDevice:%p has %d leaking garbage objects on destruction.", mDevice,
153                  mPendingGarbage.size());
154         }
155     }
156 }
157 
AddPendingDeviceOp(std::function<DeviceOpStatus ()> pollFunction)158 void DeviceOpTracker::AddPendingDeviceOp(std::function<DeviceOpStatus()> pollFunction) {
159     std::lock_guard<std::mutex> lock(mPollFunctionsMutex);
160     mPollFunctions.push_back(std::move(pollFunction));
161 }
162 
DeviceOpBuilder(DeviceOpTracker & tracker)163 DeviceOpBuilder::DeviceOpBuilder(DeviceOpTracker& tracker) : mTracker(tracker) {}
164 
~DeviceOpBuilder()165 DeviceOpBuilder::~DeviceOpBuilder() {
166     if (!mSubmittedFence) {
167         GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER))
168             << "Invalid usage: failed to call OnQueueSubmittedWithFence().";
169     }
170 }
171 
CreateFenceForOp()172 VkFence DeviceOpBuilder::CreateFenceForOp() {
173     const VkFenceCreateInfo fenceCreateInfo = {
174         .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
175         .pNext = nullptr,
176         .flags = 0,
177     };
178     VkFence fence = VK_NULL_HANDLE;
179     VkResult result = mTracker.mDeviceDispatch->vkCreateFence(mTracker.mDevice, &fenceCreateInfo,
180                                                               nullptr, &fence);
181 
182     mCreatedFence = fence;
183     if (result != VK_SUCCESS) {
184         ERR("DeviceOpBuilder failed to create VkFence!");
185         return VK_NULL_HANDLE;
186     }
187     return fence;
188 }
189 
OnQueueSubmittedWithFence(VkFence fence)190 DeviceOpWaitable DeviceOpBuilder::OnQueueSubmittedWithFence(VkFence fence) {
191     if (mCreatedFence.has_value() && fence != mCreatedFence) {
192         GFXSTREAM_ABORT(FatalError(ABORT_REASON_OTHER))
193             << "Invalid usage: failed to call OnQueueSubmittedWithFence() with the fence "
194             << "requested from CreateFenceForOp.";
195     }
196     mSubmittedFence = fence;
197 
198     const bool destroyFenceOnCompletion = mCreatedFence.has_value();
199 
200     std::shared_ptr<std::promise<void>> promise = std::make_shared<std::promise<void>>();
201     DeviceOpWaitable future = promise->get_future().share();
202 
203     mTracker.AddPendingDeviceOp([device = mTracker.mDevice,
204                                  deviceDispatch = mTracker.mDeviceDispatch, fence,
205                                  promise = std::move(promise), destroyFenceOnCompletion] {
206         if (fence == VK_NULL_HANDLE) {
207             return DeviceOpStatus::kDone;
208         }
209 
210         VkResult result =
211             deviceDispatch->vkWaitForFences(device, 1, &fence, /*waitAll=*/VK_TRUE, /*timeout=*/0);
212         if (result == VK_TIMEOUT) {
213             return DeviceOpStatus::kPending;
214         }
215 
216         if (destroyFenceOnCompletion) {
217             deviceDispatch->vkDestroyFence(device, fence, nullptr);
218         }
219         promise->set_value();
220 
221         return result == VK_SUCCESS ? DeviceOpStatus::kDone : DeviceOpStatus::kFailure;
222     });
223 
224     return future;
225 }
226 
227 }  // namespace vk
228 }  // namespace gfxstream