1 /*
2  * Copyright (C) 2020 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "ResilientDevice.h"
18 
19 #include "InvalidBuffer.h"
20 #include "InvalidDevice.h"
21 #include "InvalidPreparedModel.h"
22 #include "ResilientBuffer.h"
23 #include "ResilientPreparedModel.h"
24 
25 #include <android-base/logging.h>
26 #include <nnapi/IBuffer.h>
27 #include <nnapi/IDevice.h>
28 #include <nnapi/IPreparedModel.h>
29 #include <nnapi/Result.h>
30 #include <nnapi/TypeUtils.h>
31 #include <nnapi/Types.h>
32 
33 #include <algorithm>
34 #include <memory>
35 #include <string>
36 #include <vector>
37 
38 namespace android::hardware::neuralnetworks::utils {
39 namespace {
40 
41 template <typename FnType>
protect(const ResilientDevice & resilientDevice,const FnType & fn,bool blocking)42 auto protect(const ResilientDevice& resilientDevice, const FnType& fn, bool blocking)
43         -> decltype(fn(*resilientDevice.getDevice())) {
44     auto device = resilientDevice.getDevice();
45     auto result = fn(*device);
46 
47     // Immediately return if device is not dead.
48     if (result.has_value() || result.error().code != nn::ErrorStatus::DEAD_OBJECT) {
49         return result;
50     }
51 
52     // Attempt recovery and return if it fails.
53     auto maybeDevice = resilientDevice.recover(device.get(), blocking);
54     if (!maybeDevice.has_value()) {
55         const auto& [resultErrorMessage, resultErrorCode] = result.error();
56         const auto& [recoveryErrorMessage, recoveryErrorCode] = maybeDevice.error();
57         return nn::error(resultErrorCode)
58                << resultErrorMessage << ", and failed to recover dead device with error "
59                << recoveryErrorCode << ": " << recoveryErrorMessage;
60     }
61     device = std::move(maybeDevice).value();
62 
63     return fn(*device);
64 }
65 
66 }  // namespace
67 
create(Factory makeDevice)68 nn::GeneralResult<std::shared_ptr<const ResilientDevice>> ResilientDevice::create(
69         Factory makeDevice) {
70     if (makeDevice == nullptr) {
71         return NN_ERROR(nn::ErrorStatus::INVALID_ARGUMENT)
72                << "utils::ResilientDevice::create must have non-empty makeDevice";
73     }
74     auto device = NN_TRY(makeDevice(/*blocking=*/true));
75     CHECK(device != nullptr);
76 
77     auto name = device->getName();
78     auto versionString = device->getVersionString();
79     auto extensions = device->getSupportedExtensions();
80     auto capabilities = device->getCapabilities();
81 
82     return std::make_shared<ResilientDevice>(PrivateConstructorTag{}, std::move(makeDevice),
83                                              std::move(name), std::move(versionString),
84                                              std::move(extensions), std::move(capabilities),
85                                              std::move(device));
86 }
87 
ResilientDevice(PrivateConstructorTag,Factory makeDevice,std::string name,std::string versionString,std::vector<nn::Extension> extensions,nn::Capabilities capabilities,nn::SharedDevice device)88 ResilientDevice::ResilientDevice(PrivateConstructorTag /*tag*/, Factory makeDevice,
89                                  std::string name, std::string versionString,
90                                  std::vector<nn::Extension> extensions,
91                                  nn::Capabilities capabilities, nn::SharedDevice device)
92     : kMakeDevice(std::move(makeDevice)),
93       kName(std::move(name)),
94       kVersionString(std::move(versionString)),
95       kExtensions(std::move(extensions)),
96       kCapabilities(std::move(capabilities)),
97       mDevice(std::move(device)) {
98     CHECK(kMakeDevice != nullptr);
99     CHECK(mDevice != nullptr);
100 }
101 
getDevice() const102 nn::SharedDevice ResilientDevice::getDevice() const {
103     std::lock_guard guard(mMutex);
104     return mDevice;
105 }
106 
// Replaces the underlying device after `failingDevice` has been reported dead and returns the
// device that should be used from now on. The whole operation runs while holding mMutex.
//
// - If the stored device is no longer `failingDevice`, another caller has already replaced it and
//   the stored device is returned without invoking the factory.
// - Otherwise the factory is invoked with `blocking`; a factory error is propagated to the caller
//   and the stored device is left untouched.
// - If the new device reports a different name, version string, feature level, type, extension
//   list or capabilities than the stored device, an InvalidDevice carrying the cached metadata is
//   installed instead and this object is flagged as no longer valid.
nn::GeneralResult<nn::SharedDevice> ResilientDevice::recover(const nn::IDevice* failingDevice,
                                                             bool blocking) const {
    std::lock_guard lock(mMutex);

    // Someone else already swapped out the failing device; reuse their result.
    if (mDevice.get() != failingDevice) {
        return mDevice;
    }

    auto device = NN_TRY(kMakeDevice(blocking));

    // Calls the given IDevice getter on both the stored and the freshly created device and
    // reports whether the two results differ.
    auto differs = [this, &device](auto getter) REQUIRES(mMutex) {
        return std::invoke(getter, mDevice) != std::invoke(getter, device);
    };
    const bool metadataChanged =
            differs(&IDevice::getName) || differs(&IDevice::getVersionString) ||
            differs(&IDevice::getFeatureLevel) || differs(&IDevice::getType) ||
            differs(&IDevice::getSupportedExtensions) || differs(&IDevice::getCapabilities);

    // A device whose metadata changed (i.e., because it was updated) must not be exposed under
    // the cached identity: substitute an InvalidDevice that preserves the cached data.
    if (metadataChanged) {
        LOG(ERROR) << "Recovered device has different metadata than what is cached. Marking "
                      "IDevice object as invalid.";
        device = std::make_shared<const InvalidDevice>(
                kName, kVersionString, mDevice->getFeatureLevel(), mDevice->getType(), kExtensions,
                kCapabilities, mDevice->getNumberOfCacheFilesNeeded());
        mIsValid = false;
    }

    mDevice = std::move(device);
    return mDevice;
}
137 
getName() const138 const std::string& ResilientDevice::getName() const {
139     return kName;
140 }
141 
getVersionString() const142 const std::string& ResilientDevice::getVersionString() const {
143     return kVersionString;
144 }
145 
getFeatureLevel() const146 nn::Version ResilientDevice::getFeatureLevel() const {
147     return getDevice()->getFeatureLevel();
148 }
149 
getType() const150 nn::DeviceType ResilientDevice::getType() const {
151     return getDevice()->getType();
152 }
153 
getSupportedExtensions() const154 const std::vector<nn::Extension>& ResilientDevice::getSupportedExtensions() const {
155     return kExtensions;
156 }
157 
getCapabilities() const158 const nn::Capabilities& ResilientDevice::getCapabilities() const {
159     return kCapabilities;
160 }
161 
getNumberOfCacheFilesNeeded() const162 std::pair<uint32_t, uint32_t> ResilientDevice::getNumberOfCacheFilesNeeded() const {
163     return getDevice()->getNumberOfCacheFilesNeeded();
164 }
165 
wait() const166 nn::GeneralResult<void> ResilientDevice::wait() const {
167     const auto fn = [](const nn::IDevice& device) { return device.wait(); };
168     return protect(*this, fn, /*blocking=*/true);
169 }
170 
// Asks the underlying device which operations of `model` it supports, going through protect() so
// that a dead device triggers one non-blocking recovery attempt followed by a single retry.
nn::GeneralResult<std::vector<bool>> ResilientDevice::getSupportedOperations(
        const nn::Model& model) const {
    return protect(
            *this,
            [&model](const nn::IDevice& target) { return target.getSupportedOperations(model); },
            /*blocking=*/false);
}
178 
// Prepares `model` by forwarding every argument unchanged to prepareModelInternal().
//
// The preprocessor-disabled branch below is an alternative implementation that would wrap the
// result in a ResilientPreparedModel built from a factory capturing copies of the arguments. It
// is compiled out (`#if 0`) and has no effect on behavior.
nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModel(
        const nn::Model& model, nn::ExecutionPreference preference, nn::Priority priority,
        nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
        const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
#if 0
    auto self = shared_from_this();
    ResilientPreparedModel::Factory makePreparedModel = [device = std::move(self), model,
                                                         preference, priority, deadline, modelCache,
                                                         dataCache, token] {
        return device->prepareModelInternal(model, preference, priority, deadline, modelCache,
                                            dataCache, token);
    };
    return ResilientPreparedModel::create(std::move(makePreparedModel));
#else
    // Active path: hand back whatever the internal helper produces, without a resilient wrapper.
    return prepareModelInternal(model, preference, priority, deadline, modelCache, dataCache,
                                token);
#endif
}
197 
prepareModelFromCache(nn::OptionalTimePoint deadline,const std::vector<nn::SharedHandle> & modelCache,const std::vector<nn::SharedHandle> & dataCache,const nn::CacheToken & token) const198 nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCache(
199         nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
200         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
201 #if 0
202     auto self = shared_from_this();
203     ResilientPreparedModel::Factory makePreparedModel = [device = std::move(self), deadline,
204                                                          modelCache, dataCache, token] {
205         return device->prepareModelFromCacheInternal(deadline, modelCache, dataCache, token);
206     };
207     return ResilientPreparedModel::create(std::move(makePreparedModel));
208 #else
209     return prepareModelFromCacheInternal(deadline, modelCache, dataCache, token);
210 #endif
211 }
212 
// Allocates a buffer by forwarding every argument unchanged to allocateInternal().
//
// The preprocessor-disabled branch below is an alternative implementation that would wrap the
// result in a ResilientBuffer built from a factory capturing copies of the arguments. It is
// compiled out (`#if 0`) and has no effect on behavior.
nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocate(
        const nn::BufferDesc& desc, const std::vector<nn::SharedPreparedModel>& preparedModels,
        const std::vector<nn::BufferRole>& inputRoles,
        const std::vector<nn::BufferRole>& outputRoles) const {
#if 0
    auto self = shared_from_this();
    ResilientBuffer::Factory makeBuffer = [device = std::move(self), desc, preparedModels,
                                           inputRoles, outputRoles] {
        return device->allocateInternal(desc, preparedModels, inputRoles, outputRoles);
    };
    return ResilientBuffer::create(std::move(makeBuffer));
#else
    // Active path: hand back whatever the internal helper produces, without a resilient wrapper.
    return allocateInternal(desc, preparedModels, inputRoles, outputRoles);
#endif
}
228 
isValidInternal() const229 bool ResilientDevice::isValidInternal() const {
230     std::lock_guard hold(mMutex);
231     return mIsValid;
232 }
233 
// Prepares `model` on the underlying device. If this object has been flagged invalid, an
// InvalidPreparedModel is returned without contacting the device. Otherwise the request goes
// through protect(), so a dead device triggers one non-blocking recovery attempt followed by a
// single retry.
nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelInternal(
        const nn::Model& model, nn::ExecutionPreference preference, nn::Priority priority,
        nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
        const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
    if (!isValidInternal()) {
        return std::make_shared<const InvalidPreparedModel>();
    }

    // The callable is only used synchronously inside protect(), so capturing the larger
    // arguments by reference is safe.
    const auto prepareOn = [&model, preference, priority, &deadline, &modelCache, &dataCache,
                            &token](const nn::IDevice& target) {
        return target.prepareModel(model, preference, priority, deadline, modelCache, dataCache,
                                   token);
    };
    return protect(*this, prepareOn, /*blocking=*/false);
}
248 
prepareModelFromCacheInternal(nn::OptionalTimePoint deadline,const std::vector<nn::SharedHandle> & modelCache,const std::vector<nn::SharedHandle> & dataCache,const nn::CacheToken & token) const249 nn::GeneralResult<nn::SharedPreparedModel> ResilientDevice::prepareModelFromCacheInternal(
250         nn::OptionalTimePoint deadline, const std::vector<nn::SharedHandle>& modelCache,
251         const std::vector<nn::SharedHandle>& dataCache, const nn::CacheToken& token) const {
252     if (!isValidInternal()) {
253         return std::make_shared<const InvalidPreparedModel>();
254     }
255     const auto fn = [&deadline, &modelCache, &dataCache, &token](const nn::IDevice& device) {
256         return device.prepareModelFromCache(deadline, modelCache, dataCache, token);
257     };
258     return protect(*this, fn, /*blocking=*/false);
259 }
260 
// Allocates a buffer on the underlying device. If this object has been flagged invalid, an
// InvalidBuffer is returned without contacting the device. Otherwise the request goes through
// protect(), so a dead device triggers one non-blocking recovery attempt followed by a single
// retry.
nn::GeneralResult<nn::SharedBuffer> ResilientDevice::allocateInternal(
        const nn::BufferDesc& desc, const std::vector<nn::SharedPreparedModel>& preparedModels,
        const std::vector<nn::BufferRole>& inputRoles,
        const std::vector<nn::BufferRole>& outputRoles) const {
    if (!isValidInternal()) {
        return std::make_shared<const InvalidBuffer>();
    }

    // The callable is only used synchronously inside protect(), so reference captures are safe.
    const auto allocateOn = [&desc, &preparedModels, &inputRoles,
                             &outputRoles](const nn::IDevice& target) {
        return target.allocate(desc, preparedModels, inputRoles, outputRoles);
    };
    return protect(*this, allocateOn, /*blocking=*/false);
}
273 
274 }  // namespace android::hardware::neuralnetworks::utils
275