1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "Manager"
18
19 #include "Manager.h"
20
21 #include <CpuExecutor.h>
22 #include <LegacyUtils.h>
23 #include <MetaModel.h>
24 #include <Tracing.h>
25 #include <android-base/properties.h>
26 #include <nnapi/IBurst.h>
27 #include <nnapi/IDevice.h>
28 #include <nnapi/IExecution.h>
29 #include <nnapi/IPreparedModel.h>
30 #include <nnapi/SharedMemory.h>
31 #include <nnapi/TypeUtils.h>
32 #include <nnapi/Types.h>
33 #include <nnapi/Validation.h>
34
35 #include <algorithm>
36 #include <functional>
37 #include <iterator>
38 #include <map>
39 #include <memory>
40 #include <regex>
41 #include <set>
42 #include <string>
43 #include <tuple>
44 #include <utility>
45 #include <vector>
46
47 #include "ExecutionCallback.h"
48 #include "Memory.h"
49 #include "ModelArgumentInfo.h"
50 #include "ServerFlag.h"
51 #include "TypeManager.h"
52
53 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
54 #include <build/version.h>
55 #include <cutils/native_handle.h>
56 #include <nnapi/hal/1.3/Buffer.h>
57 #include <nnapi/hal/Service.h>
58 #endif // NN_COMPATIBILITY_LIBRARY_BUILD
59
60 #ifdef NN_EXPERIMENTAL_FEATURE
61 #include "NeuralNetworksExperimentalFeatures.h"
62 #endif // NN_EXPERIMENTAL_FEATURE
63
64 namespace android {
65 namespace nn {
66 namespace {
67
68 Version getRuntimeFeatureLevelVersionHelper() {
69 #if defined(NN_EXPERIMENTAL_FEATURE) && defined(NN_COMPATIBILITY_LIBRARY_BUILD)
70 #error "NN_EXPERIMENTAL_FEATURE is not supported when NN_COMPATIBILITY_LIBRARY_BUILD is defined"
71 #elif defined(NN_EXPERIMENTAL_FEATURE)
72 auto version = kVersionFeatureLevelExperimental;
73 // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
74 // features that are only available in the runtime.
75 version.runtimeOnlyFeatures = true;
76 #elif defined(NN_COMPATIBILITY_LIBRARY_BUILD)
77 auto version = serverFeatureLevelToVersion(kMaxFeatureLevelNum);
78 #else // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
79 auto version = serverFeatureLevelToVersion(getServerFeatureLevelFlag());
80 // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
81 // features that are only available in the runtime.
82 version.runtimeOnlyFeatures = true;
83 #endif // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
84 return version;
85 }
86
87 Version getRuntimeFeatureLevelVersion() {
88 static const Version version = getRuntimeFeatureLevelVersionHelper();
89 return version;
90 }
91
92 bool getWhetherPlatformTelemetryIsEnabled() {
93 #if !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
94 // b/287186978, force enable telemetry on the platform NNAPI
95 // return getServerTelemetryEnableFlag();
96 return true;
97 #else // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
98 return false;
99 #endif // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
100 }
101
102 } // namespace
103
104 // A Device with actual underlying driver
105 class DriverDevice : public Device {
106 public:
107 // Create a DriverDevice from a SharedDevice.
108 // Returns nullptr on failure.
109 static std::shared_ptr<DriverDevice> create(SharedDevice device);
110
111 // Prefer using DriverDevice::create
112 explicit DriverDevice(SharedDevice device);
113
114 const std::string& getName() const override { return kInterface->getName(); }
115 const std::string& getVersionString() const override { return kInterface->getVersionString(); }
116 Version getFeatureLevel() const override { return kInterface->getFeatureLevel(); }
117 int32_t getType() const override { return static_cast<int32_t>(kInterface->getType()); }
118 const std::vector<Extension>& getSupportedExtensions() const override {
119 return kInterface->getSupportedExtensions();
120 }
121 std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
122 const Capabilities& getCapabilities() const override { return kInterface->getCapabilities(); }
123 Capabilities::PerformanceInfo getPerformance(OperandType type) const override {
124 return getCapabilities().operandPerformance.lookup(type);
125 }
126 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
127 return getCapabilities().relaxedFloat32toFloat16PerformanceScalar;
128 }
129 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
130 return getCapabilities().relaxedFloat32toFloat16PerformanceTensor;
131 }
132 Capabilities::PerformanceInfo getIfPerformance() const override {
133 return getCapabilities().ifPerformance;
134 }
135 Capabilities::PerformanceInfo getWhilePerformance() const override {
136 return getCapabilities().whilePerformance;
137 }
138 std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
139 return kInterface->getNumberOfCacheFilesNeeded();
140 }
141 bool isCachingSupported() const override {
142 // Caching is supported if either of numModelCache or numDataCache is greater than 0.
143 const auto [numModelCacheFiles, numDataCacheFiles] = getNumberOfCacheFilesNeeded();
144 return numModelCacheFiles > 0 || numDataCacheFiles > 0;
145 }
146 int wait() const override {
147 auto result = kInterface->wait();
148 if (!result.ok()) {
149 LOG(ERROR) << "DriverDevice::wait error: " << result.error().message;
150 return convertErrorStatusToResultCode(result.error().code);
151 }
152 return ANEURALNETWORKS_NO_ERROR;
153 }
154
155 std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
156 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
157 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
158 const std::optional<CacheToken>& maybeToken,
159 const std::vector<TokenValuePair>& metaData,
160 const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;
161
162 std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
163 OperandType) const override;
164
165 private:
166 const SharedDevice kInterface;
167
168 GeneralResult<std::vector<bool>> getSupportedOperationsImpl(const MetaModel& metaModel) const;
169 GeneralResult<SharedPreparedModel> prepareModelFromCacheInternal(
170 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
171 const CacheToken& token) const;
172
173 #ifdef NN_DEBUGGABLE
174 // For debugging: behavior of IDevice::getSupportedOperations for SampleDriver.
175 // 0 - all operations reported by IDevice::getSupportedOperations() supported
176 // 1 - some operations reported by IDevice::getSupportedOperations() supported
177 uint32_t mSupported = 0;
178 #endif // NN_DEBUGGABLE
179 };
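// A minimal usage sketch (illustrative only; in the real runtime these calls are made by
// DeviceManager when drivers are registered, and sharedDevice/metaModel are placeholders):
//   std::shared_ptr<DriverDevice> device = DriverDevice::create(sharedDevice);
//   if (device != nullptr && device->isCachingSupported()) {
//       const std::vector<bool> supported = device->getSupportedOperations(metaModel);
//   }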
180
181 // A RuntimePreparedModel with an underlying IPreparedModel instance returned by the actual driver.
182 class DriverPreparedModel : public RuntimePreparedModel {
183 public:
184 DriverPreparedModel(const Device* device, const SharedPreparedModel& preparedModel)
185 : mDevice(device), mPreparedModel(preparedModel) {
186 CHECK(mDevice != nullptr);
187 CHECK(mPreparedModel != nullptr);
188 }
189
190 const Device* getDevice() const override { return mDevice; }
191 SharedPreparedModel getInterface() const override { return mPreparedModel; }
192
193 std::tuple<int, std::vector<OutputShape>, Timing> execute(
194 const std::vector<ModelArgumentInfo>& inputs,
195 const std::vector<ModelArgumentInfo>& outputs,
196 const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
197 MeasureTiming measure, const OptionalTimePoint& deadline,
198 const OptionalDuration& loopTimeoutDuration,
199 const std::vector<TokenValuePair>& metaData) const override;
200
201 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
202 const std::vector<ModelArgumentInfo>& inputs,
203 const std::vector<ModelArgumentInfo>& outputs,
204 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
205 MeasureTiming measure, const OptionalTimePoint& deadline,
206 const OptionalDuration& loopTimeoutDuration,
207 const OptionalDuration& timeoutDurationAfterFence,
208 const std::vector<TokenValuePair>& metaData) const override;
209
210 std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
211 const std::vector<ModelArgumentInfo>& inputs,
212 const std::vector<ModelArgumentInfo>& outputs,
213 const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
214 const OptionalDuration& loopTimeoutDuration,
215 const std::vector<TokenValuePair>& metaData) const override;
216
217 GeneralResult<SharedBurst> configureExecutionBurst() const override {
218 return mPreparedModel->configureExecutionBurst();
219 }
220
221 MemoryPreference getMemoryPreference() const override {
222 if (isCompliantVersion(kVersionFeatureLevel5, mDevice->getFeatureLevel())) {
223 return {kDefaultRequestMemoryAlignment, kDefaultRequestMemoryPadding};
224 } else {
225 // We are not able to pass memory padding information to HIDL drivers, so return the
226 // minimum padding.
227 return {kDefaultRequestMemoryAlignment, kMinMemoryPadding};
228 }
229 }
230
231 private:
232 const Device* mDevice;
233 const SharedPreparedModel mPreparedModel;
234 };
235
236 class DriverExecution : public RuntimeExecution {
237 public:
238 DriverExecution(SharedExecution execution, Request request,
239 std::vector<const RuntimeMemory*> memories, MeasureTiming measure,
240 OptionalDuration loopTimeoutDuration, Version deviceFeatureLevel,
241 const std::vector<TokenValuePair>& metaData)
242 : kExecution(std::move(execution)),
243 kRequest(std::move(request)),
244 kMemories(std::move(memories)),
245 kMeasure(measure),
246 kLoopTimeoutDuration(std::move(loopTimeoutDuration)),
247 kDeviceFeatureLevel(deviceFeatureLevel),
248 kMetaData(metaData) {
249 CHECK(kExecution != nullptr);
250 }
251
252 std::tuple<int, std::vector<OutputShape>, Timing> compute(
253 const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;
254
255 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
256 const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
257 const OptionalDuration& timeoutDurationAfterFence) const override;
258
259 private:
260 const SharedExecution kExecution;
261
262 // For burst execution.
263 const Request kRequest;
264 const std::vector<const RuntimeMemory*> kMemories;
265 const MeasureTiming kMeasure;
266 const OptionalDuration kLoopTimeoutDuration;
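// Reusable burst executions, created lazily in DriverExecution::compute() and keyed by the
// IBurst controller they were created for.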
267 mutable std::map<const IBurst*, SharedExecution> mCachedBurstExecutions;
268
269 // For fenced execution.
270 const Version kDeviceFeatureLevel;
271
272 // Execution metadata.
273 std::vector<TokenValuePair> kMetaData;
274 };
275
276 DriverDevice::DriverDevice(SharedDevice device) : kInterface(std::move(device)) {
277 CHECK(kInterface != nullptr);
278 #ifdef NN_DEBUGGABLE
279 static const char samplePrefix[] = "sample";
280 if (getName().substr(0, sizeof(samplePrefix) - 1) == samplePrefix) {
281 mSupported = getProp("debug.nn.sample.supported");
282 }
283 #endif // NN_DEBUGGABLE
284 }
285
286 std::shared_ptr<DriverDevice> DriverDevice::create(SharedDevice device) {
287 if (device == nullptr) {
288 LOG(ERROR) << "DriverDevice::create called with nullptr";
289 return nullptr;
290 }
291
292 return std::make_shared<DriverDevice>(std::move(device));
293 }
294
295 int64_t DeviceManager::versionToFeatureLevel(Version::Level versionLevel) {
296 switch (versionLevel) {
297 case Version::Level::FEATURE_LEVEL_1:
298 return ANEURALNETWORKS_FEATURE_LEVEL_1;
299 case Version::Level::FEATURE_LEVEL_2:
300 return ANEURALNETWORKS_FEATURE_LEVEL_2;
301 case Version::Level::FEATURE_LEVEL_3:
302 return ANEURALNETWORKS_FEATURE_LEVEL_3;
303 case Version::Level::FEATURE_LEVEL_4:
304 return ANEURALNETWORKS_FEATURE_LEVEL_4;
305 case Version::Level::FEATURE_LEVEL_5:
306 return ANEURALNETWORKS_FEATURE_LEVEL_5;
307 case Version::Level::FEATURE_LEVEL_6:
308 return ANEURALNETWORKS_FEATURE_LEVEL_6;
309 case Version::Level::FEATURE_LEVEL_7:
310 return ANEURALNETWORKS_FEATURE_LEVEL_7;
311 case Version::Level::FEATURE_LEVEL_8:
312 return ANEURALNETWORKS_FEATURE_LEVEL_8;
313 #ifdef NN_EXPERIMENTAL_FEATURE
314 case Version::Level::FEATURE_LEVEL_EXPERIMENTAL:
315 return ANEURALNETWORKS_FEATURE_LEVEL_EXPERIMENTAL;
316 #endif // NN_EXPERIMENTAL_FEATURE
317 }
318 LOG(FATAL) << "Unrecognized version " << versionLevel;
319 return -1;
320 }
321
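// getSupportedOperationsImpl() queries the driver with the slice of the model appropriate for
// the driver's feature level, then remaps the returned per-operation flags back to operation
// indices in the original, unsliced model.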
322 GeneralResult<std::vector<bool>> DriverDevice::getSupportedOperationsImpl(
323 const MetaModel& metaModel) const {
324 const auto featureLevel = kInterface->getFeatureLevel();
325 const auto slice = metaModel.getSlice(featureLevel);
326 if (!slice.has_value()) {
327 return NN_ERROR() << "getSlice(" << featureLevel << ") failed";
328 }
329
330 const auto& [sliceModel, slicedModelOperationIndexToModelOperationIndex] = *slice;
331 const std::vector<bool> supported = NN_TRY(kInterface->getSupportedOperations(sliceModel));
332 const uint32_t slicedOperationCount = sliceModel.main.operations.size();
333 if (supported.size() != slicedOperationCount) {
334 return NN_ERROR() << "IDevice::getSupportedOperations returned a vector of length "
335 << supported.size() << " when expecting " << slicedOperationCount;
336 }
337
338 const Model& model = metaModel.getModel();
339 const uint32_t operationCount = model.main.operations.size();
340 std::vector<bool> remappedSupported(operationCount, false);
341 for (size_t i = 0; i < supported.size(); ++i) {
342 if (supported[i]) {
343 remappedSupported[slicedModelOperationIndexToModelOperationIndex(i)] = true;
344 }
345 }
346 return remappedSupported;
347 }
348
349 std::vector<bool> DriverDevice::getSupportedOperations(const MetaModel& metaModel) const {
350 const Model& model = metaModel.getModel();
351
352 auto result = getSupportedOperationsImpl(metaModel);
353 if (!result.ok()) {
354 LOG(ERROR) << "getSupportedOperations failed with code " << result.error().code << ": "
355 << result.error().message;
356 // Set the supported operation vector to all false, so we won't use this driver.
357 return std::vector<bool>(model.main.operations.size(), false);
358 }
359
360 std::vector<bool>& supportedOperations = result.value();
361 #ifdef NN_DEBUGGABLE
362 if (mSupported != 1) {
363 return supportedOperations;
364 }
365
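// "Partial support" debug mode (mSupported == 1): pseudo-randomly drop some operations that the
// driver reports as supported. Each drop decision comes from XOR-hashing the device name with the
// operation type and the type/dimensions/lifetime of its operands, which exercises model
// partitioning across multiple devices.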
366 const uint32_t baseAccumulator = std::hash<std::string>{}(getName());
367 for (size_t operationIndex = 0; operationIndex < supportedOperations.size(); operationIndex++) {
368 if (!supportedOperations[operationIndex]) {
369 continue;
370 }
371
372 uint32_t accumulator = baseAccumulator;
373 const Operation& operation = model.main.operations[operationIndex];
374 accumulator ^= static_cast<uint32_t>(operation.type);
375 auto accumulateOperands = [&model, &accumulator](const std::vector<uint32_t>& operands) {
376 for (uint32_t operandIndex : operands) {
377 const Operand& operand = model.main.operands[operandIndex];
378 accumulator ^= static_cast<uint32_t>(operand.type);
379 accumulator ^= operand.dimensions.size();
380 for (const Dimension& dimension : operand.dimensions) {
381 accumulator ^= dimension;
382 if (operand.lifetime == Operand::LifeTime::CONSTANT_COPY ||
383 operand.lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
384 operand.lifetime == Operand::LifeTime::POINTER) {
385 accumulator ^= 1;
386 }
387 }
388 }
389 };
390 accumulateOperands(operation.inputs);
391 accumulateOperands(operation.outputs);
392 if (accumulator & 1) {
393 supportedOperations[operationIndex] = false;
394 }
395 }
396 #endif // NN_DEBUGGABLE
397
398 return supportedOperations;
399 }
400
401 // Opens a cache file for reading and writing and returns a shared handle.
402 static GeneralResult<SharedHandle> createCacheHandle(const std::string& filename,
403 bool createIfNotExist) {
404 auto fd = base::unique_fd(open(filename.c_str(), createIfNotExist ? (O_RDWR | O_CREAT) : O_RDWR,
405 S_IRUSR | S_IWUSR));
406 if (!fd.ok()) {
407 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
408 << "Failed to " << (createIfNotExist ? "open or create" : "open") << " cache file "
409 << filename;
410 }
411 return std::make_shared<const Handle>(std::move(fd));
412 }
413
414 // Opens a list of cache files and returns a vector of shared handles. The files
415 // are always opened with both read and write permissions.
416 static GeneralResult<std::vector<SharedHandle>> createCacheHandleVec(
417 uint32_t numCacheFiles, const std::string& baseFilename, bool createIfNotExist) {
418 CHECK(numCacheFiles <= kMaxNumberOfCacheFiles);
419 std::vector<SharedHandle> handles;
420 handles.reserve(numCacheFiles);
421 for (uint32_t i = 0; i < numCacheFiles; i++) {
422 std::string filename = baseFilename + std::to_string(i);
423 VLOG(COMPILATION) << "Cache " << i << ": " << filename;
424 handles.push_back(NN_TRY(createCacheHandle(filename, createIfNotExist)));
425 }
426 return handles;
427 }
428
429 // Maps a token to cache file names and returns a pair of vectors of shared
430 // handles to the opened files.
431 static GeneralResult<CacheHandles> getCacheHandles(
432 const CacheInfo& cacheInfo, const CacheToken& token,
433 const std::pair<uint32_t, uint32_t>& numCacheFiles, bool createIfNotExist) {
434 if (const auto* cacheHandles = std::get_if<CacheHandles>(&cacheInfo.variant)) {
435 if (cacheHandles->modelCache.size() != numCacheFiles.first) {
436 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
437 << "Expected " << numCacheFiles.first << " model cache handles, got "
438 << cacheHandles->modelCache.size();
439 }
440 if (cacheHandles->dataCache.size() != numCacheFiles.second) {
441 return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
442 << "Expected " << numCacheFiles.second << " data cache handles, got "
443 << cacheHandles->dataCache.size();
444 }
445 return *cacheHandles;
446 }
447
448 // The filename includes kByteSizeOfCacheToken * 2 characters for token,
449 // and 1 character for model/data cache identifier.
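// For example, a token whose first byte is 0x2A yields the prefix "KC" ('A' + 0xA, 'A' + 0x2);
// the full base name is kByteSizeOfCacheToken * 2 characters in [A-P], and createCacheHandleVec
// later appends the per-file index after the '1' (model cache) or '2' (data cache) marker.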
450 std::string filename(kByteSizeOfCacheToken * 2 + 1, '0');
451 for (uint32_t i = 0; i < kByteSizeOfCacheToken; i++) {
452 filename[i * 2] = 'A' + (token[i] & 0x0F);
453 filename[i * 2 + 1] = 'A' + (token[i] >> 4);
454 }
455
456 const auto& cacheDir = std::get<CacheDir>(cacheInfo.variant);
457 CHECK(cacheDir.empty() || cacheDir.back() == '/');
458 std::string cacheFileName = cacheDir + filename;
459 const uint32_t cacheTypeIdentifierIndex = cacheDir.size() + kByteSizeOfCacheToken * 2;
460
461 cacheFileName[cacheTypeIdentifierIndex] = '1';
462 std::vector<SharedHandle> modelCache =
463 NN_TRY(createCacheHandleVec(numCacheFiles.first, cacheFileName, createIfNotExist));
464
465 cacheFileName[cacheTypeIdentifierIndex] = '2';
466 std::vector<SharedHandle> dataCache =
467 NN_TRY(createCacheHandleVec(numCacheFiles.second, cacheFileName, createIfNotExist));
468
469 return CacheHandles{
470 .modelCache = std::move(modelCache),
471 .dataCache = std::move(dataCache),
472 };
473 }
474
475 GeneralResult<SharedPreparedModel> DriverDevice::prepareModelFromCacheInternal(
476 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
477 const CacheToken& token) const {
478 // Get cache files if they exist, otherwise return from the function early.
479 auto cache = NN_TRY(getCacheHandles(cacheInfo, token, kInterface->getNumberOfCacheFilesNeeded(),
480 /*createIfNotExist=*/false));
481 return kInterface->prepareModelFromCache(deadline, cache.modelCache, cache.dataCache, token);
482 }
483
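// Compilation flow: if a cache token is supplied, first try prepareModelFromCache; on any
// failure, fall back to a full IDevice::prepareModel, handing the driver the (possibly newly
// created) cache files so it can populate them for future compilations.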
484 std::pair<int, std::shared_ptr<RuntimePreparedModel>> DriverDevice::prepareModel(
485 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
486 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
487 const std::optional<CacheToken>& maybeToken, const std::vector<TokenValuePair>& metaData,
488 const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const {
489 // Attempt to compile from cache if token is present.
490 if (maybeToken.has_value()) {
491 auto result = prepareModelFromCacheInternal(deadline, cacheInfo, *maybeToken);
492 if (result.has_value()) {
493 LOG(INFO) << "prepareModelFromCache: successfully prepared model from cache";
494 return {ANEURALNETWORKS_NO_ERROR,
495 std::make_shared<DriverPreparedModel>(this, std::move(result).value())};
496 } else {
497 LOG(ERROR) << "prepareModelFromCache failure (" << result.error().code
498 << "): " << result.error().message;
499 }
500 }
501
502 // Get cache files if they exist, otherwise create them.
503 CacheHandles cache;
504 if (maybeToken.has_value()) {
505 auto result =
506 getCacheHandles(cacheInfo, *maybeToken, kInterface->getNumberOfCacheFilesNeeded(),
507 /*createIfNotExist=*/true);
508 if (result.has_value()) {
509 cache = std::move(result).value();
510 } else {
511 LOG(ERROR) << "getCacheHandles failure (" << result.error().code
512 << "): " << result.error().message;
513 }
514 }
515
516 // Get the token if it exists, otherwise get a null token.
517 static constexpr CacheToken kNullToken = {};
518 const CacheToken token = maybeToken.value_or(kNullToken);
519
520 // Fall back to full compilation (possibly with the token) if
521 // prepareModelFromCache could not be used or failed.
522 const Model model = makeModel();
523 auto result =
524 kInterface->prepareModel(model, preference, priority, deadline, cache.modelCache,
525 cache.dataCache, token, metaData, extensionNameAndPrefix);
526 if (!result.ok()) {
527 LOG(ERROR) << "IDevice::prepareModel() error: " << result.error().message;
528 return {convertErrorStatusToResultCode(result.error().code), nullptr};
529 }
530 SharedPreparedModel preparedModel = std::move(result).value();
531 CHECK(preparedModel != nullptr)
532 << "IDevice::prepareModel() returned nullptr without error code";
533 return {ANEURALNETWORKS_NO_ERROR,
534 std::make_shared<DriverPreparedModel>(this, std::move(preparedModel))};
535 }
536
537 std::pair<int, std::unique_ptr<RuntimeMemory>> DriverDevice::allocate(const MemoryDescriptor& desc,
538 OperandType) const {
539 const BufferDesc bufferDesc = {.dimensions = desc.dimensions};
540 std::vector<SharedPreparedModel> preparedModels(desc.preparedModels.size());
541 std::transform(desc.preparedModels.begin(), desc.preparedModels.end(), preparedModels.begin(),
542 [](const auto* preparedModel) {
543 const auto versionedPreparedModel = preparedModel->getInterface();
544 CHECK(versionedPreparedModel != nullptr);
545 return versionedPreparedModel;
546 });
547 auto result =
548 kInterface->allocate(bufferDesc, preparedModels, desc.inputRoles, desc.outputRoles);
549 if (!result.ok()) {
550 LOG(ERROR) << "DriverDevice::allocate -- memory allocation on device " << getName()
551 << " failed!";
552 return {convertErrorStatusToResultCode(result.error().code), nullptr};
553 }
554 return MemoryFromDevice::create(std::move(result).value());
555 }
556
557 static Request createDriverRequest(const std::vector<ModelArgumentInfo>& inputs,
558 const std::vector<ModelArgumentInfo>& outputs,
559 const std::vector<const RuntimeMemory*>& memories) {
560 Request request;
561 request.inputs.reserve(inputs.size());
562 std::transform(inputs.begin(), inputs.end(), std::back_inserter(request.inputs),
563 [](const auto& input) { return input.createRequestArgument(); });
564 request.outputs.reserve(outputs.size());
565 std::transform(outputs.begin(), outputs.end(), std::back_inserter(request.outputs),
566 [](const auto& output) { return output.createRequestArgument(); });
567 request.pools.reserve(memories.size());
568 std::transform(memories.begin(), memories.end(), std::back_inserter(request.pools),
569 [](const RuntimeMemory* memory) { return memory->getMemoryPool(); });
570 return request;
571 }
572
573 // Perform computation on an actual device driver.
574 //
575 // Because HIDL cannot take raw pointers, two separate memory pools will be allocated for inputs and
576 // outputs specified by pointers. The input pointer data will be copied to the input pool prior to
577 // execution, and the output pointer data will be copied out from the output pool after the
578 // execution.
579 std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
580 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
581 const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
582 MeasureTiming measure, const OptionalTimePoint& deadline,
583 const OptionalDuration& loopTimeoutDuration,
584 const std::vector<TokenValuePair>& metaData) const {
585 NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");
586
587 auto request = createDriverRequest(inputs, outputs, memories);
588
589 NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::execute::execute");
590
591 ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> result;
592
593 // compute using burst if present, otherwise compute from IPreparedModel
594 const bool burstCompute = (burstController != nullptr);
595 if (burstCompute) {
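// Register every request memory with the burst controller so the driver can reuse its mapping
// across executions; the returned hold is attached to the RuntimeMemory to keep the cache
// entry alive.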
596 for (const RuntimeMemory* memory : memories) {
597 const auto pool = memory->getMemoryPool();
598 if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
599 auto cacheHold = burstController->cacheMemory(*maybeMemory);
600 memory->hold(cacheHold);
601 }
602 }
603
604 VLOG(EXECUTION) << "Before burstController->execute() " << SHOW_IF_DEBUG(request);
605 result = burstController->execute(request, measure, deadline, loopTimeoutDuration, metaData,
606 TypeManager::get()->getExtensionNameAndPrefix(metaData));
607 } else {
608 result = mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
609 TypeManager::get()->getExtensionNameAndPrefix(metaData));
610 }
611
612 int n = ANEURALNETWORKS_OP_FAILED;
613 std::vector<OutputShape> outputShapes;
614 Timing timing;
615
616 if (result.ok()) {
617 n = ANEURALNETWORKS_NO_ERROR;
618 std::tie(outputShapes, timing) = std::move(result).value();
619 } else {
620 auto [message, code, returnedOutputShapes] = std::move(result).error();
621 VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
622 LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
623 << "::execute(...) error: " << message;
624 n = convertErrorStatusToResultCode(code);
625 if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
626 outputShapes = std::move(returnedOutputShapes);
627 }
628 return {n, std::move(outputShapes), timing};
629 }
630
631 VLOG(EXECUTION) << "DriverPreparedModel::execute completed";
632 return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
633 }
634
635 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverPreparedModel::executeFenced(
636 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
637 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
638 MeasureTiming measure, const OptionalTimePoint& deadline,
639 const OptionalDuration& loopTimeoutDuration,
640 const OptionalDuration& timeoutDurationAfterFence,
641 const std::vector<TokenValuePair>& metaData) const {
642 NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
643 CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));
644
645 auto request = createDriverRequest(inputs, outputs, memories);
646
647 NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::executeFenced");
648
649 std::vector<SyncFence> waitForHandles;
650 waitForHandles.reserve(waitFor.size());
651 for (int fd : waitFor) {
652 int dupFd = dup(fd);
653 if (dupFd < 0) {
654 LOG(ERROR) << "Unable to dup the file descriptor";
655 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
656 }
657 waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
658 }
659
660 SyncFence syncFence = SyncFence::createAsSignaled();
661 ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
662 Timing timing = {};
663 if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, mDevice->getFeatureLevel())) {
664 auto result = mPreparedModel->executeFenced(
665 request, waitForHandles, measure, deadline, loopTimeoutDuration,
666 timeoutDurationAfterFence, metaData,
667 TypeManager::get()->getExtensionNameAndPrefix(metaData));
668 if (!result.ok()) {
669 LOG(ERROR) << "IPreparedModel::executeFenced() error: " << result.error().message;
670 VLOG(EXECUTION) << "**executeFenced failed**";
671 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
672 }
673 std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
674 } else {
675 // Fall back to synchronous execution if executeFenced is not supported.
676 // First wait for all sync fences to be ready.
677 LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
678 for (const auto& fence : waitForHandles) {
679 if (!fence.hasFd() || fence.getFd() < 0) {
680 return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
681 }
682 auto r = fence.syncWait({/* no timeout */});
683 if (r != SyncFence::FenceState::SIGNALED) {
684 LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
685 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
686 }
687 }
688 auto result =
689 mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
690 TypeManager::get()->getExtensionNameAndPrefix(metaData));
691 if (!result.ok()) {
692 LOG(ERROR) << "IPreparedModel::execute() error: " << result.error().message;
693 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
694 }
695 std::tie(std::ignore, timing) = result.value();
696 }
697
698 int syncFenceFd = -1;
699 if (syncFence.hasFd()) {
700 syncFenceFd = dup(syncFence.getFd());
701 if (syncFenceFd < 0) {
702 LOG(ERROR) << "Failed to dup the file descriptor";
703 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
704 }
705 }
706
707 VLOG(EXECUTION) << "DriverPreparedModel::executeFenced completed";
708 return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
709 }
710
711 std::pair<int, std::shared_ptr<RuntimeExecution>> DriverPreparedModel::createReusableExecution(
712 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
713 const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
714 const OptionalDuration& loopTimeoutDuration,
715 const std::vector<TokenValuePair>& metaData) const {
716 NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::createReusableExecution");
717
718 auto request = createDriverRequest(inputs, outputs, memories);
719 auto result = mPreparedModel->createReusableExecution(
720 request, measure, loopTimeoutDuration, metaData,
721 TypeManager::get()->getExtensionNameAndPrefix(metaData));
722 if (!result.ok()) {
723 LOG(ERROR) << "IPreparedModel::createReusableExecution() error: " << result.error().message;
724 const int n = convertErrorStatusToResultCode(result.error().code);
725 return {n, nullptr};
726 }
727 auto execution = std::make_shared<DriverExecution>(
728 std::move(result).value(), std::move(request), memories, measure, loopTimeoutDuration,
729 mDevice->getFeatureLevel(), metaData);
730 return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
731 }
732
733 std::tuple<int, std::vector<OutputShape>, Timing> DriverExecution::compute(
734 const SharedBurst& burstController, const OptionalTimePoint& deadline) const {
735 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::compute");
736
737 // compute using burst if present, otherwise compute from IPreparedModel
738 SharedExecution execution;
739 const bool burstCompute = (burstController != nullptr);
740 if (burstCompute) {
741 // create a reusable burst execution if the controller has not been seen before
742 auto burstExecution = mCachedBurstExecutions.find(burstController.get());
743 if (burstExecution == mCachedBurstExecutions.end()) {
744 for (const RuntimeMemory* memory : kMemories) {
745 const auto pool = memory->getMemoryPool();
746 if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
747 auto cacheHold = burstController->cacheMemory(*maybeMemory);
748 memory->hold(cacheHold);
749 }
750 }
751 auto createResult = burstController->createReusableExecution(
752 kRequest, kMeasure, kLoopTimeoutDuration, kMetaData,
753 TypeManager::get()->getExtensionNameAndPrefix(kMetaData));
754 if (!createResult.ok()) {
755 LOG(ERROR) << "IBurst::createReusableExecution() error: "
756 << createResult.error().message;
757 const int n = convertErrorStatusToResultCode(createResult.error().code);
758 return {n, {}, {}};
759 }
760 execution = std::move(createResult).value();
761 mCachedBurstExecutions.emplace(burstController.get(), execution);
762 } else {
763 execution = burstExecution->second;
764 }
765 VLOG(EXECUTION) << "Before mBurstExecution->compute() " << SHOW_IF_DEBUG(kRequest);
766 } else {
767 execution = kExecution;
768 }
769
770 CHECK(execution != nullptr);
771 auto result = execution->compute(deadline);
772 if (!result.ok()) {
773 auto [message, code, returnedOutputShapes] = std::move(result).error();
774 int n = convertErrorStatusToResultCode(code);
775 VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
776 LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
777 << "::execute(...) error: " << message;
778 if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
779 return {n, std::move(returnedOutputShapes), {}};
780 }
781 return {n, {}, {}};
782 }
783
784 VLOG(EXECUTION) << "DriverExecution::compute completed";
785 auto [outputShapes, timing] = std::move(result).value();
786 return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
787 }
788
789 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverExecution::computeFenced(
790 const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
791 const OptionalDuration& timeoutDurationAfterFence) const {
792 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::computeFenced");
793 CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));
794
795 std::vector<SyncFence> waitForHandles;
796 waitForHandles.reserve(waitFor.size());
797 for (int fd : waitFor) {
798 int dupFd = dup(fd);
799 if (dupFd < 0) {
800 LOG(ERROR) << "Unable to dup the file descriptor";
801 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
802 }
803 waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
804 }
805
806 SyncFence syncFence = SyncFence::createAsSignaled();
807 ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
808 Timing timing = {};
809 if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, kDeviceFeatureLevel)) {
810 auto result =
811 kExecution->computeFenced(waitForHandles, deadline, timeoutDurationAfterFence);
812 if (!result.ok()) {
813 LOG(ERROR) << "IExecution::computeFenced() error: " << result.error().message;
814 VLOG(EXECUTION) << "**computeFenced failed**";
815 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
816 }
817 std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
818 } else {
819 // Fall back to synchronous execution if computeFenced is not supported.
820 // First wait for all sync fences to be ready.
821 LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
822 for (const auto& fence : waitForHandles) {
823 if (!fence.hasFd() || fence.getFd() < 0) {
824 return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
825 }
826 auto r = fence.syncWait({/* no timeout */});
827 if (r != SyncFence::FenceState::SIGNALED) {
828 LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
829 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
830 }
831 }
832 auto result = kExecution->compute(deadline);
833 if (!result.ok()) {
834 LOG(ERROR) << "IExecution::compute() error: " << result.error().message;
835 return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
836 }
837 std::tie(std::ignore, timing) = result.value();
838 }
839
840 int syncFenceFd = -1;
841 if (syncFence.hasFd()) {
842 syncFenceFd = dup(syncFence.getFd());
843 if (syncFenceFd < 0) {
844 LOG(ERROR) << "Failed to dup the file descriptor";
845 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
846 }
847 }
848
849 VLOG(EXECUTION) << "DriverExecution::computeFenced completed";
850 return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
851 }
852
853 static Capabilities createCpuCapabilities() {
854 constexpr Capabilities::PerformanceInfo kPerf = {.execTime = 1.0f, .powerUsage = 1.0f};
855 return makeCapabilities(kPerf, kPerf, kPerf);
856 }
857
858 // A special abstracted device for the CPU. Only one instance of this class will exist.
859 // Use get() to retrieve it.
860 class CpuDevice : public Device {
861 public:
862 // Returns the singleton CPU fallback device.
863 static std::shared_ptr<CpuDevice> get() {
864 static std::shared_ptr<CpuDevice> instance(new CpuDevice);
865 return instance;
866 }
867
868 const std::string& getName() const override { return kName; }
869 const std::string& getVersionString() const override { return kVersionString; }
870 Version getFeatureLevel() const override { return kVersion; }
871 int32_t getType() const override { return ANEURALNETWORKS_DEVICE_CPU; }
872 const std::vector<Extension>& getSupportedExtensions() const override {
873 return kSupportedExtensions;
874 }
875 std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
876 const Capabilities& getCapabilities() const override { return kCapabilities; }
877 Capabilities::PerformanceInfo getPerformance(OperandType) const override {
878 return kPerformance;
879 }
880 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
881 return kPerformance;
882 }
883 Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
884 return kPerformance;
885 }
886 Capabilities::PerformanceInfo getIfPerformance() const override { return kPerformance; }
887 Capabilities::PerformanceInfo getWhilePerformance() const override { return kPerformance; }
888 std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
889 return {/*numModelCache=*/0, /*numDataCache=*/0};
890 }
891 bool isCachingSupported() const override { return false; }
892 int wait() const override { return ANEURALNETWORKS_NO_ERROR; }
893
894 std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
895 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
896 const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
897 const std::optional<CacheToken>& maybeToken,
898 const std::vector<TokenValuePair>& metaData,
899 const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;
900
901 std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
902 OperandType type) const override;
903
904 private:
905 CpuDevice() = default;
906 const Version kVersion = getRuntimeFeatureLevelVersion();
907 const std::string kName = "nnapi-reference";
908 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
909 const std::string kVersionString = build::GetBuildNumber();
910 #else
911 const std::string kVersionString = "UNKNOWN";
912 #endif // NN_COMPATIBILITY_LIBRARY_BUILD
913 // Since the performance is a ratio compared to the CPU performance,
914 // by definition the performance of the CPU is 1.0.
915 const Capabilities::PerformanceInfo kPerformance = {.execTime = 1.0f, .powerUsage = 1.0f};
916 const Capabilities kCapabilities = createCpuCapabilities();
917 const std::vector<Extension> kSupportedExtensions{/* No extensions. */};
918 };
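// Minimal usage sketch (illustrative only; metaModel is a placeholder): the CPU fallback is
// always available and is retrieved through the singleton accessor, e.g.
//   const std::shared_ptr<CpuDevice> cpu = CpuDevice::get();
//   const std::vector<bool> supported = cpu->getSupportedOperations(metaModel);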
919
920 // A special abstracted RuntimePreparedModel for the CPU, constructed by CpuDevice.
921 class CpuPreparedModel : public RuntimePreparedModel {
922 public:
923 // Factory method for CpuPreparedModel. Returns ANEURALNETWORKS_NO_ERROR and
924 // a prepared model object if successfully created. Returns an error code
925 // and nullptr otherwise.
926 static std::pair<int, std::shared_ptr<RuntimePreparedModel>> create(Model model);
927
928 const Device* getDevice() const override { return CpuDevice::get().get(); }
929 SharedPreparedModel getInterface() const override { return nullptr; }
930
931 std::tuple<int, std::vector<OutputShape>, Timing> execute(
932 const std::vector<ModelArgumentInfo>& inputs,
933 const std::vector<ModelArgumentInfo>& outputs,
934 const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
935 MeasureTiming measure, const OptionalTimePoint& deadline,
936 const OptionalDuration& loopTimeoutDuration,
937 const std::vector<TokenValuePair>& metaData) const override;
938
939 GeneralResult<SharedBurst> configureExecutionBurst() const override { return nullptr; }
940
941 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
942 const std::vector<ModelArgumentInfo>& inputs,
943 const std::vector<ModelArgumentInfo>& outputs,
944 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
945 MeasureTiming measure, const OptionalTimePoint& deadline,
946 const OptionalDuration& loopTimeoutDuration,
947 const OptionalDuration& timeoutDurationAfterFence,
948 const std::vector<TokenValuePair>& metaData) const override;
949
950 std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
951 const std::vector<ModelArgumentInfo>& inputs,
952 const std::vector<ModelArgumentInfo>& outputs,
953 const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
954 const OptionalDuration& loopTimeoutDuration,
955 const std::vector<TokenValuePair>& metaData) const override;
956
957 MemoryPreference getMemoryPreference() const override {
958 return {kPreferredAlignment, kPreferredPadding};
959 }
960
961 // Prefer to use CpuPreparedModel::create.
962 CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
963 : mModel(std::move(model)), mModelPoolInfos(std::move(poolInfos)) {}
964
965 const Model& getModel() const { return mModel; }
966 const std::vector<RunTimePoolInfo>& getModelPoolInfos() const { return mModelPoolInfos; }
967
968 private:
969 // TFLite kernels prefer 64 bytes for padding and alignment.
970 static constexpr uint32_t kPreferredAlignment = 64;
971 static constexpr uint32_t kPreferredPadding = 64;
972
973 const Model mModel;
974 const std::vector<RunTimePoolInfo> mModelPoolInfos;
975 };
976
977 class CpuExecution : public RuntimeExecution {
978 public:
979 CpuExecution(const CpuPreparedModel& preparedModel, Request request,
980 std::vector<RunTimePoolInfo> requestPoolInfos,
981 OptionalDuration loopTimeoutDuration)
982 : kPreparedModel(preparedModel),
983 kRequest(std::move(request)),
984 kRequestPoolInfos(std::move(requestPoolInfos)),
985 kLoopTimeoutDuration(std::move(loopTimeoutDuration)) {}
986
987 std::tuple<int, std::vector<OutputShape>, Timing> compute(
988 const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;
989
990 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
991 const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
992 const OptionalDuration& timeoutDurationAfterFence) const override;
993
994 private:
995 const CpuPreparedModel& kPreparedModel;
996 Request kRequest;
997 std::vector<RunTimePoolInfo> kRequestPoolInfos;
998 const OptionalDuration kLoopTimeoutDuration;
999 };
1000
1001 std::vector<bool> CpuDevice::getSupportedOperations(const MetaModel& metaModel) const {
1002 const Model& model = metaModel.getModel();
1003 const size_t count = model.main.operations.size();
1004 std::vector<bool> result(count, false);
1005 for (size_t i = 0; i < count; i++) {
1006 // TODO(b/119870033): Decide whether and how post-P operations would be supported on CPU.
1007 // We may want to use the slicer for CpuDevice just as we do for
1008 // DriverDevice.
1009 OperationType operationType = model.main.operations[i].type;
1010 result[i] = !isExtension(operationType) && operationType != OperationType::OEM_OPERATION;
1011 }
1012 return result;
1013 }
1014
1015 template <typename Type>
1016 static Result<void> validateAndCheckCompliance(const Type& object) {
1017 const auto version = NN_TRY(validate(object));
1018 if (!isCompliantVersion(version, DeviceManager::get()->getRuntimeVersion())) {
1019 return NN_ERROR() << "Object is newer than what is allowed. Version needed: " << version
1020 << ", current runtime version supported: "
1021 << DeviceManager::get()->getRuntimeVersion();
1022 }
1023 return {};
1024 }
1025
1026 std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuDevice::prepareModel(
1027 const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
1028 const OptionalTimePoint& deadline, const CacheInfo& /*cacheInfo*/,
1029 const std::optional<CacheToken>& maybeToken,
1030 const std::vector<TokenValuePair>& /*metaData*/,
1031 const std::vector<ExtensionNameAndPrefix>& /*extensionNameAndPrefix*/) const {
1032 CHECK(!maybeToken.has_value())
1033 << "Should never call prepareModel with cache information on CpuDevice";
1034
1035 const Model model = makeModel();
1036 if (auto result = validateAndCheckCompliance(model); !result.ok()) {
1037 LOG(ERROR) << "Invalid Model: " << result.error();
1038 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1039 }
1040 if (auto result = validateAndCheckCompliance(preference); !result.ok()) {
1041 LOG(ERROR) << "Invalid ExecutionPreference: " << result.error();
1042 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1043 }
1044 if (auto result = validateAndCheckCompliance(priority); !result.ok()) {
1045 LOG(ERROR) << "Invalid Priority: " << result.error();
1046 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1047 }
1048 if (hasDeadlinePassed(deadline)) {
1049 return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, nullptr};
1050 }
1051
1052 return CpuPreparedModel::create(model);
1053 }
1054
1055 std::pair<int, std::unique_ptr<RuntimeMemory>> CpuDevice::allocate(const MemoryDescriptor& desc,
1056 OperandType type) const {
1057 uint32_t size = TypeManager::get()->getSizeOfData(type, desc.dimensions);
1058 if (size == 0) {
1059 LOG(ERROR) << "CpuDevice::allocate -- does not support unknown dimensions.";
1060 return {ANEURALNETWORKS_OP_FAILED, nullptr};
1061 }
1062 return MemoryAshmem::create(size);
1063 }
1064
1065 std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuPreparedModel::create(Model model) {
1066 std::vector<RunTimePoolInfo> poolInfos;
1067 if (!setRunTimePoolInfosFromCanonicalMemories(&poolInfos, model.pools)) {
1068 return {ANEURALNETWORKS_UNMAPPABLE, nullptr};
1069 }
1070
1071 std::shared_ptr<RuntimePreparedModel> preparedModel =
1072 std::make_shared<CpuPreparedModel>(std::move(model), std::move(poolInfos));
1073 return {ANEURALNETWORKS_NO_ERROR, std::move(preparedModel)};
1074 }
1075
1076 static std::tuple<int, std::vector<OutputShape>, Timing> computeOnCpu(
1077 const Model& model, const Request& request,
1078 const std::vector<RunTimePoolInfo>& modelPoolInfos,
1079 const std::vector<RunTimePoolInfo>& requestPoolInfos, const OptionalTimePoint& deadline,
1080 const OptionalDuration& loopTimeoutDuration) {
1081 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
1082 CpuExecutor executor;
1083 if (loopTimeoutDuration.has_value()) {
1084 executor.setLoopTimeout(loopTimeoutDuration->count());
1085 }
1086 if (deadline.has_value()) {
1087 executor.setDeadline(*deadline);
1088 }
1089 int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
1090 const auto& outputShapes = executor.getOutputShapes();
1091 return {err, outputShapes, {}};
1092 }
1093
1094 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuPreparedModel::executeFenced(
1095 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1096 const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
1097 MeasureTiming measure, const OptionalTimePoint& deadline,
1098 const OptionalDuration& loopTimeoutDuration, const OptionalDuration& duration,
1099 const std::vector<TokenValuePair>& /*metaData*/) const {
1100 VLOG(EXECUTION)
1101 << "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
1102 for (int syncFd : waitFor) {
1103 if (syncFd > 0) {
1104 auto r = syncWait(syncFd, -1);
1105 if (r != FenceState::SIGNALED) {
1106 LOG(ERROR) << "sync wait failed, fd: " << syncFd;
1107 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
1108 }
1109 }
1110 }
1111
1112 // Update deadline if the timeout duration is closer than the deadline.
1113 auto closestDeadline = deadline;
1114 if (duration.has_value()) {
1115 const auto timeoutDurationDeadline = makeDeadline(*duration);
1116 if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
1117 closestDeadline = timeoutDurationDeadline;
1118 }
1119 }
1120
1121 const auto [result, outputShapes, timing] = execute(inputs, outputs, memories, nullptr, measure,
1122 closestDeadline, loopTimeoutDuration, {});
1123 return {result, -1, nullptr, timing};
1124 }
1125
1126 static std::tuple<int, Request, std::vector<RunTimePoolInfo>> createCpuRequest(
1127 const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1128 const std::vector<const RuntimeMemory*>& memories) {
1129 std::vector<RunTimePoolInfo> requestPoolInfos;
1130 requestPoolInfos.reserve(memories.size());
1131 for (const RuntimeMemory* mem : memories) {
1132 if (std::optional<RunTimePoolInfo> poolInfo = mem->getRunTimePoolInfo()) {
1133 requestPoolInfos.emplace_back(*poolInfo);
1134 } else {
1135 return {ANEURALNETWORKS_UNMAPPABLE, {}, {}};
1136 }
1137 }
1138 // Create one additional pool for each input / output argument that is specified by pointer.
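// Each such pool wraps the caller's buffer directly via RunTimePoolInfo::createFromExistingBuffer,
// so the CPU path avoids the copy into a separate memory pool that pointer arguments require on
// the driver path.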
1139 auto fixPointerArguments =
1140 [&requestPoolInfos](const std::vector<ModelArgumentInfo>& argumentInfos) {
1141 std::vector<DataLocation> ptrArgsLocations;
1142 for (const ModelArgumentInfo& argumentInfo : argumentInfos) {
1143 if (argumentInfo.state() == ModelArgumentInfo::POINTER) {
1144 ptrArgsLocations.push_back(
1145 {.poolIndex = static_cast<uint32_t>(requestPoolInfos.size()),
1146 .offset = 0,
1147 .length = argumentInfo.length(),
1148 .padding = argumentInfo.padding()});
1149 requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
1150 static_cast<uint8_t*>(argumentInfo.buffer())));
1151 }
1152 }
1153 return ptrArgsLocations;
1154 };
1155 const std::vector<DataLocation> inputPtrArgsLocations = fixPointerArguments(inputs);
1156 const std::vector<DataLocation> outputPtrArgsLocations = fixPointerArguments(outputs);
1157
1158 Request request;
1159 request.inputs = createRequestArguments(inputs, inputPtrArgsLocations);
1160 request.outputs = createRequestArguments(outputs, outputPtrArgsLocations);
1161 return {ANEURALNETWORKS_NO_ERROR, std::move(request), std::move(requestPoolInfos)};
1162 }
1163
// Performs computation on the NNAPI CPU reference implementation.
//
// Unlike DriverPreparedModel::execute, the NNAPI CPU reference executor lives in the same process
// as the NNAPI runtime and can take raw pointers, so this method creates as many pools as there
// are inputs/outputs to avoid copying data.
//
// Chooses between synchronous and asynchronous execution according to DeviceManager::mSyncExecCpu.
std::tuple<int, std::vector<OutputShape>, Timing> CpuPreparedModel::execute(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const SharedBurst& /*burstController*/,
        MeasureTiming /*measure*/, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    int nCreateRequest;
    Request request;
    std::vector<RunTimePoolInfo> requestPoolInfos;
    std::tie(nCreateRequest, request, requestPoolInfos) =
            createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        // of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &request, &requestPoolInfos, &deadline, &loopTimeoutDuration, &result] {
            result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                                  loopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
                        loopTimeoutDuration);
}

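// Creates a reusable CpuExecution that captures the Request and pool infos once, so repeated
// computations on the same request arguments do not need to rebuild them.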
std::pair<int, std::shared_ptr<RuntimeExecution>> CpuPreparedModel::createReusableExecution(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, MeasureTiming /*measure*/,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& /*metaData*/) const {
    auto [nCreateRequest, request, requestPoolInfos] = createCpuRequest(inputs, outputs, memories);
    if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
        return {nCreateRequest, nullptr};
    }
    auto execution = std::make_shared<CpuExecution>(
            *this, std::move(request), std::move(requestPoolInfos), loopTimeoutDuration);
    return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
}

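// Runs the computation for a reusable execution with the Request and pool infos captured at
// creation time. Like CpuPreparedModel::execute, chooses between synchronous and asynchronous
// execution according to DeviceManager::mSyncExecCpu.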
std::tuple<int, std::vector<OutputShape>, Timing> CpuExecution::compute(
        const SharedBurst& /*burstController*/, const OptionalTimePoint& deadline) const {
    if (hasDeadlinePassed(deadline)) {
        return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
    }

    if (!DeviceManager::get()->syncExecCpu()) {
        // TODO: use a thread pool
        // TODO(mikie): this could have NNTRACE so we could measure the overhead
        // of spinning up a new thread.
        std::tuple<int, std::vector<OutputShape>, Timing> result = {};
        std::thread([this, &deadline, &result] {
            result = computeOnCpu(kPreparedModel.getModel(), kRequest,
                                  kPreparedModel.getModelPoolInfos(), kRequestPoolInfos, deadline,
                                  kLoopTimeoutDuration);
        }).join();
        return result;
    }

    return computeOnCpu(kPreparedModel.getModel(), kRequest, kPreparedModel.getModelPoolInfos(),
                        kRequestPoolInfos, deadline, kLoopTimeoutDuration);
}

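// Waits for the provided sync fences to signal and then runs the computation synchronously.
// Because the CPU path has already finished by the time this returns, no sync fence (-1) and no
// ExecuteFencedInfoCallback (nullptr) are handed back to the caller.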
std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuExecution::computeFenced(
        const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
        const OptionalDuration& duration) const {
    VLOG(EXECUTION)
            << "CpuExecution::computeFenced wait for sync fences to signal before execution";
    for (int syncFd : waitFor) {
        if (syncFd > 0) {
            auto r = syncWait(syncFd, -1);
            if (r != FenceState::SIGNALED) {
                LOG(ERROR) << "sync wait failed, fd: " << syncFd;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
    }

    // Update deadline if the timeout duration is closer than the deadline.
    auto closestDeadline = deadline;
    if (duration.has_value()) {
        const auto timeoutDurationDeadline = makeDeadline(*duration);
        if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
            closestDeadline = timeoutDurationDeadline;
        }
    }

    const auto [result, outputShapes, timing] = compute(nullptr, closestDeadline);
    return {result, -1, nullptr, timing};
}

int64_t DeviceManager::getRuntimeFeatureLevel() const {
    return versionToFeatureLevel(mRuntimeVersion.level);
}

DeviceManager* DeviceManager::get() {
    static DeviceManager manager;
    return &manager;
}

std::shared_ptr<Device> DeviceManager::getCpuDevice() {
    return CpuDevice::get();
}

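// Test-only helper: wraps the given SharedDevice in a DriverDevice so tests can construct driver
// devices directly.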
std::shared_ptr<Device> DeviceManager::forTest_makeDriverDevice(const SharedDevice& device) {
    VLOG(MANAGER) << "forTest_makeDriverDevice(" << device->getName() << ")";
    const auto driverDevice = DriverDevice::create(device);
    CHECK(driverDevice != nullptr);
    return driverDevice;
}

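// Collects the available driver devices and wraps each one in a DriverDevice. On the platform
// build, devices come from the HAL services (limited to maxFeatureLevelAllowed); on the
// compatibility library build they come from getDevices(); and on non-Android platform builds the
// list is empty.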
#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        [[maybe_unused]] Version::Level maxFeatureLevelAllowed) {
#ifdef __ANDROID__
    auto devices = hardware::neuralnetworks::service::getDevices(maxFeatureLevelAllowed);

    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
#else // __ANDROID__
    return {};
#endif // __ANDROID__
}
#else
std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
        Version::Level /*maxFeatureLevelAllowed*/) {
    auto devices = getDevices();
    std::vector<std::shared_ptr<DriverDevice>> driverDevices;
    driverDevices.reserve(devices.size());
    for (auto& device : devices) {
        driverDevices.push_back(DriverDevice::create(std::move(device)));
    }
    return driverDevices;
}
#endif // NN_COMPATIBILITY_LIBRARY_BUILD

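// Populates mDevices with every available driver device, optionally filtered by the
// debug.nn.enabled-devices property on debuggable builds, and then appends the CPU fallback
// device (nnapi-reference) except on the compatibility library build.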
void DeviceManager::findAvailableDevices() {
    VLOG(MANAGER) << "findAvailableDevices";

#ifdef NN_DEBUGGABLE
    // debug.nn.enabled-devices defines a regex pattern. Of the available driver devices, only
    // those whose names match the pattern are enabled; driver devices with non-matching names are
    // ignored. If this property is not set, all available driver devices are enabled by default.
    // This filter only applies to driver devices; nnapi-reference is always enabled.
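    // For example, on a debuggable build, one could restrict the runtime to drivers whose names
    // start with "sample" (an illustrative prefix) by running:
    //   adb shell setprop debug.nn.enabled-devices "sample.*"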
    std::string patternStr = base::GetProperty("debug.nn.enabled-devices", ".*");
    LOG(INFO) << "Enabled devices: " << patternStr;
    const std::regex pattern(patternStr);
#endif // NN_DEBUGGABLE

    // register driver devices
    auto driverDevices = getDriverDevices(mRuntimeVersion.level);
    for (auto& driverDevice : driverDevices) {
#ifdef NN_DEBUGGABLE
        if (!std::regex_match(driverDevice->getName(), pattern)) {
            LOG(INFO) << "Ignored interface " << driverDevice->getName()
                      << " (version = " << driverDevice->getVersionString() << ")";
            continue;
        }
#endif // NN_DEBUGGABLE
        LOG(INFO) << "Found interface " << driverDevice->getName()
                  << " (version = " << driverDevice->getVersionString() << ")";
        mDevices.push_back(std::move(driverDevice));
    }

#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
    // register CPU fallback device
    mDevices.push_back(CpuDevice::get());
    mDevicesCpuOnly.push_back(CpuDevice::get());
#endif // NN_COMPATIBILITY_LIBRARY_BUILD
}

void DeviceManager::registerDevice(const SharedDevice& device) {
    if (auto driverDevice = DriverDevice::create(device)) {
        mDevices.push_back(std::move(driverDevice));
    }
}

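// The constructor caches the runtime feature level and the platform telemetry setting, discovers
// the available devices, and, on debuggable builds, reads the debug.nn.* properties that control
// slicing, partitioning, CPU-only execution, and sync/async behavior.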
DeviceManager::DeviceManager() {
    VLOG(MANAGER) << "DeviceManager::DeviceManager";
    mRuntimeVersion = getRuntimeFeatureLevelVersion();
    mIsPlatformTelemetryEnabled = getWhetherPlatformTelemetryIsEnabled();
    findAvailableDevices();
#ifdef NN_DEBUGGABLE
    mStrictSlicing = (getProp("debug.nn.strict-slicing") != 0);
    mPartitioning = getProp("debug.nn.partition", kPartitioningDefault);
    mDebugNNCpuOnly = (getProp("debug.nn.cpuonly") != 0);
    mSyncExecCpu = (getProp("debug.nn.syncexec-cpu", 1) != 0);
    mSyncExecRuntime = (getProp("debug.nn.syncexec-runtime") != 0);
#endif // NN_DEBUGGABLE
}

} // namespace nn
} // namespace android