/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Manager"

#include "Manager.h"

#include <CpuExecutor.h>
#include <LegacyUtils.h>
#include <MetaModel.h>
#include <Tracing.h>
#include <android-base/properties.h>
#include <android-base/unique_fd.h>
#include <fcntl.h>
#include <nnapi/IBurst.h>
#include <nnapi/IDevice.h>
#include <nnapi/IExecution.h>
#include <nnapi/IPreparedModel.h>
#include <nnapi/SharedMemory.h>
#include <nnapi/TypeUtils.h>
#include <nnapi/Types.h>
#include <nnapi/Validation.h>
#include <sys/stat.h>
#include <unistd.h>

#include <algorithm>
#include <functional>
#include <iterator>
#include <map>
#include <memory>
#include <regex>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "ExecutionCallback.h"
#include "Memory.h"
#include "ModelArgumentInfo.h"
#include "ServerFlag.h"
#include "TypeManager.h"

#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
#include <build/version.h>
#include <cutils/native_handle.h>
#include <nnapi/hal/1.3/Buffer.h>
#include <nnapi/hal/Service.h>
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD

#ifdef NN_EXPERIMENTAL_FEATURE
#include "NeuralNetworksExperimentalFeatures.h"
#endif  // NN_EXPERIMENTAL_FEATURE

namespace android {
namespace nn {
namespace {

Version getRuntimeFeatureLevelVersionHelper() {
#if defined(NN_EXPERIMENTAL_FEATURE) && defined(NN_COMPATIBILITY_LIBRARY_BUILD)
#error "NN_EXPERIMENTAL_FEATURE is not supported when NN_COMPATIBILITY_LIBRARY_BUILD is defined"
#elif defined(NN_EXPERIMENTAL_FEATURE)
    auto version = kVersionFeatureLevelExperimental;
    // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
    // features that are only available in the runtime.
    version.runtimeOnlyFeatures = true;
#elif defined(NN_COMPATIBILITY_LIBRARY_BUILD)
    auto version = serverFeatureLevelToVersion(kMaxFeatureLevelNum);
#else   // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    auto version = serverFeatureLevelToVersion(getServerFeatureLevelFlag());
    // Enable "runtimeOnlyFeatures" to indicate that the runtime feature level version supports
    // features that are only available in the runtime.
    version.runtimeOnlyFeatures = true;
#endif  // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    return version;
}

Version getRuntimeFeatureLevelVersion() {
    static const Version version = getRuntimeFeatureLevelVersionHelper();
    return version;
}

bool getWhetherPlatformTelemetryIsEnabled() {
#if !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    // b/287186978, force enable telemetry on the platform NNAPI
    // return getServerTelemetryEnableFlag();
    return true;
#else   // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
    return false;
#endif  // !defined(NN_COMPATIBILITY_LIBRARY_BUILD) && !defined(NN_EXPERIMENTAL_FEATURE)
}

}  // namespace

// A Device backed by an actual underlying driver.
class DriverDevice : public Device {
   public:
    // Creates a DriverDevice from a SharedDevice.
    // Returns nullptr on failure.
    static std::shared_ptr<DriverDevice> create(SharedDevice device);

    // Prefer using DriverDevice::create
    explicit DriverDevice(SharedDevice device);

    const std::string& getName() const override { return kInterface->getName(); }
    const std::string& getVersionString() const override { return kInterface->getVersionString(); }
    Version getFeatureLevel() const override { return kInterface->getFeatureLevel(); }
    int32_t getType() const override { return static_cast<int32_t>(kInterface->getType()); }
    const std::vector<Extension>& getSupportedExtensions() const override {
        return kInterface->getSupportedExtensions();
    }
    std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
    const Capabilities& getCapabilities() const override { return kInterface->getCapabilities(); }
    Capabilities::PerformanceInfo getPerformance(OperandType type) const override {
        return getCapabilities().operandPerformance.lookup(type);
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
        return getCapabilities().relaxedFloat32toFloat16PerformanceScalar;
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
        return getCapabilities().relaxedFloat32toFloat16PerformanceTensor;
    }
    Capabilities::PerformanceInfo getIfPerformance() const override {
        return getCapabilities().ifPerformance;
    }
    Capabilities::PerformanceInfo getWhilePerformance() const override {
        return getCapabilities().whilePerformance;
    }
    std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
        return kInterface->getNumberOfCacheFilesNeeded();
    }
    bool isCachingSupported() const override {
        // Caching is supported if either numModelCache or numDataCache is greater than 0.
        const auto [numModelCacheFiles, numDataCacheFiles] = getNumberOfCacheFilesNeeded();
        return numModelCacheFiles > 0 || numDataCacheFiles > 0;
    }
    int wait() const override {
        auto result = kInterface->wait();
        if (!result.ok()) {
            LOG(ERROR) << "DriverDevice::wait error: " << result.error().message;
            return convertErrorStatusToResultCode(result.error().code);
        }
        return ANEURALNETWORKS_NO_ERROR;
    }

    std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const std::optional<CacheToken>& maybeToken,
            const std::vector<TokenValuePair>& metaData,
            const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;

    std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
                                                            OperandType) const override;

   private:
    const SharedDevice kInterface;

    GeneralResult<std::vector<bool>> getSupportedOperationsImpl(const MetaModel& metaModel) const;
    GeneralResult<SharedPreparedModel> prepareModelFromCacheInternal(
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const CacheToken& token) const;

#ifdef NN_DEBUGGABLE
    // For debugging: behavior of IDevice::getSupportedOperations for SampleDriver.
    // 0 - all operations reported by IDevice::getSupportedOperations() supported
    // 1 - some operations reported by IDevice::getSupportedOperations() supported
    uint32_t mSupported = 0;
#endif  // NN_DEBUGGABLE
};

// A RuntimePreparedModel with an underlying IPreparedModel instance returned by the actual driver.
class DriverPreparedModel : public RuntimePreparedModel {
   public:
    DriverPreparedModel(const Device* device, const SharedPreparedModel& preparedModel)
        : mDevice(device), mPreparedModel(preparedModel) {
        CHECK(mDevice != nullptr);
        CHECK(mPreparedModel != nullptr);
    }

    const Device* getDevice() const override { return mDevice; }
    SharedPreparedModel getInterface() const override { return mPreparedModel; }

    std::tuple<int, std::vector<OutputShape>, Timing> execute(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const OptionalDuration& timeoutDurationAfterFence,
            const std::vector<TokenValuePair>& metaData) const override;

    std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    GeneralResult<SharedBurst> configureExecutionBurst() const override {
        return mPreparedModel->configureExecutionBurst();
    }

    MemoryPreference getMemoryPreference() const override {
        if (isCompliantVersion(kVersionFeatureLevel5, mDevice->getFeatureLevel())) {
            return {kDefaultRequestMemoryAlignment, kDefaultRequestMemoryPadding};
        } else {
            // We are not able to pass memory padding information to HIDL drivers, so return the
            // minimum padding.
            return {kDefaultRequestMemoryAlignment, kMinMemoryPadding};
        }
    }

   private:
    const Device* mDevice;
    const SharedPreparedModel mPreparedModel;
};

class DriverExecution : public RuntimeExecution {
   public:
    DriverExecution(SharedExecution execution, Request request,
                    std::vector<const RuntimeMemory*> memories, MeasureTiming measure,
                    OptionalDuration loopTimeoutDuration, Version deviceFeatureLevel,
                    const std::vector<TokenValuePair>& metaData)
        : kExecution(std::move(execution)),
          kRequest(std::move(request)),
          kMemories(std::move(memories)),
          kMeasure(measure),
          kLoopTimeoutDuration(std::move(loopTimeoutDuration)),
          kDeviceFeatureLevel(deviceFeatureLevel),
          kMetaData(metaData) {
        CHECK(kExecution != nullptr);
    }

    std::tuple<int, std::vector<OutputShape>, Timing> compute(
            const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
            const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
            const OptionalDuration& timeoutDurationAfterFence) const override;

   private:
    const SharedExecution kExecution;

    // For burst execution.
    const Request kRequest;
    const std::vector<const RuntimeMemory*> kMemories;
    const MeasureTiming kMeasure;
    const OptionalDuration kLoopTimeoutDuration;
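    // Reusable executions created through an IBurst controller, keyed by the controller, so a
    // repeated compute() with the same burst does not have to recreate the execution object.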
    mutable std::map<const IBurst*, SharedExecution> mCachedBurstExecutions;

    // For fenced execution.
    const Version kDeviceFeatureLevel;

    // Execution metadata.
    std::vector<TokenValuePair> kMetaData;
};

DriverDevice::DriverDevice(SharedDevice device) : kInterface(std::move(device)) {
    CHECK(kInterface != nullptr);
#ifdef NN_DEBUGGABLE
    static const char samplePrefix[] = "sample";
    if (getName().substr(0, sizeof(samplePrefix) - 1) == samplePrefix) {
        mSupported = getProp("debug.nn.sample.supported");
    }
#endif  // NN_DEBUGGABLE
}

std::shared_ptr<DriverDevice> DriverDevice::create(SharedDevice device) {
    if (device == nullptr) {
        LOG(ERROR) << "DriverDevice::create called with nullptr";
        return nullptr;
    }

    return std::make_shared<DriverDevice>(std::move(device));
}

int64_t DeviceManager::versionToFeatureLevel(Version::Level versionLevel) {
    switch (versionLevel) {
        case Version::Level::FEATURE_LEVEL_1:
            return ANEURALNETWORKS_FEATURE_LEVEL_1;
        case Version::Level::FEATURE_LEVEL_2:
            return ANEURALNETWORKS_FEATURE_LEVEL_2;
        case Version::Level::FEATURE_LEVEL_3:
            return ANEURALNETWORKS_FEATURE_LEVEL_3;
        case Version::Level::FEATURE_LEVEL_4:
            return ANEURALNETWORKS_FEATURE_LEVEL_4;
        case Version::Level::FEATURE_LEVEL_5:
            return ANEURALNETWORKS_FEATURE_LEVEL_5;
        case Version::Level::FEATURE_LEVEL_6:
            return ANEURALNETWORKS_FEATURE_LEVEL_6;
        case Version::Level::FEATURE_LEVEL_7:
            return ANEURALNETWORKS_FEATURE_LEVEL_7;
        case Version::Level::FEATURE_LEVEL_8:
            return ANEURALNETWORKS_FEATURE_LEVEL_8;
#ifdef NN_EXPERIMENTAL_FEATURE
        case Version::Level::FEATURE_LEVEL_EXPERIMENTAL:
            return ANEURALNETWORKS_FEATURE_LEVEL_EXPERIMENTAL;
#endif  // NN_EXPERIMENTAL_FEATURE
    }
    LOG(FATAL) << "Unrecognized version " << versionLevel;
    return -1;
}

GeneralResult<std::vector<bool>> DriverDevice::getSupportedOperationsImpl(
        const MetaModel& metaModel) const {
    const auto featureLevel = kInterface->getFeatureLevel();
    const auto slice = metaModel.getSlice(featureLevel);
    if (!slice.has_value()) {
        return NN_ERROR() << "getSlice(" << featureLevel << ") failed";
    }

    const auto& [sliceModel, slicedModelOperationIndexToModelOperationIndex] = *slice;
    const std::vector<bool> supported = NN_TRY(kInterface->getSupportedOperations(sliceModel));
    const uint32_t slicedOperationCount = sliceModel.main.operations.size();
    if (supported.size() != slicedOperationCount) {
        return NN_ERROR() << "IDevice::getSupportedOperations returned a vector of length "
                          << supported.size() << " when expecting " << slicedOperationCount;
    }

    const Model& model = metaModel.getModel();
    const uint32_t operationCount = model.main.operations.size();
    std::vector<bool> remappedSupported(operationCount, false);
    for (size_t i = 0; i < supported.size(); ++i) {
        if (supported[i]) {
            remappedSupported[slicedModelOperationIndexToModelOperationIndex(i)] = true;
        }
    }
    return remappedSupported;
}

std::vector<bool> DriverDevice::getSupportedOperations(const MetaModel& metaModel) const {
    const Model& model = metaModel.getModel();

    auto result = getSupportedOperationsImpl(metaModel);
    if (!result.ok()) {
        LOG(ERROR) << "getSupportedOperations failed with code " << result.error().code << ": "
                   << result.error().message;
        // Set the supported operation vector to all false, so we won't use this driver.
        return std::vector<bool>(model.main.operations.size(), false);
    }

    std::vector<bool>& supportedOperations = result.value();
#ifdef NN_DEBUGGABLE
    if (mSupported != 1) {
        return supportedOperations;
    }

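    // When the "debug.nn.sample.supported" property is set to 1, deterministically report some of
    // the driver-supported operations as unsupported: hash the device name together with each
    // operation's type, operand types, dimensions, and lifetimes, and drop the operation when the
    // resulting accumulator is odd. This lets debug builds exercise partial-support paths.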
    const uint32_t baseAccumulator = std::hash<std::string>{}(getName());
    for (size_t operationIndex = 0; operationIndex < supportedOperations.size(); operationIndex++) {
        if (!supportedOperations[operationIndex]) {
            continue;
        }

        uint32_t accumulator = baseAccumulator;
        const Operation& operation = model.main.operations[operationIndex];
        accumulator ^= static_cast<uint32_t>(operation.type);
        auto accumulateOperands = [&model, &accumulator](const std::vector<uint32_t>& operands) {
            for (uint32_t operandIndex : operands) {
                const Operand& operand = model.main.operands[operandIndex];
                accumulator ^= static_cast<uint32_t>(operand.type);
                accumulator ^= operand.dimensions.size();
                for (const Dimension& dimension : operand.dimensions) {
                    accumulator ^= dimension;
                    if (operand.lifetime == Operand::LifeTime::CONSTANT_COPY ||
                        operand.lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
                        operand.lifetime == Operand::LifeTime::POINTER) {
                        accumulator ^= 1;
                    }
                }
            }
        };
        accumulateOperands(operation.inputs);
        accumulateOperands(operation.outputs);
        if (accumulator & 1) {
            supportedOperations[operationIndex] = false;
        }
    }
#endif  // NN_DEBUGGABLE

    return supportedOperations;
}

// Opens a cache file for reading and writing and returns a shared handle.
static GeneralResult<SharedHandle> createCacheHandle(const std::string& filename,
                                                     bool createIfNotExist) {
    auto fd = base::unique_fd(open(filename.c_str(), createIfNotExist ? (O_RDWR | O_CREAT) : O_RDWR,
                                   S_IRUSR | S_IWUSR));
    if (!fd.ok()) {
        return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
               << "Failed to " << (createIfNotExist ? "open or create" : "open") << " cache file "
               << filename;
    }
    return std::make_shared<const Handle>(std::move(fd));
}

// Opens a list of cache files and returns a vector of shared handles. The files
// are always opened with both read and write permissions.
static GeneralResult<std::vector<SharedHandle>> createCacheHandleVec(
        uint32_t numCacheFiles, const std::string& baseFilename, bool createIfNotExist) {
    CHECK(numCacheFiles <= kMaxNumberOfCacheFiles);
    std::vector<SharedHandle> handles;
    handles.reserve(numCacheFiles);
    for (uint32_t i = 0; i < numCacheFiles; i++) {
        std::string filename = baseFilename + std::to_string(i);
        VLOG(COMPILATION) << "Cache " << i << ": " << filename;
        handles.push_back(NN_TRY(createCacheHandle(filename, createIfNotExist)));
    }
    return handles;
}

// Maps a token to cache file names and returns a pair of vectors of shared
// handles to the opened files.
static GeneralResult<CacheHandles> getCacheHandles(
        const CacheInfo& cacheInfo, const CacheToken& token,
        const std::pair<uint32_t, uint32_t>& numCacheFiles, bool createIfNotExist) {
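    // If cache file handles were provided directly (CacheHandles), just validate that their counts
    // match what the driver asked for. Otherwise, derive deterministic file names from the token
    // under the compilation cache directory.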
    if (const auto* cacheHandles = std::get_if<CacheHandles>(&cacheInfo.variant)) {
        if (cacheHandles->modelCache.size() != numCacheFiles.first) {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                   << "Expected " << numCacheFiles.first << " model cache handles, got "
                   << cacheHandles->modelCache.size();
        }
        if (cacheHandles->dataCache.size() != numCacheFiles.second) {
            return NN_ERROR(ErrorStatus::GENERAL_FAILURE)
                   << "Expected " << numCacheFiles.second << " data cache handles, got "
                   << cacheHandles->dataCache.size();
        }
        return *cacheHandles;
    }

    // The filename includes kByteSizeOfCacheToken * 2 characters for the token,
    // and 1 character for the model/data cache identifier.
    std::string filename(kByteSizeOfCacheToken * 2 + 1, '0');
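    // Encode each token byte as two characters: 'A' + the low nibble followed by 'A' + the high
    // nibble, so the token maps to a fixed-length, filesystem-safe name.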
    for (uint32_t i = 0; i < kByteSizeOfCacheToken; i++) {
        filename[i * 2] = 'A' + (token[i] & 0x0F);
        filename[i * 2 + 1] = 'A' + (token[i] >> 4);
    }

    const auto& cacheDir = std::get<CacheDir>(cacheInfo.variant);
    CHECK(cacheDir.empty() || cacheDir.back() == '/');
    std::string cacheFileName = cacheDir + filename;
    const uint32_t cacheTypeIdentifierIndex = cacheDir.size() + kByteSizeOfCacheToken * 2;

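    // The trailing identifier character distinguishes the two kinds of cache files: '1' for model
    // cache and '2' for data cache; createCacheHandleVec then appends a per-file index.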
    cacheFileName[cacheTypeIdentifierIndex] = '1';
    std::vector<SharedHandle> modelCache =
            NN_TRY(createCacheHandleVec(numCacheFiles.first, cacheFileName, createIfNotExist));

    cacheFileName[cacheTypeIdentifierIndex] = '2';
    std::vector<SharedHandle> dataCache =
            NN_TRY(createCacheHandleVec(numCacheFiles.second, cacheFileName, createIfNotExist));

    return CacheHandles{
            .modelCache = std::move(modelCache),
            .dataCache = std::move(dataCache),
    };
}

GeneralResult<SharedPreparedModel> DriverDevice::prepareModelFromCacheInternal(
        const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
        const CacheToken& token) const {
    // Get cache files if they exist, otherwise return from the function early.
    auto cache = NN_TRY(getCacheHandles(cacheInfo, token, kInterface->getNumberOfCacheFilesNeeded(),
                                        /*createIfNotExist=*/false));
    return kInterface->prepareModelFromCache(deadline, cache.modelCache, cache.dataCache, token);
}

std::pair<int, std::shared_ptr<RuntimePreparedModel>> DriverDevice::prepareModel(
        const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
        const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
        const std::optional<CacheToken>& maybeToken, const std::vector<TokenValuePair>& metaData,
        const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const {
    // Attempt to compile from cache if a token is present.
    if (maybeToken.has_value()) {
        auto result = prepareModelFromCacheInternal(deadline, cacheInfo, *maybeToken);
        if (result.has_value()) {
            LOG(INFO) << "prepareModelFromCache: successfully prepared model from cache";
            return {ANEURALNETWORKS_NO_ERROR,
                    std::make_shared<DriverPreparedModel>(this, std::move(result).value())};
        } else {
            LOG(ERROR) << "prepareModelFromCache failure (" << result.error().code
                       << "): " << result.error().message;
        }
    }

    // Get cache files if they exist, otherwise create them.
    CacheHandles cache;
    if (maybeToken.has_value()) {
        auto result =
                getCacheHandles(cacheInfo, *maybeToken, kInterface->getNumberOfCacheFilesNeeded(),
                                /*createIfNotExist=*/true);
        if (result.has_value()) {
            cache = std::move(result).value();
        } else {
            LOG(ERROR) << "getCacheHandles failure (" << result.error().code
                       << "): " << result.error().message;
        }
    }

    // Get the token if it exists, otherwise get a null token.
    static constexpr CacheToken kNullToken = {};
    const CacheToken token = maybeToken.value_or(kNullToken);

    // Fall back to full compilation (possibly with token) if
    // prepareModelFromCache could not be used or failed.
    const Model model = makeModel();
    auto result =
            kInterface->prepareModel(model, preference, priority, deadline, cache.modelCache,
                                     cache.dataCache, token, metaData, extensionNameAndPrefix);
    if (!result.ok()) {
        LOG(ERROR) << "IDevice::prepareModel() error: " << result.error().message;
        return {convertErrorStatusToResultCode(result.error().code), nullptr};
    }
    SharedPreparedModel preparedModel = std::move(result).value();
    CHECK(preparedModel != nullptr)
            << "IDevice::prepareModel() returned nullptr without error code";
    return {ANEURALNETWORKS_NO_ERROR,
            std::make_shared<DriverPreparedModel>(this, std::move(preparedModel))};
}

std::pair<int, std::unique_ptr<RuntimeMemory>> DriverDevice::allocate(const MemoryDescriptor& desc,
                                                                      OperandType) const {
    const BufferDesc bufferDesc = {.dimensions = desc.dimensions};
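    // Collect the driver-level prepared models behind each RuntimePreparedModel referenced by the
    // descriptor's roles, since IDevice::allocate() works on SharedPreparedModel objects.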
    std::vector<SharedPreparedModel> preparedModels(desc.preparedModels.size());
    std::transform(desc.preparedModels.begin(), desc.preparedModels.end(), preparedModels.begin(),
                   [](const auto* preparedModel) {
                       const auto versionedPreparedModel = preparedModel->getInterface();
                       CHECK(versionedPreparedModel != nullptr);
                       return versionedPreparedModel;
                   });
    auto result =
            kInterface->allocate(bufferDesc, preparedModels, desc.inputRoles, desc.outputRoles);
    if (!result.ok()) {
        LOG(ERROR) << "DriverDevice::allocate -- memory allocation on device " << getName()
                   << " failed!";
        return {convertErrorStatusToResultCode(result.error().code), nullptr};
    }
    return MemoryFromDevice::create(std::move(result).value());
}

static Request createDriverRequest(const std::vector<ModelArgumentInfo>& inputs,
                                   const std::vector<ModelArgumentInfo>& outputs,
                                   const std::vector<const RuntimeMemory*>& memories) {
    Request request;
    request.inputs.reserve(inputs.size());
    std::transform(inputs.begin(), inputs.end(), std::back_inserter(request.inputs),
                   [](const auto& input) { return input.createRequestArgument(); });
    request.outputs.reserve(outputs.size());
    std::transform(outputs.begin(), outputs.end(), std::back_inserter(request.outputs),
                   [](const auto& output) { return output.createRequestArgument(); });
    request.pools.reserve(memories.size());
    std::transform(memories.begin(), memories.end(), std::back_inserter(request.pools),
                   [](const RuntimeMemory* memory) { return memory->getMemoryPool(); });
    return request;
}

// Perform computation on an actual device driver.
//
// Because HIDL cannot take raw pointers, two separate memory pools will be allocated for inputs and
// outputs specified by pointers. The input pointer data will be copied to the input pool prior to
// execution, and the output pointer data will be copied out from the output pool after the
// execution.
std::tuple<int, std::vector<OutputShape>, Timing> DriverPreparedModel::execute(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
        MeasureTiming measure, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& metaData) const {
    NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::execute");

    auto request = createDriverRequest(inputs, outputs, memories);

    NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::execute::execute");

    ExecutionResult<std::pair<std::vector<OutputShape>, Timing>> result;

    // compute using burst if present, otherwise compute from IPreparedModel
    const bool burstCompute = (burstController != nullptr);
    if (burstCompute) {
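        // Let the burst controller cache the memory pools backing this request; the returned hold
        // keeps each cached mapping alive for the lifetime of the corresponding RuntimeMemory, so
        // repeated burst executions can reuse it.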
        for (const RuntimeMemory* memory : memories) {
            const auto pool = memory->getMemoryPool();
            if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
                auto cacheHold = burstController->cacheMemory(*maybeMemory);
                memory->hold(cacheHold);
            }
        }

        VLOG(EXECUTION) << "Before burstController->execute() " << SHOW_IF_DEBUG(request);
        result = burstController->execute(request, measure, deadline, loopTimeoutDuration, metaData,
                                          TypeManager::get()->getExtensionNameAndPrefix(metaData));
    } else {
        result = mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
                                         TypeManager::get()->getExtensionNameAndPrefix(metaData));
    }

    int n = ANEURALNETWORKS_OP_FAILED;
    std::vector<OutputShape> outputShapes;
    Timing timing;

    if (result.ok()) {
        n = ANEURALNETWORKS_NO_ERROR;
        std::tie(outputShapes, timing) = std::move(result).value();
    } else {
        auto [message, code, returnedOutputShapes] = std::move(result).error();
        n = convertErrorStatusToResultCode(code);
        VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
        LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
                   << "::execute(...) error: " << message;
        if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
            outputShapes = std::move(returnedOutputShapes);
        }
        return {n, std::move(outputShapes), timing};
    }

    VLOG(EXECUTION) << "DriverPreparedModel::execute completed";
    return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}

std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverPreparedModel::executeFenced(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
        MeasureTiming measure, const OptionalTimePoint& deadline,
        const OptionalDuration& loopTimeoutDuration,
        const OptionalDuration& timeoutDurationAfterFence,
        const std::vector<TokenValuePair>& metaData) const {
    NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::executeFenced");
    CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));

    auto request = createDriverRequest(inputs, outputs, memories);

    NNTRACE_RT_SWITCH(NNTRACE_PHASE_EXECUTION, "DriverPreparedModel::executeFenced");

    std::vector<SyncFence> waitForHandles;
    waitForHandles.reserve(waitFor.size());
    for (int fd : waitFor) {
        int dupFd = dup(fd);
        if (dupFd < 0) {
            LOG(ERROR) << "Unable to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
        }
        waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
    }

    SyncFence syncFence = SyncFence::createAsSignaled();
    ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
    Timing timing = {};
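    // Fenced execution needs a driver that implements the NN HAL 1.3 (or newer) interface;
    // otherwise wait on the incoming fences here and fall back to a synchronous execute().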
    if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, mDevice->getFeatureLevel())) {
        auto result = mPreparedModel->executeFenced(
                request, waitForHandles, measure, deadline, loopTimeoutDuration,
                timeoutDurationAfterFence, metaData,
                TypeManager::get()->getExtensionNameAndPrefix(metaData));
        if (!result.ok()) {
            LOG(ERROR) << "IPreparedModel::executeFenced() error: " << result.error().message;
            VLOG(EXECUTION) << "**executeFenced failed**";
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
    } else {
        // Fall back to synchronous execution if executeFenced is not supported.
        // First wait for all sync fences to be ready.
        LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
        for (const auto& fence : waitForHandles) {
            if (!fence.hasFd() || fence.getFd() < 0) {
                return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
            }
            auto r = fence.syncWait({/* no timeout */});
            if (r != SyncFence::FenceState::SIGNALED) {
                LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
        auto result =
                mPreparedModel->execute(request, measure, deadline, loopTimeoutDuration, metaData,
                                        TypeManager::get()->getExtensionNameAndPrefix(metaData));
        if (!result.ok()) {
            LOG(ERROR) << "IPreparedModel::execute() error: " << result.error().message;
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(std::ignore, timing) = result.value();
    }

    int syncFenceFd = -1;
    if (syncFence.hasFd()) {
        syncFenceFd = dup(syncFence.getFd());
        if (syncFenceFd < 0) {
            LOG(ERROR) << "Failed to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
        }
    }

    VLOG(EXECUTION) << "DriverPreparedModel::executeFenced completed";
    return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
}

std::pair<int, std::shared_ptr<RuntimeExecution>> DriverPreparedModel::createReusableExecution(
        const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
        const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
        const OptionalDuration& loopTimeoutDuration,
        const std::vector<TokenValuePair>& metaData) const {
    NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "DriverPreparedModel::createReusableExecution");

    auto request = createDriverRequest(inputs, outputs, memories);
    auto result = mPreparedModel->createReusableExecution(
            request, measure, loopTimeoutDuration, metaData,
            TypeManager::get()->getExtensionNameAndPrefix(metaData));
    if (!result.ok()) {
        LOG(ERROR) << "IPreparedModel::createReusableExecution() error: " << result.error().message;
        const int n = convertErrorStatusToResultCode(result.error().code);
        return {n, nullptr};
    }
    auto execution = std::make_shared<DriverExecution>(
            std::move(result).value(), std::move(request), memories, measure, loopTimeoutDuration,
            mDevice->getFeatureLevel(), metaData);
    return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
}

std::tuple<int, std::vector<OutputShape>, Timing> DriverExecution::compute(
        const SharedBurst& burstController, const OptionalTimePoint& deadline) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::compute");

    // compute using burst if present, otherwise compute from IPreparedModel
    SharedExecution execution;
    const bool burstCompute = (burstController != nullptr);
    if (burstCompute) {
        // create a reusable burst execution if the controller is not seen before
        auto burstExecution = mCachedBurstExecutions.find(burstController.get());
        if (burstExecution == mCachedBurstExecutions.end()) {
            for (const RuntimeMemory* memory : kMemories) {
                const auto pool = memory->getMemoryPool();
                if (const auto* maybeMemory = std::get_if<SharedMemory>(&pool)) {
                    auto cacheHold = burstController->cacheMemory(*maybeMemory);
                    memory->hold(cacheHold);
                }
            }
            auto createResult = burstController->createReusableExecution(
                    kRequest, kMeasure, kLoopTimeoutDuration, kMetaData,
                    TypeManager::get()->getExtensionNameAndPrefix(kMetaData));
            if (!createResult.ok()) {
                LOG(ERROR) << "IBurst::createReusableExecution() error: "
                           << createResult.error().message;
                const int n = convertErrorStatusToResultCode(createResult.error().code);
                return {n, {}, {}};
            }
            execution = std::move(createResult).value();
            mCachedBurstExecutions.emplace(burstController.get(), execution);
        } else {
            execution = burstExecution->second;
        }
        VLOG(EXECUTION) << "Before mBurstExecution->compute() " << SHOW_IF_DEBUG(kRequest);
    } else {
        execution = kExecution;
    }

    CHECK(execution != nullptr);
    auto result = execution->compute(deadline);
    if (!result.ok()) {
        auto [message, code, returnedOutputShapes] = std::move(result).error();
        int n = convertErrorStatusToResultCode(code);
        VLOG(EXECUTION) << "**Execution failed** (ResultCode = " << n << ")";
        LOG(ERROR) << (burstCompute ? "IBurst" : "IPreparedModel")
                   << "::execute(...) error: " << message;
        if (code == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
            return {n, std::move(returnedOutputShapes), {}};
        }
        return {n, {}, {}};
    }

    VLOG(EXECUTION) << "DriverExecution::compute completed";
    auto [outputShapes, timing] = std::move(result).value();
    return {ANEURALNETWORKS_NO_ERROR, std::move(outputShapes), timing};
}

std::tuple<int, int, ExecuteFencedInfoCallback, Timing> DriverExecution::computeFenced(
        const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
        const OptionalDuration& timeoutDurationAfterFence) const {
    NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "DriverExecution::computeFenced");
    CHECK(std::all_of(waitFor.begin(), waitFor.end(), [](int fd) { return fd >= 0; }));

    std::vector<SyncFence> waitForHandles;
    waitForHandles.reserve(waitFor.size());
    for (int fd : waitFor) {
        int dupFd = dup(fd);
        if (dupFd < 0) {
            LOG(ERROR) << "Unable to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
        }
        waitForHandles.push_back(SyncFence::create(base::unique_fd(dupFd)));
    }

    SyncFence syncFence = SyncFence::createAsSignaled();
    ExecuteFencedInfoCallback executeFencedInfoCallback = nullptr;
    Timing timing = {};
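    // As in DriverPreparedModel::executeFenced: use the driver's fenced path only when the
    // underlying interface is NN HAL 1.3 or newer, otherwise wait on the fences and run a plain
    // compute().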
    if (isCompliantVersion(kHalVersionV1_3ToApi.canonical, kDeviceFeatureLevel)) {
        auto result =
                kExecution->computeFenced(waitForHandles, deadline, timeoutDurationAfterFence);
        if (!result.ok()) {
            LOG(ERROR) << "IExecution::computeFenced() error: " << result.error().message;
            VLOG(EXECUTION) << "**computeFenced failed**";
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(syncFence, executeFencedInfoCallback) = std::move(result).value();
    } else {
        // Fall back to synchronous execution if computeFenced is not supported.
        // First wait for all sync fences to be ready.
        LOG(INFO) << "No drivers able to handle sync fences, falling back to regular execution";
        for (const auto& fence : waitForHandles) {
            if (!fence.hasFd() || fence.getFd() < 0) {
                return {ANEURALNETWORKS_BAD_DATA, -1, nullptr, {}};
            }
            auto r = fence.syncWait({/* no timeout */});
            if (r != SyncFence::FenceState::SIGNALED) {
                LOG(ERROR) << "syncWait failed, fd: " << fence.getFd() << ", state: " << r;
                return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
            }
        }
        auto result = kExecution->compute(deadline);
        if (!result.ok()) {
            LOG(ERROR) << "IExecution::compute() error: " << result.error().message;
            return {convertErrorStatusToResultCode(result.error().code), -1, nullptr, {}};
        }
        std::tie(std::ignore, timing) = result.value();
    }

    int syncFenceFd = -1;
    if (syncFence.hasFd()) {
        syncFenceFd = dup(syncFence.getFd());
        if (syncFenceFd < 0) {
            LOG(ERROR) << "Failed to dup the file descriptor";
            return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, timing};
        }
    }

    VLOG(EXECUTION) << "DriverExecution::computeFenced completed";
    return {ANEURALNETWORKS_NO_ERROR, syncFenceFd, executeFencedInfoCallback, timing};
}

static Capabilities createCpuCapabilities() {
    constexpr Capabilities::PerformanceInfo kPerf = {.execTime = 1.0f, .powerUsage = 1.0f};
    return makeCapabilities(kPerf, kPerf, kPerf);
}

// A special abstracted device for the CPU. Only one instance of this class will exist.
// Use get() to retrieve it.
class CpuDevice : public Device {
   public:
    // Returns the singleton CPU fallback device.
    static std::shared_ptr<CpuDevice> get() {
        static std::shared_ptr<CpuDevice> instance(new CpuDevice);
        return instance;
    }

    const std::string& getName() const override { return kName; }
    const std::string& getVersionString() const override { return kVersionString; }
    Version getFeatureLevel() const override { return kVersion; }
    int32_t getType() const override { return ANEURALNETWORKS_DEVICE_CPU; }
    const std::vector<Extension>& getSupportedExtensions() const override {
        return kSupportedExtensions;
    }
    std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const override;
    const Capabilities& getCapabilities() const override { return kCapabilities; }
    Capabilities::PerformanceInfo getPerformance(OperandType) const override {
        return kPerformance;
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const override {
        return kPerformance;
    }
    Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const override {
        return kPerformance;
    }
    Capabilities::PerformanceInfo getIfPerformance() const override { return kPerformance; }
    Capabilities::PerformanceInfo getWhilePerformance() const override { return kPerformance; }
    std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const override {
        return {/*numModelCache=*/0, /*numDataCache=*/0};
    }
    bool isCachingSupported() const override { return false; }
    int wait() const override { return ANEURALNETWORKS_NO_ERROR; }

    std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const std::optional<CacheToken>& maybeToken,
            const std::vector<TokenValuePair>& metaData,
            const std::vector<ExtensionNameAndPrefix>& extensionNameAndPrefix) const override;

    std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
                                                            OperandType type) const override;

   private:
    CpuDevice() = default;
    const Version kVersion = getRuntimeFeatureLevelVersion();
    const std::string kName = "nnapi-reference";
#ifndef NN_COMPATIBILITY_LIBRARY_BUILD
    const std::string kVersionString = build::GetBuildNumber();
#else
    const std::string kVersionString = "UNKNOWN";
#endif  // NN_COMPATIBILITY_LIBRARY_BUILD
    // Since the performance is a ratio compared to the CPU performance,
    // by definition the performance of the CPU is 1.0.
    const Capabilities::PerformanceInfo kPerformance = {.execTime = 1.0f, .powerUsage = 1.0f};
    const Capabilities kCapabilities = createCpuCapabilities();
    const std::vector<Extension> kSupportedExtensions{/* No extensions. */};
};

// A special abstracted RuntimePreparedModel for the CPU, constructed by CpuDevice.
class CpuPreparedModel : public RuntimePreparedModel {
   public:
    // Factory method for CpuPreparedModel. Returns ANEURALNETWORKS_NO_ERROR and
    // a prepared model object if successfully created. Returns an error code
    // and nullptr otherwise.
    static std::pair<int, std::shared_ptr<RuntimePreparedModel>> create(Model model);

    const Device* getDevice() const override { return CpuDevice::get().get(); }
    SharedPreparedModel getInterface() const override { return nullptr; }

    std::tuple<int, std::vector<OutputShape>, Timing> execute(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    GeneralResult<SharedBurst> configureExecutionBurst() const override { return nullptr; }

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const OptionalDuration& timeoutDurationAfterFence,
            const std::vector<TokenValuePair>& metaData) const override;

    std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
            const OptionalDuration& loopTimeoutDuration,
            const std::vector<TokenValuePair>& metaData) const override;

    MemoryPreference getMemoryPreference() const override {
        return {kPreferredAlignment, kPreferredPadding};
    }

    // Prefer to use CpuPreparedModel::create.
    CpuPreparedModel(Model model, std::vector<RunTimePoolInfo> poolInfos)
        : mModel(std::move(model)), mModelPoolInfos(std::move(poolInfos)) {}

    const Model& getModel() const { return mModel; }
    const std::vector<RunTimePoolInfo>& getModelPoolInfos() const { return mModelPoolInfos; }

   private:
    // TFLite kernels prefer 64 bytes for padding and alignment.
    static constexpr uint32_t kPreferredAlignment = 64;
    static constexpr uint32_t kPreferredPadding = 64;

    const Model mModel;
    const std::vector<RunTimePoolInfo> mModelPoolInfos;
};

class CpuExecution : public RuntimeExecution {
   public:
    CpuExecution(const CpuPreparedModel& preparedModel, Request request,
                 std::vector<RunTimePoolInfo> requestPoolInfos,
                 OptionalDuration loopTimeoutDuration)
        : kPreparedModel(preparedModel),
          kRequest(std::move(request)),
          kRequestPoolInfos(std::move(requestPoolInfos)),
          kLoopTimeoutDuration(std::move(loopTimeoutDuration)) {}

    std::tuple<int, std::vector<OutputShape>, Timing> compute(
            const SharedBurst& burstController, const OptionalTimePoint& deadline) const override;

    std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
            const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
            const OptionalDuration& timeoutDurationAfterFence) const override;

   private:
    const CpuPreparedModel& kPreparedModel;
    Request kRequest;
    std::vector<RunTimePoolInfo> kRequestPoolInfos;
    const OptionalDuration kLoopTimeoutDuration;
};

std::vector<bool> CpuDevice::getSupportedOperations(const MetaModel& metaModel) const {
    const Model& model = metaModel.getModel();
    const size_t count = model.main.operations.size();
    std::vector<bool> result(count, false);
    for (size_t i = 0; i < count; i++) {
        // TODO(b/119870033): Decide whether and how post-P operations would be supported on CPU.
        //                    We may want to use the slicer for CpuDevice just as we do for
        //                    DriverDevice.
        OperationType operationType = model.main.operations[i].type;
        result[i] = !isExtension(operationType) && operationType != OperationType::OEM_OPERATION;
    }
    return result;
}

template <typename Type>
static Result<void> validateAndCheckCompliance(const Type& object) {
    const auto version = NN_TRY(validate(object));
    if (!isCompliantVersion(version, DeviceManager::get()->getRuntimeVersion())) {
        return NN_ERROR() << "Object is newer than what is allowed. Version needed: " << version
1020                           << ", current runtime version supported: "
1021                           << DeviceManager::get()->getRuntimeVersion();
1022     }
1023     return {};
1024 }
1025 
prepareModel(const ModelFactory & makeModel,ExecutionPreference preference,Priority priority,const OptionalTimePoint & deadline,const CacheInfo &,const std::optional<CacheToken> & maybeToken,const std::vector<TokenValuePair> &,const std::vector<ExtensionNameAndPrefix> &) const1026 std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuDevice::prepareModel(
1027         const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
1028         const OptionalTimePoint& deadline, const CacheInfo& /*cacheInfo*/,
1029         const std::optional<CacheToken>& maybeToken,
1030         const std::vector<TokenValuePair>& /*metaData*/,
1031         const std::vector<ExtensionNameAndPrefix>& /*extensionNameAndPrefix*/) const {
1032     CHECK(!maybeToken.has_value())
1033             << "Should never call prepareModel with cache information on CpuDevice";
1034 
1035     const Model model = makeModel();
1036     if (auto result = validateAndCheckCompliance(model); !result.ok()) {
1037         LOG(ERROR) << "Invalid Model: " << result.error();
1038         return {ANEURALNETWORKS_OP_FAILED, nullptr};
1039     }
1040     if (auto result = validateAndCheckCompliance(preference); !result.ok()) {
1041         LOG(ERROR) << "Invalid ExecutionPreference: " << result.error();
1042         return {ANEURALNETWORKS_OP_FAILED, nullptr};
1043     }
1044     if (auto result = validateAndCheckCompliance(priority); !result.ok()) {
1045         LOG(ERROR) << "Invalid Priority: " << result.error();
1046         return {ANEURALNETWORKS_OP_FAILED, nullptr};
1047     }
1048     if (hasDeadlinePassed(deadline)) {
1049         return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, nullptr};
1050     }
1051 
1052     return CpuPreparedModel::create(model);
1053 }
1054 
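// Allocates ashmem-backed memory sized from the operand type and descriptor dimensions.
// Fully specified dimensions are required because the exact byte size must be known up front.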
1055 std::pair<int, std::unique_ptr<RuntimeMemory>> CpuDevice::allocate(const MemoryDescriptor& desc,
1056                                                                    OperandType type) const {
1057     uint32_t size = TypeManager::get()->getSizeOfData(type, desc.dimensions);
1058     if (size == 0) {
1059         LOG(ERROR) << "CpuDevice::allocate -- does not support unknown dimensions.";
1060         return {ANEURALNETWORKS_OP_FAILED, nullptr};
1061     }
1062     return MemoryAshmem::create(size);
1063 }
1064 
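// Maps the model's constant data pools once at preparation time so that every subsequent
// execution can reuse the same RunTimePoolInfo mappings.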
1065 std::pair<int, std::shared_ptr<RuntimePreparedModel>> CpuPreparedModel::create(Model model) {
1066     std::vector<RunTimePoolInfo> poolInfos;
1067     if (!setRunTimePoolInfosFromCanonicalMemories(&poolInfos, model.pools)) {
1068         return {ANEURALNETWORKS_UNMAPPABLE, nullptr};
1069     }
1070 
1071     std::shared_ptr<RuntimePreparedModel> preparedModel =
1072             std::make_shared<CpuPreparedModel>(std::move(model), std::move(poolInfos));
1073     return {ANEURALNETWORKS_NO_ERROR, std::move(preparedModel)};
1074 }
1075 
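// Runs a single inference on the CpuExecutor, forwarding the optional loop timeout and
// deadline, and returns the executor's error code together with the output shapes.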
1076 static std::tuple<int, std::vector<OutputShape>, Timing> computeOnCpu(
1077         const Model& model, const Request& request,
1078         const std::vector<RunTimePoolInfo>& modelPoolInfos,
1079         const std::vector<RunTimePoolInfo>& requestPoolInfos, const OptionalTimePoint& deadline,
1080         const OptionalDuration& loopTimeoutDuration) {
1081     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
1082     CpuExecutor executor;
1083     if (loopTimeoutDuration.has_value()) {
1084         executor.setLoopTimeout(loopTimeoutDuration->count());
1085     }
1086     if (deadline.has_value()) {
1087         executor.setDeadline(*deadline);
1088     }
1089     int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
1090     const auto& outputShapes = executor.getOutputShapes();
1091     return {err, outputShapes, {}};
1092 }
1093 
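// Fenced execution on the CPU simply waits for all input sync fences to signal and then
// performs a regular synchronous execution; no output sync fence is produced, so the
// returned fence fd is -1 and the callback is null.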
1094 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuPreparedModel::executeFenced(
1095         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1096         const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
1097         MeasureTiming measure, const OptionalTimePoint& deadline,
1098         const OptionalDuration& loopTimeoutDuration, const OptionalDuration& duration,
1099         const std::vector<TokenValuePair>& /*metaData*/) const {
1100     VLOG(EXECUTION)
1101             << "CpuPreparedModel::executeFenced wait for sync fences to signal before execution";
1102     for (int syncFd : waitFor) {
1103         if (syncFd > 0) {
1104             auto r = syncWait(syncFd, -1);
1105             if (r != FenceState::SIGNALED) {
1106                 LOG(ERROR) << "sync wait failed, fd: " << syncFd;
1107                 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
1108             }
1109         }
1110     }
1111 
1112     // Update deadline if the timeout duration is closer than the deadline.
1113     auto closestDeadline = deadline;
1114     if (duration.has_value()) {
1115         const auto timeoutDurationDeadline = makeDeadline(*duration);
1116         if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
1117             closestDeadline = timeoutDurationDeadline;
1118         }
1119     }
1120 
1121     const auto [result, outputShapes, timing] = execute(inputs, outputs, memories, nullptr, measure,
1122                                                         closestDeadline, loopTimeoutDuration, {});
1123     return {result, -1, nullptr, timing};
1124 }
1125 
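// Converts runtime argument info and memories into a canonical Request. Memory-backed
// arguments reference the caller-provided pools; each pointer-backed argument gets its own
// extra pool that wraps the caller's buffer directly, so no data is copied.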
1126 static std::tuple<int, Request, std::vector<RunTimePoolInfo>> createCpuRequest(
1127         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1128         const std::vector<const RuntimeMemory*>& memories) {
1129     std::vector<RunTimePoolInfo> requestPoolInfos;
1130     requestPoolInfos.reserve(memories.size());
1131     for (const RuntimeMemory* mem : memories) {
1132         if (std::optional<RunTimePoolInfo> poolInfo = mem->getRunTimePoolInfo()) {
1133             requestPoolInfos.emplace_back(*poolInfo);
1134         } else {
1135             return {ANEURALNETWORKS_UNMAPPABLE, {}, {}};
1136         }
1137     }
1138     // Create one pool for each pointer-backed input / output argument.
1139     auto fixPointerArguments =
1140             [&requestPoolInfos](const std::vector<ModelArgumentInfo>& argumentInfos) {
1141                 std::vector<DataLocation> ptrArgsLocations;
1142                 for (const ModelArgumentInfo& argumentInfo : argumentInfos) {
1143                     if (argumentInfo.state() == ModelArgumentInfo::POINTER) {
1144                         ptrArgsLocations.push_back(
1145                                 {.poolIndex = static_cast<uint32_t>(requestPoolInfos.size()),
1146                                  .offset = 0,
1147                                  .length = argumentInfo.length(),
1148                                  .padding = argumentInfo.padding()});
1149                         requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
1150                                 static_cast<uint8_t*>(argumentInfo.buffer())));
1151                     }
1152                 }
1153                 return ptrArgsLocations;
1154             };
1155     const std::vector<DataLocation> inputPtrArgsLocations = fixPointerArguments(inputs);
1156     const std::vector<DataLocation> outputPtrArgsLocations = fixPointerArguments(outputs);
1157 
1158     Request request;
1159     request.inputs = createRequestArguments(inputs, inputPtrArgsLocations);
1160     request.outputs = createRequestArguments(outputs, outputPtrArgsLocations);
1161     return {ANEURALNETWORKS_NO_ERROR, std::move(request), std::move(requestPoolInfos)};
1162 }
1163 
1164 // Perform computation on NNAPI CPU reference implementation.
1165 //
1166 // Contrary to DriverPreparedModel::execute, the NNAPI CPU reference executor lives in the
1167 // same process as the NNAPI runtime and can take raw pointers. This method creates as many
1168 // pools as there are pointer-backed inputs / outputs to avoid copying data.
1169 //
1170 // Will choose between sync/async execution according to DeviceManager::mSyncExecCpu.
1171 std::tuple<int, std::vector<OutputShape>, Timing> CpuPreparedModel::execute(
1172         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1173         const std::vector<const RuntimeMemory*>& memories, const SharedBurst& /*burstController*/,
1174         MeasureTiming /*measure*/, const OptionalTimePoint& deadline,
1175         const OptionalDuration& loopTimeoutDuration,
1176         const std::vector<TokenValuePair>& /*metaData*/) const {
1177     if (hasDeadlinePassed(deadline)) {
1178         return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
1179     }
1180 
1181     int nCreateRequest;
1182     Request request;
1183     std::vector<RunTimePoolInfo> requestPoolInfos;
1184     std::tie(nCreateRequest, request, requestPoolInfos) =
1185             createCpuRequest(inputs, outputs, memories);
1186     if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
1187         return {nCreateRequest, {}, {}};
1188     }
1189 
1190     if (!DeviceManager::get()->syncExecCpu()) {
1191         // TODO: use a thread pool
1192         // TODO(mikie): this could have NNTRACE so we could measure the overhead
1193         //              of spinning up a new thread.
1194         std::tuple<int, std::vector<OutputShape>, Timing> result = {};
1195         std::thread([this, &request, &requestPoolInfos, &deadline, &loopTimeoutDuration, &result] {
1196             result = computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
1197                                   loopTimeoutDuration);
1198         }).join();
1199         return result;
1200     }
1201 
1202     return computeOnCpu(mModel, request, mModelPoolInfos, requestPoolInfos, deadline,
1203                         loopTimeoutDuration);
1204 }
1205 
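// Builds a reusable execution object: the Request and its pools are created once here and
// then reused by every CpuExecution::compute/computeFenced call. A rough usage sketch
// (hypothetical variable names, error handling elided):
//
//   auto [err, execution] = preparedModel->createReusableExecution(
//           inputs, outputs, memories, MeasureTiming::NO, /*loopTimeoutDuration=*/{}, {});
//   if (err == ANEURALNETWORKS_NO_ERROR) {
//       auto [status, outputShapes, timing] = execution->compute(/*burstController=*/nullptr, {});
//   }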
1206 std::pair<int, std::shared_ptr<RuntimeExecution>> CpuPreparedModel::createReusableExecution(
1207         const std::vector<ModelArgumentInfo>& inputs, const std::vector<ModelArgumentInfo>& outputs,
1208         const std::vector<const RuntimeMemory*>& memories, MeasureTiming /*measure*/,
1209         const OptionalDuration& loopTimeoutDuration,
1210         const std::vector<TokenValuePair>& /*metaData*/) const {
1211     auto [nCreateRequest, request, requestPoolInfos] = createCpuRequest(inputs, outputs, memories);
1212     if (nCreateRequest != ANEURALNETWORKS_NO_ERROR) {
1213         return {nCreateRequest, nullptr};
1214     }
1215     auto execution = std::make_shared<CpuExecution>(
1216             *this, std::move(request), std::move(requestPoolInfos), loopTimeoutDuration);
1217     return {ANEURALNETWORKS_NO_ERROR, std::move(execution)};
1218 }
1219 
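// Same execution path as CpuPreparedModel::execute, but operating on the request and pool
// infos captured when the reusable execution was created.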
1220 std::tuple<int, std::vector<OutputShape>, Timing> CpuExecution::compute(
1221         const SharedBurst& /*burstController*/, const OptionalTimePoint& deadline) const {
1222     if (hasDeadlinePassed(deadline)) {
1223         return {ANEURALNETWORKS_MISSED_DEADLINE_PERSISTENT, {}, {}};
1224     }
1225 
1226     if (!DeviceManager::get()->syncExecCpu()) {
1227         // TODO: use a thread pool
1228         // TODO(mikie): this could have NNTRACE so we could measure the overhead
1229         //              of spinning up a new thread.
1230         std::tuple<int, std::vector<OutputShape>, Timing> result = {};
1231         std::thread([this, &deadline, &result] {
1232             result = computeOnCpu(kPreparedModel.getModel(), kRequest,
1233                                   kPreparedModel.getModelPoolInfos(), kRequestPoolInfos, deadline,
1234                                   kLoopTimeoutDuration);
1235         }).join();
1236         return result;
1237     }
1238 
1239     return computeOnCpu(kPreparedModel.getModel(), kRequest, kPreparedModel.getModelPoolInfos(),
1240                         kRequestPoolInfos, deadline, kLoopTimeoutDuration);
1241 }
1242 
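// Mirrors CpuPreparedModel::executeFenced: wait for the input fences, tighten the deadline
// if a post-fence timeout is given, then run an unfenced compute.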
1243 std::tuple<int, int, ExecuteFencedInfoCallback, Timing> CpuExecution::computeFenced(
1244         const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
1245         const OptionalDuration& duration) const {
1246     VLOG(EXECUTION)
1247             << "CpuExecution::computeFenced wait for sync fences to signal before execution";
1248     for (int syncFd : waitFor) {
1249         if (syncFd > 0) {
1250             auto r = syncWait(syncFd, -1);
1251             if (r != FenceState::SIGNALED) {
1252                 LOG(ERROR) << "sync wait failed, fd: " << syncFd;
1253                 return {ANEURALNETWORKS_OP_FAILED, -1, nullptr, {}};
1254             }
1255         }
1256     }
1257 
1258     // Update deadline if the timeout duration is closer than the deadline.
1259     auto closestDeadline = deadline;
1260     if (duration.has_value()) {
1261         const auto timeoutDurationDeadline = makeDeadline(*duration);
1262         if (!closestDeadline.has_value() || *closestDeadline > timeoutDurationDeadline) {
1263             closestDeadline = timeoutDurationDeadline;
1264         }
1265     }
1266 
1267     const auto [result, outputShapes, timing] = compute(nullptr, closestDeadline);
1268     return {result, -1, nullptr, timing};
1269 }
1270 
1271 int64_t DeviceManager::getRuntimeFeatureLevel() const {
1272     return versionToFeatureLevel(mRuntimeVersion.level);
1273 }
1274 
1275 DeviceManager* DeviceManager::get() {
1276     static DeviceManager manager;
1277     return &manager;
1278 }
1279 
1280 std::shared_ptr<Device> DeviceManager::getCpuDevice() {
1281     return CpuDevice::get();
1282 }
1283 
1284 std::shared_ptr<Device> DeviceManager::forTest_makeDriverDevice(const SharedDevice& device) {
1285     VLOG(MANAGER) << "forTest_makeDriverDevice(" << device->getName() << ")";
1286     const auto driverDevice = DriverDevice::create(device);
1287     CHECK(driverDevice != nullptr);
1288     return driverDevice;
1289 }
1290 
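// Driver device discovery. On platform Android builds the devices come from NN HAL service
// discovery; the compatibility library build uses its own getDevices(), and non-Android
// platform builds have no driver devices at all.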
1291 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
1292 std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
1293         [[maybe_unused]] Version::Level maxFeatureLevelAllowed) {
1294 #ifdef __ANDROID__
1295     auto devices = hardware::neuralnetworks::service::getDevices(maxFeatureLevelAllowed);
1296 
1297     std::vector<std::shared_ptr<DriverDevice>> driverDevices;
1298     driverDevices.reserve(devices.size());
1299     for (auto& device : devices) {
1300         driverDevices.push_back(DriverDevice::create(std::move(device)));
1301     }
1302     return driverDevices;
1303 #else   // __ANDROID__
1304     return {};
1305 #endif  // __ANDROID__
1306 }
1307 #else
1308 std::vector<std::shared_ptr<DriverDevice>> getDriverDevices(
1309         Version::Level /*maxFeatureLevelAllowed*/) {
1310     auto devices = getDevices();
1311     std::vector<std::shared_ptr<DriverDevice>> driverDevices;
1312     driverDevices.reserve(devices.size());
1313     for (auto& device : devices) {
1314         driverDevices.push_back(DriverDevice::create(std::move(device)));
1315     }
1316     return driverDevices;
1317 }
1318 #endif  // NN_COMPATIBILITY_LIBRARY_BUILD
1319 
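// Populates mDevices with every enabled driver device and, on non-compatibility-library
// builds, appends the CPU fallback device last.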
1320 void DeviceManager::findAvailableDevices() {
1321     VLOG(MANAGER) << "findAvailableDevices";
1322 
1323 #ifdef NN_DEBUGGABLE
1324     // debug.nn.enabled-devices defines a regex pattern. For all available driver devices, only the
1325     // ones whose names match the pattern are enabled. Driver devices with unmatched names are
1326     // ignored. If this property is not set, all available driver devices are enabled by default.
1327     // This filter only applies to driver devices. nnapi-reference is always enabled.
1328     std::string patternStr = base::GetProperty("debug.nn.enabled-devices", ".*");
1329     LOG(INFO) << "Enabled devices: " << patternStr;
1330     const std::regex pattern(patternStr);
1331 #endif  // NN_DEBUGGABLE
1332 
1333     // register driver devices
1334     auto driverDevices = getDriverDevices(mRuntimeVersion.level);
1335     for (auto& driverDevice : driverDevices) {
1336 #ifdef NN_DEBUGGABLE
1337         if (!std::regex_match(driverDevice->getName(), pattern)) {
1338             LOG(INFO) << "Ignored interface " << driverDevice->getName()
1339                       << " (version = " << driverDevice->getVersionString() << ")";
1340             continue;
1341         }
1342 #endif  // NN_DEBUGGABLE
1343         LOG(INFO) << "Found interface " << driverDevice->getName()
1344                   << " (version = " << driverDevice->getVersionString() << ")";
1345         mDevices.push_back(std::move(driverDevice));
1346     }
1347 
1348 #ifndef NN_COMPATIBILITY_LIBRARY_BUILD
1349     // register CPU fallback device
1350     mDevices.push_back(CpuDevice::get());
1351     mDevicesCpuOnly.push_back(CpuDevice::get());
1352 #endif  // NN_COMPATIBILITY_LIBRARY_BUILD
1353 }
1354 
1355 void DeviceManager::registerDevice(const SharedDevice& device) {
1356     if (auto driverDevice = DriverDevice::create(device)) {
1357         mDevices.push_back(std::move(driverDevice));
1358     }
1359 }
1360 
1361 DeviceManager::DeviceManager() {
1362     VLOG(MANAGER) << "DeviceManager::DeviceManager";
1363     mRuntimeVersion = getRuntimeFeatureLevelVersion();
1364     mIsPlatformTelemetryEnabled = getWhetherPlatformTelemetryIsEnabled();
1365     findAvailableDevices();
1366 #ifdef NN_DEBUGGABLE
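    // Debuggable builds allow overriding runtime behavior through debug.nn.* system
    // properties: strict slicing checks, the partitioning policy, forcing CPU-only
    // execution, and whether CPU / runtime executions run synchronously.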
1367     mStrictSlicing = (getProp("debug.nn.strict-slicing") != 0);
1368     mPartitioning = getProp("debug.nn.partition", kPartitioningDefault);
1369     mDebugNNCpuOnly = (getProp("debug.nn.cpuonly") != 0);
1370     mSyncExecCpu = (getProp("debug.nn.syncexec-cpu", 1) != 0);
1371     mSyncExecRuntime = (getProp("debug.nn.syncexec-runtime") != 0);
1372 #endif  // NN_DEBUGGABLE
1373 }
1374 
1375 }  // namespace nn
1376 }  // namespace android
1377