/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_COMPILATION_BUILDER_H
#define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_COMPILATION_BUILDER_H

#include <nnapi/Types.h>

#include <chrono>
#include <limits>
#include <memory>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

#include "ExecutionPlan.h"
#include "Manager.h"
#include "NeuralNetworks.h"

namespace android {
namespace nn {

class BurstBuilder;
class Device;
class ExecutionBuilder;
class ModelBuilder;

// Builder that accumulates compilation settings (preference, priority, caching,
// timeout, extension attributes) for a ModelBuilder against a set of devices,
// produces an ExecutionPlan when finish() is called, and then acts as a factory
// for ExecutionBuilder and BurstBuilder objects.
//
// NOTE(review): setters presumably fail once finish() has succeeded (see
// mFinished, "we should not allow further modifications") — the enforcement
// lives in the .cpp, confirm there.
class CompilationBuilder {
   public:
    friend class ExecutionBuilder;  // TODO remove this

    // explicitDeviceList is true if the list of devices was provided explicitly
    // via the ANeuralNetworksModel_createForDevices API (which has certain
    // special semantics) and false otherwise.
    CompilationBuilder(const ModelBuilder* model,
                       const std::vector<std::shared_ptr<Device>>& devices,
                       bool explicitDeviceList = false);

    // Sets the execution preference (mPreference), e.g.
    // ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER.
    int setPreference(int32_t preference);

    // Configures compilation caching from a cache directory plus a token of
    // ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN bytes.
    int setCaching(const std::string& cacheDir, const uint8_t* token);
    // Same, but from caller-supplied file descriptors. Dups the fds.
    int setCachingFromFds(const int* modelCacheFds, const uint32_t numModelCacheFiles,
                          const int* dataCacheFds, const uint32_t numDataCacheFiles,
                          const uint8_t* token);

    // Sets the compilation priority (mPriority).
    int setPriority(int32_t priority);

    // Sets the amount of time (mTimeoutDuration) to complete or abort the
    // execution.
    int setTimeoutDuration(uint64_t duration);

    // Attaches a vendor-specific (extension) attribute; stored in mMetadata.
    int addExtensionAttribute(const char* extensionName, uint16_t attributeCodeWithinExtension,
                              const void* data, size_t length);

    // Finishes the compilation; after this succeeds, the object is immutable
    // (see mFinished) and executions/bursts may be created.
    int finish();

    // Preferred memory alignment/padding queries for execution inputs/outputs,
    // valid after finish(). Results are written through the out-parameters.
    int getPreferredMemoryAlignmentForInput(uint32_t index, uint32_t* alignment) const;
    int getPreferredMemoryPaddingForInput(uint32_t index, uint32_t* padding) const;
    int getPreferredMemoryAlignmentForOutput(uint32_t index, uint32_t* alignment) const;
    int getPreferredMemoryPaddingForOutput(uint32_t index, uint32_t* padding) const;

    // Creates a new execution from this finished compilation. On success, the
    // caller receives ownership through *execution.
    int createExecution(ExecutionBuilder** execution);

    // Creates a new burst object from this finished compilation. On success,
    // the caller receives ownership through *burst.
    int createBurst(BurstBuilder** burst);

    const ModelBuilder* getModel() const { return mModel; }
    const std::vector<std::shared_ptr<Device>>& getDevices() const { return mDevices; }

    // Enumerates the roles this compilation input/output plays across the
    // steps of the execution plan, invoking callback for each.
    int forEachStepRoleOfInput(uint32_t index, const StepRoleCallback& callback) const;
    int forEachStepRoleOfOutput(uint32_t index, const StepRoleCallback& callback) const;

    bool createdWithExplicitDeviceList() const { return mExplicitDeviceList; }

    bool hasDynamicTemporaries() const { return mPlan.hasDynamicTemporaries(); }
    bool isCacheInfoProvided() const { return mIsCacheInfoProvided; }
    bool isFinished() const { return mFinished; }

    // These functions are solely intended for use by unit tests of the
    // partitioning algorithm.
    const ExecutionPlan& forTest_getExecutionPlan() const { return mPlan; }
    int forTest_setPartitioning(uint32_t partitioning);
    int forTest_failPartitioning(
            int resultCode);  // If not ANEURALNETWORKS_NO_ERROR, then simulate partitioning failure

    // Supplementary compilation info recorded for telemetry.
    struct TelemetryInfo {
        // Wall-clock duration of compilation in nanoseconds; max() means
        // "not recorded".
        uint64_t compilationTimeNanos = std::numeric_limits<uint64_t>::max();
        // True if an accelerator compilation failed and execution fell back
        // to the CPU path.
        bool fallbackToCpuFromError = false;
    };
    const std::optional<TelemetryInfo>& getTelemetryInfo() const { return mTelemetryInfo; }

   private:
    // Non-owning; the model this compilation was created from.
    const ModelBuilder* mModel;

    // The partitioned plan built by finish().
    ExecutionPlan mPlan;

    // Whether the application prefers to go fast or use low power for this execution.
    int32_t mPreference = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER;

    // See class DeviceManager. When CompilationBuilder is
    // instantiated, we capture partitioning from DeviceManager; but
    // we can override this later.
    uint32_t mPartitioning;

    // For testing purposes, simulate partitioning failure.
    int mFailPartitioning = ANEURALNETWORKS_NO_ERROR;

    // Once the compilation has been finished, we should not allow further
    // modifications to the compilation.
    bool mFinished = false;

    // The set of devices that the partitioning algorithm operates on when
    // finish() is called.
    std::vector<std::shared_ptr<Device>> mDevices;

    // mExplicitDeviceList is true if the list of devices was provided
    // explicitly via the ANeuralNetworksModel_createForDevices API (which has
    // certain special semantics) and false otherwise.
    bool mExplicitDeviceList;

    // Compilation caching information.
    CacheInfo mCacheInfo;
    uint8_t mToken[ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN];
    bool mIsCacheInfoProvided = false;

    // Compilation priority information.
    int32_t mPriority = ANEURALNETWORKS_PRIORITY_DEFAULT;

    // Amount of time to complete or abort the execution.
    std::optional<uint64_t> mTimeoutDuration;

    // Supplementary compilation info for Telemetry use
    std::optional<TelemetryInfo> mTelemetryInfo;

    // Vendor specific metadata
    std::vector<TokenValuePair> mMetadata;
};

}  // namespace nn
}  // namespace android

#endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_COMPILATION_BUILDER_H