1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Class used to build a model through a succession of successive calls 18 // to the NN API. 19 20 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MODEL_BUILDER_H 21 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MODEL_BUILDER_H 22 23 #include <LegacyUtils.h> 24 25 #include <memory> 26 #include <vector> 27 28 #include "Memory.h" 29 #include "ModelArchHasher.h" 30 #include "NeuralNetworks.h" 31 32 namespace android { 33 namespace nn { 34 35 class CompilationBuilder; 36 class Device; 37 class ExecutionPlan; 38 class RuntimeMemory; 39 40 class ModelBuilder { 41 public: ModelBuilder()42 ModelBuilder() {} 43 // Returns an operand/operation type corresponding to a given extension operand/operation type. 44 int getExtensionType(const char* extensionName, uint16_t typeWithinExtension, int32_t* type); 45 // Adds an operand to the model. 46 int addOperand(const ANeuralNetworksOperandType& type); 47 int setOperandValue(uint32_t index, const void* buffer, size_t length); 48 int setOperandValueFromMemory(uint32_t index, const RuntimeMemory* memory, uint32_t offset, 49 size_t length); 50 int setOperandValueFromModel(uint32_t index, const ModelBuilder* value); 51 int setOperandSymmPerChannelQuantParams( 52 uint32_t index, const ANeuralNetworksSymmPerChannelQuantParams& extraParams); 53 int setOperandExtensionData(uint32_t index, const void* data, size_t length); 54 55 int addOperation(ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, 56 uint32_t outputCount, const uint32_t* outputs); 57 int identifyInputsAndOutputs(uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, 58 const uint32_t* outputs); 59 int relaxComputationFloat32toFloat16(bool allow); isComputationFloat32RelaxedToFloat16()60 bool isComputationFloat32RelaxedToFloat16() const { return mRelaxComputationFloat32toFloat16; } 61 62 int finish(); isFinished()63 bool isFinished() const { return mCompletedModel; } isValid()64 bool isValid() const { return !mInvalidModel; } 65 hasOEMOperation()66 bool hasOEMOperation() const { return mHasOEMOperation; } hasExtensionOperation()67 bool hasExtensionOperation() const { return mHasExtensionOperation; } hasControlFlow()68 bool hasControlFlow() const { return mHasControlFlow; } 69 70 // explicitDeviceList is true if the list of devices was provided explicitly 71 // via the ANeuralNetworksModel_createForDevices API (which has certain 72 // special semantics) and false otherwise. 73 int createCompilation(CompilationBuilder** compilation, 74 const std::vector<std::shared_ptr<Device>>& devices, 75 bool explicitDeviceList = false); 76 77 Model makeModel() const; 78 operandCount()79 uint32_t operandCount() const { 80 // We don't allow more than uint32_t worth of operands 81 return static_cast<uint32_t>(mOperands.size()); 82 } operationCount()83 uint32_t operationCount() const { 84 // We don't allow more than uint32_t worth of operations 85 return static_cast<uint32_t>(mOperations.size()); 86 } inputCount()87 uint32_t inputCount() const { return static_cast<uint32_t>(mInputIndexes.size()); } outputCount()88 uint32_t outputCount() const { return static_cast<uint32_t>(mOutputIndexes.size()); } getInputOperandIndex(uint32_t i)89 uint32_t getInputOperandIndex(uint32_t i) const { 90 CHECK_LT(i, mInputIndexes.size()); 91 return mInputIndexes[i]; 92 } getInputOperandIndexes()93 const std::vector<uint32_t>& getInputOperandIndexes() const { return mInputIndexes; } getInputOperand(uint32_t i)94 const Operand& getInputOperand(uint32_t i) const { 95 uint32_t index = getInputOperandIndex(i); 96 CHECK_LT(index, mOperands.size()); 97 return mOperands[index]; 98 } getOutputOperandIndex(uint32_t i)99 uint32_t getOutputOperandIndex(uint32_t i) const { 100 CHECK_LT(i, mOutputIndexes.size()); 101 return mOutputIndexes[i]; 102 } getOutputOperandIndexes()103 const std::vector<uint32_t>& getOutputOperandIndexes() const { return mOutputIndexes; } getOutputOperand(uint32_t i)104 const Operand& getOutputOperand(uint32_t i) const { 105 uint32_t index = getOutputOperandIndex(i); 106 CHECK_LT(index, mOperands.size()); 107 return mOperands[index]; 108 } getOperand(uint32_t index)109 const Operand& getOperand(uint32_t index) const { return mOperands[index]; } getOperation(uint32_t index)110 const Operation& getOperation(uint32_t index) const { return mOperations[index]; } getMemories()111 const MemoryTracker& getMemories() const { return mMemories; } getOperations()112 const std::vector<Operation>& getOperations() const { return mOperations; } getSortedOperationMapping()113 const std::vector<uint32_t>& getSortedOperationMapping() const { 114 return mSortedOperationIndexMap; 115 } getPointerToOperandValue(uint32_t offset)116 const uint8_t* getPointerToOperandValue(uint32_t offset) const { 117 return mSmallOperandValues.data() + offset; 118 } referencedModelCount()119 uint32_t referencedModelCount() const { 120 return static_cast<uint32_t>(mReferencedModels.size()); 121 } getReferencedModel(uint32_t i)122 const ModelBuilder* getReferencedModel(uint32_t i) const { 123 CHECK_LT(i, mReferencedModels.size()); 124 return mReferencedModels[i]; 125 } getReferencedModel(const Operand & operand)126 const ModelBuilder* getReferencedModel(const Operand& operand) const { 127 CHECK(operand.lifetime == Operand::LifeTime::SUBGRAPH); 128 return getReferencedModel(operand.location.offset); 129 } 130 131 // simulateFailureResultCode == ANEURALNETWORKS_NO_ERROR means behave normally. 132 int partitionTheWork(const std::vector<std::shared_ptr<Device>>& devices, uint32_t preference, 133 uint32_t priority, const OptionalTimePoint& deadline, ExecutionPlan* plan, 134 const std::vector<TokenValuePair>& metaData, 135 int simulateFailureResultCode = ANEURALNETWORKS_NO_ERROR) const; 136 137 const uint8_t* getModelArchHash() const; 138 139 private: 140 // TODO(b/132322449): move partitionTheWork, findBestDeviceForEachOperation, 141 // getPerformance, supportedByControlFlowInterpreter, 142 // isControlFlowOperationWithOperandOfUnknownSize, partitionTheWorkInternal, 143 // sortIntoRunOrder to CompilationBuilder? 144 145 // Populates bestDeviceForOperation 146 // 147 // For 0 <= i < operationCount(), produces 148 // 149 // 0 <= (*bestDeviceForOperation)[i] <= devices.size() 150 // 151 // (*bestDeviceForOperation)[i] == devices.size() is a special value meaning 152 // that this is a control flow operation scheduled for interpreted execution 153 // (see LogicalStep). 154 int findBestDeviceForEachOperation(uint32_t preference, 155 const std::vector<std::shared_ptr<Device>>& devices, 156 std::vector<int>* bestDeviceForOperation) const; 157 float getPerformance(uint32_t preference, const std::shared_ptr<Device> device) const; 158 float getPerformance(uint32_t preference, const std::shared_ptr<Device> device, 159 uint32_t operationIndex) const; 160 bool supportedByControlFlowInterpreter(uint32_t operationIndex) const; 161 162 // Returns true if the operation is IF or WHILE and has an inner or outer 163 // input or output of unknown size. 164 bool isControlFlowOperationWithOperandOfUnknownSize(uint32_t operationIndex) const; 165 166 int partitionTheWorkInternal(uint32_t sourceModelIndex, 167 const std::vector<std::shared_ptr<Device>>& devices, 168 uint32_t preference, uint32_t priority, 169 const OptionalTimePoint& deadline, ExecutionPlan* plan) const; 170 171 // Return true if either mCompleteModel or mInvalidModel is true. 172 bool badState(const char* name); 173 174 // Removes some trailing operation inputs that are set to default values. 175 // 176 // Some drivers reject operations based on the argument count even when the 177 // optional arguments are set to default values. This transformation enables 178 // more drivers to execute the model. See http://b/147105700. 179 void removeTrailingArgumentsWithDefaultValues(); 180 uint32_t getNumTrailingArgumentsToRemove(const Operation& operation) const; 181 182 // Sorts the operations to be in the correct order for single threaded 183 // node-at-a-time execution. 184 bool sortIntoRunOrder(); 185 186 // Copies the large values to a shared memory, if we have any. 187 int copyLargeValuesToSharedMemory(); 188 189 // Mark that the model should be simplified during ModelBuilder::makeModel, removing arguments 190 // from operations that already match the default values, dead operands, dead pools, dead 191 // subgraphs, and dead extensions. 192 void simplifyModel(); 193 194 // The operations of the graph. 195 std::vector<Operation> mOperations; 196 // The mapping from sorted index to the original index of operations in mOperations. 197 // mSortedOperationIndexMap is empty before sortIntoRunOrder() is called. 198 std::vector<uint32_t> mSortedOperationIndexMap; 199 // Is at least one of those operations an OEM_OPERATION? 200 bool mHasOEMOperation = false; 201 // Is at least one of those operations an extension operation? 202 bool mHasExtensionOperation = false; 203 // The description of the operands of the graph. 204 std::vector<Operand> mOperands; 205 // Is at least one of those operands an OEM operand? 206 bool mHasOEMOperand = false; 207 // The indexes of input operands of the model. 208 std::vector<uint32_t> mInputIndexes; 209 // The indexes of output operands of the model. 210 std::vector<uint32_t> mOutputIndexes; 211 // Whether the model should be simplified during ModelBuilder::makeModel, removing arguments 212 // from operations that already match the default values, dead operands, dead pools, dead 213 // subgraphs, and dead extensions. 214 bool mSimplifyModel = false; 215 216 MemoryTracker mMemories; 217 218 // The value of the small operands that are defined at model 219 // creation time. 220 std::vector<uint8_t> mSmallOperandValues; 221 222 struct LargeValue { 223 uint32_t operandIndex; 224 const void* buffer; 225 }; 226 // Operand index and buffer pointer for all the large operand values of this model. 227 std::vector<LargeValue> mLargeOperandValues; 228 // The shared memory region that will contain the large values. 229 std::unique_ptr<MemoryAshmem> mLargeValueMemory; 230 231 // Once the model has been finished, we should not allow further 232 // modifications to the model. 233 bool mCompletedModel = false; 234 235 // Any invalid manipulation of the model will mark the model invalid. 236 // No further modifications are allowed to the model. 237 bool mInvalidModel = false; 238 239 // 'true' indicates TENSOR_FLOAT32 may be calculated with range and/or 240 // precision as low as that of the IEEE 754 16-bit floating-point format. 241 // 'false' indicates TENSOR_FLOAT32 must be calculated using at least the 242 // range and precision of the IEEE 754 32-bit floating-point format. 243 bool mRelaxComputationFloat32toFloat16 = false; 244 245 // Models referenced by operands in this model. 246 std::vector<const ModelBuilder*> mReferencedModels; 247 248 // Main subgraphs of models referenced by operands in this model. Required 249 // for validateOperation(). 250 std::vector<Model::Subgraph> mReferencedSubgraphsForValidation; 251 252 // Does the model contain control flow operands or operations? 253 bool mHasControlFlow = false; 254 255 // Model architecture hash, used for telemetry. 256 uint8_t mModelArchHash[BYTE_SIZE_OF_MODEL_ARCH_HASH]; 257 258 class ModelMaker; 259 }; 260 261 } // namespace nn 262 } // namespace android 263 264 #endif // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_MODEL_BUILDER_H 265