/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

#include <unordered_map>
#include <vector>

using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;

namespace android {
namespace nn {

class CompilationBuilder;
class ExecutionPlan;
class Memory;
class ModelBuilder;
class StepExecutor;
class VersionedIDevice;

// TODO: move length out of DataLocation
struct ModelArgumentInfo {
    // Whether the argument was specified as being in a Memory, as a pointer,
    // has no value, or has not been specified.
    // If POINTER then:
    //     locationAndLength.length is valid.
    //     dimensions is valid.
    //     buffer is valid.
    // If MEMORY then:
    //     locationAndLength.{poolIndex, offset, length} is valid.
    //     dimensions is valid.
    enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
    DataLocation locationAndLength;
    std::vector<uint32_t> dimensions;
    void* buffer;

    int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type,
                       void* buffer, uint32_t length);
    int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                      uint32_t poolIndex, uint32_t offset, uint32_t length);
    int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset);
    int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
};

class ExecutionBuilder {
    friend class StepExecutor;

public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    const ModelBuilder* getModel() const { return mModel; }

private:
    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // This is a DeviceManager::kPartitioning* value captured from
    // CompilationBuilder when the ExecutionBuilder is constructed.
    uint32_t mPartitioning;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store
    //    the pointer in the ModelArgumentInfo's buffer field instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the pointer-specified entries and copy the input values into it.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers. We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
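
// Illustrative sketch (not part of this header): how a caller typically
// reaches ExecutionBuilder through the public NNAPI C API declared in
// NeuralNetworks.h; the ANeuralNetworksExecution_* entry points forward to
// the setInput()/setOutput()/startCompute() methods above. The compilation
// handle is assumed to already exist and to have been finished.
//
//     float input[4] = {1.0f, 2.0f, 3.0f, 4.0f};
//     float output[4];
//     ANeuralNetworksExecution* execution = nullptr;
//     ANeuralNetworksExecution_create(compilation, &execution);
//     // A nullptr type means "use the operand type from the model". Pointer
//     // arguments put the ModelArgumentInfo in the POINTER state; the runtime
//     // may later gather them into shared memory (step 2 in the comment above).
//     ANeuralNetworksExecution_setInput(execution, 0, nullptr, input, sizeof(input));
//     ANeuralNetworksExecution_setOutput(execution, 0, nullptr, output, sizeof(output));
//     ANeuralNetworksEvent* event = nullptr;
//     ANeuralNetworksExecution_startCompute(execution, &event);
//     ANeuralNetworksEvent_wait(event);
//     ANeuralNetworksEvent_free(event);
//     ANeuralNetworksExecution_free(execution);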

// class StepExecutor is used to execute a single "step" in a
// potentially multiple-step execution process. The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor. Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device. (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(const ExecutionBuilder* executionBuilder,
                 const ModelBuilder* model,
                 VersionedIDevice* driver, sp<IPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time. Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex],
                         &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mOutputs[executorIndex]);
    }
    void mapOutputToInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mInputs[executorIndex]);
    }

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex),
                                                   memory, offset,
                                                   &mInputs.at(inputIndex));
    }
    int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex),
                                                   memory, offset,
                                                   &mOutputs.at(outputIndex));
    }
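
    // Illustrative sketch (hypothetical two-step plan; the variable names are
    // assumptions, not part of this API): step 1 consumes the builder's input 0
    // and writes an intermediate operand into a temporary Memory pool; step 2
    // reads that intermediate operand and produces the builder's output 0. Each
    // submodel is assumed to have a single input and a single output.
    //
    //     StepExecutor step1(builder, submodel1, driver1, preparedModel1);
    //     StepExecutor step2(builder, submodel2, driver2, preparedModel2);
    //     step1.mapInput(0 /*builderIndex*/, 0 /*executorIndex*/);
    //     step1.setOutputFromTemporaryMemory(0 /*outputIndex*/, &temporary, 0 /*offset*/);
    //     step2.setInputFromTemporaryMemory(0 /*inputIndex*/, &temporary, 0 /*offset*/);
    //     step2.mapOutput(0 /*builderIndex*/, 0 /*executorIndex*/);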

    // Executes using the (driver, preparedModel) specified at construction time.
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    // Executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    bool isCpu() const { return mDriver == nullptr; }

private:
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);

    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                            const Memory* memory, uint32_t offset,
                                            ModelArgumentInfo* inputOrOutputInfo);

    // Describes the full (possibly multiple-"step") execution.
    const ExecutionBuilder* mExecutionBuilder;

    // Model to be executed on the executor, in both original and
    // compiled forms; and the device on which to execute it.
    const ModelBuilder* mModel;
    VersionedIDevice* mDriver;          // nullptr if CPU execution
    sp<IPreparedModel> mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store
    //    the pointer in the ModelArgumentInfo's buffer field instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the pointer-specified entries and copy the input values into it.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers. We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};

}  // namespace nn
}  // namespace android

#endif  // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H