1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Classes used to plan how to execute a model across multiple devices. 18 19 #ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H 20 #define ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H 21 22 #include "HalInterfaces.h" 23 #include "Memory.h" 24 #include "ModelBuilder.h" 25 #include "NeuralNetworks.h" 26 #include "TokenHasher.h" 27 #include "Utils.h" 28 #include "VersionedInterfaces.h" 29 30 #include <openssl/sha.h> 31 32 #include <set> 33 #include <string> 34 35 namespace android { 36 namespace nn { 37 38 class BurstBuilder; 39 class CompilationBuilder; 40 class Device; 41 class ExecutionBuilder; 42 class ExecutionPlan; 43 class ExecutionBurstController; 44 class Memory; 45 class StepExecutor; 46 47 class ExecutionStep { 48 public: 49 typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType; 50 typedef std::set<std::pair<uint32_t, uint32_t>> SubModelOutputSetType; 51 52 enum OperandKind { INPUT, OUTPUT }; 53 54 ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, std::shared_ptr<Device> device); 55 int addOperation(int operationIndex, const ModelBuilder& fromModel); 56 int addOperand(uint32_t fromOperandIndex, uint32_t* toOperandIndex, 57 const ModelBuilder& fromModel, OperandKind kind); 58 59 // Each container entry is of the form (fromModel index, subModel index) getModelInputs()60 const RemapVectorType& getModelInputs() const { 61 return mModelInputs; 62 } getModelOutputs()63 const RemapVectorType& getModelOutputs() const { 64 return mModelOutputs; 65 } getTempsAsSubModelInputs()66 const RemapVectorType& getTempsAsSubModelInputs() const { 67 return mTempsAsSubModelInputs; 68 } getTempsAsSubModelOutputs()69 const SubModelOutputSetType& getTempsAsSubModelOutputs() const { 70 return mTempsAsSubModelOutputs; 71 } getOutputsAsSubModelInputs()72 const RemapVectorType& getOutputsAsSubModelInputs() const { 73 return mOutputsAsSubModelInputs; 74 } getOutputIndexSubModelToFromModel()75 const std::vector<uint32_t>& getOutputIndexSubModelToFromModel() const { 76 return mOutputIndexSubModelToFromModel; 77 } getOutputsAsSubModelInputsIndexToFromModel()78 const std::vector<uint32_t>& getOutputsAsSubModelInputsIndexToFromModel() const { 79 return mOutputsAsSubModelInputsIndexToFromModel; 80 } 81 recordTempAsSubModelOutput(uint32_t fromModelIndex)82 void recordTempAsSubModelOutput(uint32_t fromModelIndex) { 83 const auto it = mOperandMap.find(fromModelIndex); 84 nnAssert(it != mOperandMap.end()); 85 mTempsAsSubModelOutputs.insert(std::make_pair(fromModelIndex, it->second)); 86 } 87 88 // If this step has a submodel output of unknown size, sets 89 // *hasOutputOfUnknownSize to true; otherwise, leaves it 90 // unchanged. 91 int finishSubModel(const ModelBuilder* fromModel, bool* hasOutputOfUnknownSize, 92 int32_t executionPreference); 93 getSubModel()94 const ModelBuilder* getSubModel() const { return &mSubModel; } getDevice()95 std::shared_ptr<Device> getDevice() const { return mDevice; } 96 97 // only available after calling finishSubModel() getPreparedSubModel()98 std::shared_ptr<VersionedIPreparedModel> getPreparedSubModel() const { 99 return mPreparedSubModel; 100 } 101 102 // Map inputs and outputs from ExecutionBuilder to StepExecutor. 103 void mapInputsAndOutputs(std::shared_ptr<StepExecutor> stepExecutor) const; 104 105 void dump() const; 106 107 // For test only, get the transformed cache token. forTest_getCacheToken()108 const uint8_t* forTest_getCacheToken() const { return mToken.getCacheToken(); } 109 110 private: 111 void logSubModel() const; 112 113 // TODO: Some of the data is working state information that 114 // shouldn't be needed after we've constructed but not executed 115 // the step. 116 117 ExecutionPlan* mPlan; 118 uint32_t mIndex; // index of step within plan 119 ModelBuilder mSubModel; 120 std::shared_ptr<Device> mDevice; 121 std::shared_ptr<VersionedIPreparedModel> mPreparedSubModel; // not used for CPU 122 123 // Inputs of original model that are also inputs of this submodel: 124 // (fromModel index, subModel index) 125 RemapVectorType mModelInputs; 126 // Outputs of original model that are also outputs of this submodel: 127 // (fromModel index, subModel index) 128 RemapVectorType mModelOutputs; 129 // Temporaries of original model that are inputs of this submodel: 130 // (fromModel index, subModel index) 131 RemapVectorType mTempsAsSubModelInputs; 132 // Temporaries of original model that are outputs of this submodel: 133 // (fromModel index, subModel index) 134 SubModelOutputSetType mTempsAsSubModelOutputs; 135 // Outputs of original model that are inputs of this submodel: 136 // (fromModel index, subModel index) 137 RemapVectorType mOutputsAsSubModelInputs; 138 // Converts operand indexes from the main model to the submodel. 139 std::unordered_map<uint32_t, uint32_t> mOperandMap; 140 // Converts input indexes from the submodel to the main model 141 // (these are input indexes, not operand indexes). This vector 142 // only describes inputs of the submodel that are also inputs of 143 // the main model -- that is, mModelInputs but not mTempsAsSubModelInputs. 144 std::vector<uint32_t> mInputIndexSubModelToFromModel; 145 // Converts output indexes from the submodel to the main model 146 // (these are output indexes, not operand indexes). This vector 147 // only describes outputs of the submodel that are also outputs of 148 // the main model -- that is, mModelOutputs but not mTempsAsSubModelOutputs. 149 std::vector<uint32_t> mOutputIndexSubModelToFromModel; 150 // Converts indexes into mOutputsAsSubModelInputs to indexes into 151 // main model outputs (these are input and output indexes, not 152 // operand indexes). To be specific, if the main model outputs 153 // are mainModelOutputs, 154 // 155 // mOutputsAsSubModelInputsIndexToFromModel.size() == 156 // mOutputsAsSubModelInputs.size() 157 // 158 // and when (0 <= i < mOutputsAsSubModelInputs.size()), 159 // 160 // mainModelOutputs[mOutputsAsSubModelInputsIndexToFromModel[i]] == 161 // mOutputsAsSubModelInputs[i].first 162 std::vector<uint32_t> mOutputsAsSubModelInputsIndexToFromModel; 163 164 // The compilation caching token. 165 TokenHasher mToken; 166 }; 167 168 class ExecutionPlan { 169 public: 170 ExecutionPlan(const ExecutionPlan&) = delete; 171 ExecutionPlan& operator=(const ExecutionPlan&) = delete; 172 ExecutionPlan()173 ExecutionPlan() { } ~ExecutionPlan()174 ~ExecutionPlan() { delete mBody; } 175 176 // Controller is part of the interface to a mechanism for 177 // performing an execution in N steps. 178 // 179 // Usage pattern: 180 // - Instantiate Controller with ExecutionPlan::makeController(). 181 // - Call ExecutionPlan::next() on Controller N+1 times. The first N times, 182 // *executor is set to point to a new StepExecutor corresponding 183 // to that step. The N+1st time, *executor is set to nullptr, 184 // signifying there are no more steps. 185 // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR, 186 // a problem has occurred. 187 class Controller { 188 friend class ExecutionPlan; 189 private: 190 Controller(const Controller&) = delete; 191 Controller& operator=(const Controller&) = delete; 192 193 // Map from the operand index of a TEMPORARY in the original 194 // model to an offset into mTemporaries used to represent that 195 // TEMPORARY as an inter-partition input or output. 196 typedef std::map<uint32_t, uint32_t> SubModelInputsAndOutputsType; 197 198 static const size_t kBadStepIndex = ~size_t(0); 199 200 Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder, 201 const BurstBuilder* burstBuilder, 202 std::shared_ptr<const SubModelInputsAndOutputsType> subModelInputsAndOutputs, 203 uint32_t totalSizeOfTemporaries); 204 205 const ExecutionPlan* mPlan; 206 ExecutionBuilder* mExecutionBuilder; 207 const BurstBuilder* mBurstBuilder; 208 std::shared_ptr<const SubModelInputsAndOutputsType> mSubModelInputsAndOutputs; // may be nullptr 209 Memory mTemporaries; 210 size_t mNextStepIndex; 211 }; 212 213 std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const; 214 215 std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder, 216 const BurstBuilder* burstBuilder) const; 217 218 int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor, 219 std::shared_ptr<ExecutionBurstController>* burstController = nullptr) const; 220 221 // Create the same executor as the last one created by next(). 222 int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const; 223 224 std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device); 225 226 void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model); 227 228 int finish(const ModelBuilder* fromModel, int32_t executionPreference); 229 recordTemporaryDef(uint32_t fromModelIndex,uint32_t stepIndex)230 void recordTemporaryDef(uint32_t fromModelIndex, uint32_t stepIndex) { 231 auto& temporaryToDefiningStep = compound()->mTemporaryToDefiningStep; 232 nnAssert(temporaryToDefiningStep.count(fromModelIndex) == 0); 233 temporaryToDefiningStep.insert(std::make_pair(fromModelIndex, stepIndex)); 234 } 235 236 void dump() const; 237 238 void reset(); 239 isValid()240 bool isValid() const { return mState != EMPTY && mBody != nullptr && mBody->mSuccessfulFinish; } 241 setCaching(const std::string * cacheDir,const uint8_t * token)242 void setCaching(const std::string* cacheDir, const uint8_t* token) { 243 mCacheDir = cacheDir; 244 mToken = token; 245 } getCacheDir()246 const std::string* getCacheDir() const { return mCacheDir; } getCacheToken()247 const uint8_t* getCacheToken() const { return mToken; } 248 249 // These functions are solely intended for use by unit tests of 250 // the partitioning algorithm. 251 enum class Kind { ERROR, EMPTY, SIMPLE, COMPOUND }; 252 Kind forTest_getKind() const; 253 std::shared_ptr<const Device> forTest_simpleGetDevice() const; 254 const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const; 255 bool forTest_hasSubModelOutputsOfUnknownSize() const; 256 const uint8_t* forTest_simpleGetCacheToken() const; 257 258 private: 259 void findTempsAsSubModelOutputs(); 260 261 struct Body { ~BodyBody262 virtual ~Body() {} 263 virtual void dump() const = 0; 264 virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0; 265 virtual bool hasSubModelOutputsOfUnknownSize() const = 0; 266 bool mSuccessfulFinish = false; 267 }; 268 269 struct SimpleBody : Body { SimpleBodySimpleBody270 SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model, 271 const std::string* cacheDir, const uint8_t* token) 272 : mDevice(device), mModel(model), mCacheDir(cacheDir), mToken(token) {} 273 274 void dump() const override; 275 int finish(const ModelBuilder* fromModel, int32_t executionPreference) override; hasSubModelOutputsOfUnknownSizeSimpleBody276 virtual bool hasSubModelOutputsOfUnknownSize() const override { return false; } 277 278 std::shared_ptr<Device> mDevice; 279 const ModelBuilder* mModel; 280 std::shared_ptr<VersionedIPreparedModel> mPreparedModel; // not used for CPU 281 282 const std::string* mCacheDir; 283 TokenHasher mToken; 284 }; 285 286 struct CompoundBody : Body { 287 void dump() const override; 288 int finish(const ModelBuilder* fromModel, int32_t executionPreference) override; hasSubModelOutputsOfUnknownSizeCompoundBody289 virtual bool hasSubModelOutputsOfUnknownSize() const override { 290 return mHasSubModelOutputOfUnknownSize; 291 } 292 293 // TODO: Some of the data is working state information that 294 // shouldn't be needed after we've constructed but not 295 // executed the plan. 296 297 std::vector<std::shared_ptr<ExecutionStep>> mSteps; 298 299 // Map from original operand index to defining step index. 300 // Used for all (and only) TEMPORARY_VARIABLEs. 301 std::unordered_map<uint32_t, uint32_t> mTemporaryToDefiningStep; 302 303 bool mHasSubModelOutputOfUnknownSize = false; 304 private: 305 void findTempsAsSubModelOutputs(); 306 }; 307 308 enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY; 309 Body* mBody = nullptr; compound()310 CompoundBody* compound() { 311 nnAssert(mState == COMPOUND); 312 return static_cast<CompoundBody*>(mBody); 313 } compound()314 const CompoundBody* compound() const { 315 nnAssert(mState == COMPOUND); 316 return static_cast<const CompoundBody*>(mBody); 317 } 318 319 // Pointers to compilation caching information in CompilationBuilder. 320 const std::string* mCacheDir = nullptr; 321 const uint8_t* mToken = nullptr; 322 }; 323 324 } // namespace nn 325 } // namespace android 326 327 #endif // ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H 328