1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Classes used to plan how to execute a model across multiple devices. 18 19 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_EXECUTION_PLAN_H 20 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_EXECUTION_PLAN_H 21 22 #include <LegacyUtils.h> 23 #include <TokenHasher.h> 24 #include <android-base/logging.h> 25 #include <nnapi/IBurst.h> 26 #include <nnapi/Types.h> 27 28 #include <algorithm> 29 #include <chrono> 30 #include <functional> 31 #include <map> 32 #include <memory> 33 #include <ostream> 34 #include <set> 35 #include <string> 36 #include <tuple> 37 #include <unordered_map> 38 #include <utility> 39 #include <variant> 40 #include <vector> 41 42 #include "Memory.h" 43 #include "ModelArgumentInfo.h" 44 #include "ModelBuilder.h" 45 #include "NeuralNetworks.h" 46 47 namespace android { 48 namespace nn { 49 50 class BurstBuilder; 51 class CompilationBuilder; 52 class Device; 53 class ExecutionBuilder; 54 class ExecutionPlan; 55 class RuntimeMemory; 56 class RuntimePreparedModel; 57 class StepExecutor; 58 59 struct ConstantReferenceLocation; 60 struct CacheInfo; 61 62 // NNAPI Control Flow allows referring to an NNAPI model inside another NNAPI 63 // model using OperandType::SUBGRAPH. For example, an IF operation within a 64 // model mey refer to two other models corresponding to then and else branches. 65 // 66 // The partitioning process transforms this nested representation into a list 67 // of LogicalSteps. 68 // 69 // The following terms are used: 70 // - The main model is the top-level model being compiled (not referenced by any 71 // OperandType::SUBGRAPH operand within the compilation). 72 // - A referenced model is a non-top-level model being compiled (referenced by 73 // at least one OperandType::SUBGRAPH operand within the set of models being 74 // compiled). 75 // - A source model is either the main model or a referenced model. 76 // - A step model is a model excerpted from a source model during the 77 // partitioning process. 78 // - A partition is a LogicalStep representing at least one operation of a 79 // source model. In particular, ExecutionStep represents a step model, IfStep 80 // represents an IF operation, WhileStep represents a WHILE operation. 81 // A GotoStep is not a partition. 82 // - A partition boundary operand is a source model operand that is an input or 83 // output of a partition. For ExecutionStep, the inputs and outputs of the 84 // step model are boundary operands; for IfStep and WhileStep, the inputs and 85 // outputs of the corresponding operation are boundary operands. 86 // - A partition boundary static temporary is a partition boundary 87 // operand which is of lifetime TEMPORARY_VARIABLE in the source model and 88 // whose dimensions are fully specified. 89 // - A partition boundary dynamic temporary is a partition boundary 90 // operand which is of lifetime TEMPORARY_VARIABLE in the source model and 91 // whose dimensions are not fully specified. 92 // - A main execution is the execution of a main model. 93 // 94 // Referenced models can be sources of partition boundary operands. For example, 95 // this happens when a referenced model is partitioned into one or more 96 // LogicalSteps. 97 // 98 // (model index, operand index within model) 99 typedef std::pair<uint32_t, uint32_t> SourceOperandIndex; 100 101 // A collection of source models. 102 class SourceModels { 103 public: addModel(const ModelBuilder * model)104 uint32_t addModel(const ModelBuilder* model) { 105 uint32_t modelIndex = mModels.size(); 106 mModels.push_back(model); 107 return modelIndex; 108 } 109 getModel(uint32_t index)110 const ModelBuilder* getModel(uint32_t index) const { return mModels[index]; } 111 size()112 uint32_t size() const { return mModels.size(); } 113 114 private: 115 std::vector<const ModelBuilder*> mModels; 116 }; 117 118 // Represents all partition boundary dynamic temporaries for a particular main 119 // execution. 120 // 121 // Usage pattern: 122 // - declare() every partition boundary dynamic temporary. 123 // - endDeclarations(). After this point, lookup() is permitted. 124 // - Before executing an ExecutionStep, call allocate(). 125 // - After executing an ExecutionStep, call redeclare() for every partition 126 // boundary dynamic temporary for which we've learned or guessed more about 127 // the dimensions or length. 128 // 129 // Each partition boundary temporary has a location assigned by allocate() for 130 // its defining step (see declare() and allocate()). That location remains 131 // valid until redeclare() increases the length of some temporary in its defining 132 // step or allocate() is called again for its defining step. 133 class DynamicTemporaries { 134 DISALLOW_COPY_AND_ASSIGN(DynamicTemporaries); 135 136 public: 137 DynamicTemporaries() = default; 138 DynamicTemporaries(DynamicTemporaries&&) = default; 139 DynamicTemporaries& operator=(DynamicTemporaries&&) = default; 140 141 // Declare a dynamic temporary. stepIndex is the step that defines the 142 // temporary (i.e., in which the temporary appears as an operation output 143 // operand). initialDimensions and initialLength indicate what we know or 144 // (in the case of length) guess about those properties. 145 void declare(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex, 146 const Dimensions& initialDimensions, uint32_t initialLength, uint32_t alignment, 147 uint32_t padding); 148 149 // Indicate that we've finished declaring all dynamic temporaries. endDeclarations()150 void endDeclarations() { 151 CHECK(!mDeclared); 152 mDeclared = true; 153 } 154 155 // Redeclare a dynamic temporary, indicating what we've learned about it. 156 // This may invalidate the location of temporaries defined by its step. 157 // Returns true if dimensions or length changed, false otherwise. 158 bool redeclare(SourceOperandIndex sourceOperandIndex, const Dimensions& newDimensions, 159 uint32_t newLength); 160 161 // Ensure that all dynamic temporaries defined by the specified step have 162 // locations. The return value is a ResultCode (e.g., 163 // ANEURALNETWORKS_NO_ERROR). 164 // 165 // Even if dynamic temporaries have already been allocated for this step, 166 // this call may reallocate them. A reallocation is not guaranteed to 167 // preserve location (LocationAndShape.memory, LocationAndShape.offset) or 168 // contents of temporaries. 169 int allocate(uint32_t stepIndex); 170 171 // Do the dynamic temporaries defined by this step have valid allocations? 172 // (Will be true if there are no dynamic temporaries defined by this step.) 173 bool allocated(uint32_t stepIndex) const; 174 175 // Dump information to VLOG(EXECUTION). 176 void vlogDump(const char* context = nullptr) const; 177 178 // If the specified operand is a dynamic temporary, return location and 179 // shape information; otherwise, return std::nullopt. 180 // 181 // If temporary exists but does not have a valid allocation, then: 182 // - If mustBeAllocated == true, then trigger a failed CHECK(). 183 // - If mustBeAllocated == false, then memory == nullptr and offset == ~0. 184 struct LocationAndShape { 185 const RuntimeMemory* memory; 186 uint32_t offset; 187 const Dimensions* dimensions; 188 uint32_t paddedLength; 189 }; 190 std::optional<LocationAndShape> lookup(SourceOperandIndex sourceOperandIndex, 191 bool mustBeAllocated = true) const; 192 193 // Have any dynamic temporaries been declared? empty()194 bool empty() const { return mSourceOperandToTemporary.empty(); } 195 196 private: 197 // The same as LocationAndShape, except that: 198 // - the base of the location is represented not by memory but by defining stepIndex 199 // - it additionally contains information about the preferred alignment and padding 200 struct InternalLocationAndShape { 201 uint32_t stepIndex; 202 uint32_t offset; 203 Dimensions dimensions; 204 uint32_t paddedLength; 205 uint32_t alignment; 206 uint32_t padding; 207 }; 208 std::map<SourceOperandIndex, InternalLocationAndShape> mSourceOperandToTemporary; 209 210 // Every dynamic temporary defined at a given stepIndex. 211 std::map<uint32_t, std::vector<SourceOperandIndex>> mStepIndexToSourceOperandIndexes; 212 213 std::map<uint32_t, std::unique_ptr<MemoryAshmem>> mStepIndexToMemory; 214 215 // For a given defining stepIndex, we consider either all its dynamic 216 // temporaries to be allocated (have valid locations) or none of them to be. 217 std::set<uint32_t> mAllocatedStepIndexes; 218 219 // Has endDeclarations() been called? 220 bool mDeclared = false; 221 }; 222 223 // The location of a static temporary. 224 struct StaticTemporaryLocation { 225 // The offset relative to ExecutionPlan::Controller::mTemporaries during execution. 226 uint32_t offset; 227 uint32_t paddedLength; 228 }; 229 230 // An excerpt of a source model to be run by a specific device. 231 class ExecutionStep { 232 public: 233 typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType; 234 typedef std::set<std::pair<uint32_t, uint32_t>> StepModelOutputSetType; 235 236 enum OperandKind { INPUT, OUTPUT }; 237 238 ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, uint32_t sourceModelIndex, 239 std::shared_ptr<Device> device); 240 241 int addOperation(int operationIndex); 242 int addOperand(uint32_t sourceOperandIndex, uint32_t* stepOperandIndex, OperandKind kind); 243 244 // Each container entry is of the form (source model operand index, step model operand index) getStepModelInputs()245 const RemapVectorType& getStepModelInputs() const { return mStepModelInputs; } getStepModelOutputs()246 const RemapVectorType& getStepModelOutputs() const { return mStepModelOutputs; } getModelInputs()247 const RemapVectorType& getModelInputs() const { return mModelInputs; } getModelOutputs()248 const RemapVectorType& getModelOutputs() const { return mModelOutputs; } getTempsAsStepModelInputs()249 const RemapVectorType& getTempsAsStepModelInputs() const { return mTempsAsStepModelInputs; } getTempsAsStepModelOutputs()250 const StepModelOutputSetType& getTempsAsStepModelOutputs() const { 251 return mTempsAsStepModelOutputs; 252 } getOutputsAsStepModelInputs()253 const RemapVectorType& getOutputsAsStepModelInputs() const { return mOutputsAsStepModelInputs; } getInputIndexStepModelToMainModel()254 const std::vector<uint32_t>& getInputIndexStepModelToMainModel() const { 255 return mInputIndexStepModelToMainModel; 256 } getOutputIndexStepModelToMainModel()257 const std::vector<uint32_t>& getOutputIndexStepModelToMainModel() const { 258 return mOutputIndexStepModelToMainModel; 259 } getOutputsAsStepModelInputsIndexToMainModel()260 const std::vector<uint32_t>& getOutputsAsStepModelInputsIndexToMainModel() const { 261 return mOutputsAsStepModelInputsIndexToMainModel; 262 } 263 getModelOutputsThatAreDownstreamInputs()264 const std::set<uint32_t>& getModelOutputsThatAreDownstreamInputs() const { 265 return mModelOutputsThatAreDownstreamInputs; 266 } 267 getIndex()268 uint32_t getIndex() const { return mIndex; } getSourceModelIndex()269 uint32_t getSourceModelIndex() const { return mSourceModelIndex; } 270 271 void declareModelOutputIsDownstreamInput(uint32_t mainModelOutputIndex); 272 void recordTempAsStepModelOutput(uint32_t stepOperandIndex); 273 274 // If this step has a step model output of unknown size, sets 275 // *hasOutputOfUnknownSize to true; otherwise, leaves it 276 // unchanged. 277 int finishStepModel(const ModelBuilder* mainModel, bool* hasOutputOfUnknownSize, 278 int32_t executionPreference, int32_t priority); 279 getStepModel()280 const ModelBuilder* getStepModel() const { return &mStepModel; } getDevice()281 std::shared_ptr<Device> getDevice() const { return mDevice; } 282 283 // only available after calling finishStepModel() getPreparedStepModel()284 std::shared_ptr<RuntimePreparedModel> getPreparedStepModel() const { 285 return mPreparedStepModel; 286 } 287 288 // Map inputs and outputs from ExecutionBuilder to StepExecutor. 289 // 290 // This method only reads map entries for which the first element of 291 // SourceOperandIndex is mSourceModelIndex. 292 // 293 // mainModelOutputShapes may be nullptr if the only main model outputs that are 294 // inputs of this step are of fully specified shape. 295 void mapInputsAndOutputs( 296 std::shared_ptr<StepExecutor> stepExecutor, 297 const std::vector<OutputShape>* mainModelOutputShapes, 298 const RuntimeMemory* temporaryMemory, // for static temporaries 299 const std::map<SourceOperandIndex, StaticTemporaryLocation>& 300 sourceOperandToLocationOfTemporary, // for static temporaries 301 const DynamicTemporaries& dynamicTemporaries, 302 const std::map<SourceOperandIndex, uint32_t>& sourceOperandToInputIndex, 303 const std::map<SourceOperandIndex, uint32_t>& sourceOperandToOutputIndex, 304 const std::map<SourceOperandIndex, ConstantReferenceLocation>& 305 sourceOperandToConstantReference) const; 306 hasNoInputsOrNoOutputs()307 bool hasNoInputsOrNoOutputs() const { 308 return mStepModelInputs.empty() || mStepModelOutputs.empty(); 309 } 310 311 void dump() const; 312 313 // For test only, get the transformed cache token. forTest_getCacheToken()314 const uint8_t* forTest_getCacheToken() const { return mToken.getCacheToken(); } 315 316 private: 317 void logStepModel() const; 318 const ModelBuilder* getSourceModel() const; 319 320 // TODO: Some of the data is working state information that 321 // shouldn't be needed after we've constructed but not executed 322 // the step. 323 324 ExecutionPlan* mPlan; 325 uint32_t mIndex; // index of step within plan 326 uint32_t mSourceModelIndex; 327 ModelBuilder mStepModel; // An excerpt of a source model to be run by one device. 328 std::shared_ptr<Device> mDevice; 329 std::shared_ptr<RuntimePreparedModel> mPreparedStepModel; 330 331 // All inputs of this step model: 332 // (source model operand index, step model operand index) 333 // 334 // Depending on whether the source operand is an input or output of the main 335 // model, the memory should be mapped using 336 // ExecutionPlan::CompoundBody::mSourceOperandToInputIndex, 337 // ExecutionPlan::Controller::mSourceOperandToLocationOfTemporary, or 338 // ExecutionPlan::Controller::mDynamicTemporaries, or 339 // ExecutionPlan::CompoundBody::mSourceOperandToOutputIndex. 340 RemapVectorType mStepModelInputs; 341 // All outputs of this step model: 342 // (source model operand index, step model operand index) 343 // 344 // Depending on whether the source operand is an output of the main model, 345 // the memory should be mapped using 346 // ExecutionPlan::CompoundBody::mSourceOperandToOutputIndex, 347 // ExecutionPlan::Controller::mSourceOperandToLocationOfTemporary, or 348 // ExecutionPlan::Controller::mDynamicTemporaries. 349 // 350 // mOutputIndexStepModelToMainModel and declareModelOutputIsDownstreamInput() 351 // rely on mModelOutputs being a prefix of mStepModelOutputs. 352 RemapVectorType mStepModelOutputs; 353 // Inputs of main model that are also inputs of this step model: 354 // (main model operand index, step model operand index) 355 RemapVectorType mModelInputs; 356 // Outputs of main model that are also outputs of this step model: 357 // (main model operand index, step model operand index) 358 RemapVectorType mModelOutputs; 359 // Temporaries of source model that are inputs of this step model: 360 // (source model operand index, step model operand index) 361 RemapVectorType mTempsAsStepModelInputs; 362 // Temporaries of source model that are outputs of this step model: 363 // (source model operand index, step model operand index) 364 StepModelOutputSetType mTempsAsStepModelOutputs; 365 // Outputs of main model that are inputs of this step model: 366 // (main model operand index, step model operand index) 367 RemapVectorType mOutputsAsStepModelInputs; 368 // Converts operand indexes from the source model to the step model. 369 std::unordered_map<uint32_t, uint32_t> mOperandMap; 370 // Converts input indexes from the step model to the main model 371 // (these are input indexes, not operand indexes). This vector 372 // only describes inputs of the step model that are also inputs of 373 // the main model -- that is, mModelInputs but not mTempsAsStepModelInputs. 374 std::vector<uint32_t> mInputIndexStepModelToMainModel; 375 // Converts output indexes from the step model to the main model 376 // (these are output indexes, not operand indexes). This vector 377 // only describes outputs of the step model that are also outputs of 378 // the main model -- that is, mModelOutputs but not 379 // mTempsAsStepModelOutputs. 380 std::vector<uint32_t> mOutputIndexStepModelToMainModel; 381 // Converts indexes into mOutputsAsStepModelInputs to indexes into 382 // main model outputs (these are input and output indexes, not 383 // operand indexes). To be specific, if the main model outputs 384 // are mainModelOutputs, 385 // 386 // mOutputsAsStepModelInputsIndexToMainModel.size() == 387 // mOutputsAsStepModelInputs.size() 388 // 389 // and when (0 <= i < mOutputsAsStepModelInputs.size()), 390 // 391 // mainModelOutputs[mOutputsAsStepModelInputsIndexToMainModel[i]] == 392 // mOutputsAsStepModelInputs[i].first 393 std::vector<uint32_t> mOutputsAsStepModelInputsIndexToMainModel; 394 395 // Step model output indexes (not operand indexes) that are outputs of the 396 // main model used as inputs to some other partition. 397 std::set<uint32_t> mModelOutputsThatAreDownstreamInputs; 398 399 // The compilation caching token. 400 TokenHasher mToken; 401 }; 402 403 // An IF operation to be run on the ExecutionPlan::next() interpreter. The 404 // branch models might run on devices. See LogicalStep. 405 // 406 // Execution plan structure: 407 // Index Step 408 // i if then=(i + 1) else=(j + 1) 409 // ... (then model steps) 410 // j goto k 411 // ... (else model steps) 412 // k (steps after the IF) 413 struct IfStep { 414 // The index of this step. 415 size_t index = ~size_t(0); 416 // The index of the first step of the "then" branch. 417 size_t thenStepIndex = ~size_t(0); 418 // The index of the first step of the "else" branch. 419 size_t elseStepIndex = ~size_t(0); 420 // The boolean condition input of the IF operation. The value of this 421 // operand determines the branch of the IF operation to be executed. 422 SourceOperandIndex conditionOperandIndex = {~uint32_t(0), ~uint32_t(0)}; 423 // Input operands of the IF operation to be passed to a branch model. 424 std::vector<SourceOperandIndex> outerInputOperands; 425 // Output operands of the IF operation. 426 std::vector<SourceOperandIndex> outerOutputOperands; 427 // Input operands of the "then" branch model. 428 std::vector<SourceOperandIndex> thenBranchInputOperands; 429 // Output operands of the "then" branch model. 430 std::vector<SourceOperandIndex> thenBranchOutputOperands; 431 // Input operands of the "else" branch model. 432 std::vector<SourceOperandIndex> elseBranchInputOperands; 433 // Output operands of the "else" branch model. 434 std::vector<SourceOperandIndex> elseBranchOutputOperands; 435 }; 436 437 // A WHILE operation to be run on the ExecutionPlan::next() interpreter. The 438 // condition and body models might run other devices. See LogicalStep. 439 // 440 // Execution plan structure: 441 // Index Step 442 // i while cond=(i + 1) body=(j + 1) exit=(k + 1) 443 // ... (cond model steps) 444 // j goto i 445 // ... (body model steps) 446 // k goto i 447 // ... (steps after the WHILE) 448 // 449 // Note that WhileStep has WhileState associated with it. 450 struct WhileStep { 451 // The index of this step. 452 size_t index = ~size_t(0); 453 // The index of the first step of the condition model. 454 size_t condStepIndex = ~size_t(0); 455 // The index of the first step of the body model. 456 size_t bodyStepIndex = ~size_t(0); 457 // The index of the first step after the loop. 458 size_t exitStepIndex = ~size_t(0); 459 // Input operands of the WHILE operation to be passed to the condition and 460 // body models. 461 std::vector<SourceOperandIndex> outerInputOperands; 462 // Output operands of the WHILE operation. 463 std::vector<SourceOperandIndex> outerOutputOperands; 464 // Input operands of the condition model. 465 std::vector<SourceOperandIndex> condInputOperands; 466 // Output operand of the condition model. The value of this operand 467 // determines whether to continue execution or exit the loop. 468 SourceOperandIndex condOutputOperand = {~uint32_t(0), ~uint32_t(0)}; 469 // Input operands of the body model. 470 std::vector<SourceOperandIndex> bodyInputOperands; 471 // Output operands of the body model. 472 std::vector<SourceOperandIndex> bodyOutputOperands; 473 }; 474 475 // A helper step. See LogicalStep. 476 struct GotoStep { 477 // The index of this step. 478 size_t index = ~size_t(0); 479 // The index of the step to go to. 480 size_t gotoStepIndex = ~size_t(0); 481 }; 482 483 // One of ExecutionStep, IfStep, WhileStep, or GotoStep. 484 // 485 // When ExecutionPlan::next() is called, it interprets logical steps until it 486 // encounters an ExecutionStep ("interpreted execution"). 487 // - For an IfStep, it decides which branch to take and proceeds to the 488 // corresponding step. 489 // - For a WhileStep, it decides whether to execute the condition or body (based 490 // on WhileState), or exit the loop (based on the condition model output), and 491 // proceeds to the corresponding step. 492 // - For a GotoStep, it proceeds to the indicated step unconditionally. 493 class LogicalStep { 494 public: 495 template <typename... Args> LogicalStep(Args &&...args)496 explicit LogicalStep(Args&&... args) : mStep(std::forward<Args>(args)...) {} 497 isExecution()498 bool isExecution() const { return std::holds_alternative<ExecutionStep>(mStep); } isIf()499 bool isIf() const { return std::holds_alternative<IfStep>(mStep); } isWhile()500 bool isWhile() const { return std::holds_alternative<WhileStep>(mStep); } isGoto()501 bool isGoto() const { return std::holds_alternative<GotoStep>(mStep); } 502 503 // Returns a non-null pointer or crashes. executionStep()504 ExecutionStep* executionStep() { return &std::get<ExecutionStep>(mStep); } ifStep()505 IfStep* ifStep() { return &std::get<IfStep>(mStep); } whileStep()506 WhileStep* whileStep() { return &std::get<WhileStep>(mStep); } gotoStep()507 GotoStep* gotoStep() { return &std::get<GotoStep>(mStep); } 508 509 // Returns a non-null pointer or crashes. executionStep()510 const ExecutionStep* executionStep() const { return &std::get<ExecutionStep>(mStep); } ifStep()511 const IfStep* ifStep() const { return &std::get<IfStep>(mStep); } whileStep()512 const WhileStep* whileStep() const { return &std::get<WhileStep>(mStep); } gotoStep()513 const GotoStep* gotoStep() const { return &std::get<GotoStep>(mStep); } 514 515 // May return nullptr. tryExecutionStep()516 ExecutionStep* tryExecutionStep() { return std::get_if<ExecutionStep>(&mStep); } tryIfStep()517 IfStep* tryIfStep() { return std::get_if<IfStep>(&mStep); } tryWhileStep()518 WhileStep* tryWhileStep() { return std::get_if<WhileStep>(&mStep); } tryGotoStep()519 GotoStep* tryGotoStep() { return std::get_if<GotoStep>(&mStep); } 520 521 // May return nullptr. tryExecutionStep()522 const ExecutionStep* tryExecutionStep() const { return std::get_if<ExecutionStep>(&mStep); } tryIfStep()523 const IfStep* tryIfStep() const { return std::get_if<IfStep>(&mStep); } tryWhileStep()524 const WhileStep* tryWhileStep() const { return std::get_if<WhileStep>(&mStep); } tryGotoStep()525 const GotoStep* tryGotoStep() const { return std::get_if<GotoStep>(&mStep); } 526 527 void dump() const; 528 529 private: 530 std::variant<ExecutionStep, IfStep, WhileStep, GotoStep> mStep; 531 }; 532 533 std::ostream& operator<<(std::ostream& os, const IfStep& step); 534 std::ostream& operator<<(std::ostream& os, const WhileStep& step); 535 std::ostream& operator<<(std::ostream& os, const GotoStep& step); 536 537 // Describes the state of WhileStep. 538 struct WhileState { 539 // A pseudo iteration number indicating the loop is not being executed. 540 static constexpr uint64_t kOutsideLoop = ~uint64_t(0); 541 // Whether we need to evaluate the condition or body next. 542 enum Stage { EVALUATE_CONDITION, EVALUATE_BODY } stage = EVALUATE_CONDITION; 543 // Current iteration number. Must be set to kOutsideLoop when exiting the 544 // loop. 545 uint64_t iteration = kOutsideLoop; 546 // Time point when the loop started executing. 547 TimePoint startTime; 548 }; 549 550 struct ConstantCopyLocation { 551 const uint8_t* buffer; 552 uint32_t length; 553 }; 554 555 struct ConstantReferenceLocation { 556 const RuntimeMemory* memory; 557 uint32_t offset; 558 uint32_t length; 559 }; 560 561 // A tuple of {execution_step_index, io_type, io_index} specifying an input/output role of an 562 // ExecutionStep. 563 using StepRole = std::tuple<uint32_t, IOType, uint32_t>; 564 565 // A callback function that takes the prepared_model, io_type, and io_index of a step role. 566 using StepRoleCallback = std::function<void(const RuntimePreparedModel*, IOType, uint32_t)>; 567 568 class ExecutionPlan { 569 public: 570 ExecutionPlan(const ExecutionPlan&) = delete; 571 ExecutionPlan& operator=(const ExecutionPlan&) = delete; 572 ExecutionPlan()573 ExecutionPlan() {} ~ExecutionPlan()574 ~ExecutionPlan() { delete mBody; } 575 576 // Controller is part of the interface to a mechanism for performing a 577 // main execution in N steps. 578 // 579 // The value of N may not be known beforehand if the model contains WHILE 580 // loops. See LogicalStep. 581 // 582 // Usage pattern: 583 // - Instantiate Controller with ExecutionPlan::makeController(). 584 // - Call ExecutionPlan::next() on Controller N+1 times. The first N times, 585 // *executor is set to point to a new StepExecutor corresponding 586 // to that step. The N+1st time, *executor is set to nullptr, 587 // signifying there are no more steps. 588 // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR, 589 // a problem has occurred. 590 class Controller { 591 friend class ExecutionPlan; 592 593 private: 594 Controller(const Controller&) = delete; 595 Controller& operator=(const Controller&) = delete; 596 597 static const size_t kBadStepIndex = ~size_t(0); 598 599 // A constructor for mState == COMPOUND. 600 Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder, 601 const BurstBuilder* burstBuilder, 602 603 // static temporaries 604 uint32_t totalSizeOfTemporaries, 605 std::map<SourceOperandIndex, StaticTemporaryLocation> 606 sourceOperandToLocationOfTemporary, 607 std::map<SourceOperandIndex, StaticTemporaryLocation> 608 sourceOperandToLocationOfTemporary2, 609 610 std::map<SourceOperandIndex, uint32_t> sourceOperandToInputIndex, 611 std::map<SourceOperandIndex, uint32_t> sourceOperandToOutputIndex, 612 const std::map<SourceOperandIndex, ConstantCopyLocation>& 613 sourceOperandToConstantCopy, 614 std::map<SourceOperandIndex, ConstantReferenceLocation> 615 sourceOperandToConstantReference, 616 DynamicTemporaries dynamicTemporaries); 617 618 // Sets the location of innerOperand to be the same as the location of outerOperand. 619 void setInput(const SourceOperandIndex& outerOperand, 620 const SourceOperandIndex& innerOperand); 621 void setOutput(const SourceOperandIndex& outerOperand, 622 const SourceOperandIndex& innerOperand); 623 624 // Wait for mLastStepSyncFd to signal. 625 // No-op if mLastStepSyncFd is -1 which the mLastStepSyncFd is initialized to. 626 // mLastStepSyncFd will also be set to -1 when the most recently processed step 627 // does not generate a sync fence. 628 int waitForLastStepSyncFence() const; 629 630 [[maybe_unused]] const ExecutionPlan* mPlan; 631 ExecutionBuilder* mExecutionBuilder; 632 const BurstBuilder* mBurstBuilder; 633 // Map from source operand index to an offset into mTemporaries used 634 // to represent that operand as an inter-partition input or output. 635 // 636 // The four maps 637 // - mSourceOperandToLocationOfTemporary 638 // - mSourceOperandToInputIndex 639 // - mSourceOperandToOutputIndex 640 // - mSourceOperandToConstantReference 641 // are initialized from similarly named fields of ExecutionPlan::CompoundBody. 642 // 643 // A particular key appears in at most one map at any given time. This 644 // restriction does not apply to mSourceOperandToLocationOfTemporary2. 645 // 646 // The maps are modified during the execution of IfStep and WhileStep. 647 // See ExecutionPlan::nextCompound(). 648 std::map<SourceOperandIndex, StaticTemporaryLocation> mSourceOperandToLocationOfTemporary; 649 // Map from source operand index to an additional offset into 650 // mTemporaries used for double buffering of WHILE loop output operands. 651 std::map<SourceOperandIndex, StaticTemporaryLocation> mSourceOperandToLocationOfTemporary2; 652 // Map from source operand index to an input index of the main model. 653 std::map<SourceOperandIndex, uint32_t> mSourceOperandToInputIndex; 654 // Map from source operand index to an output index of the main model. 655 std::map<SourceOperandIndex, uint32_t> mSourceOperandToOutputIndex; 656 // Map from source operand index to a constant reference location. 657 // Used for WHILE loop operand initializers that are constant references. 658 std::map<SourceOperandIndex, ConstantReferenceLocation> mSourceOperandToConstantReference; 659 660 // static temporaries 661 std::unique_ptr<MemoryAshmem> mTemporaries; 662 663 DynamicTemporaries mDynamicTemporaries; 664 665 // Index of the next step to be processed by ExecutionPlan::next(). 666 size_t mNextStepIndex; 667 // The value to reset mNextStepIndex to for partial CPU fallback. 668 size_t mFallbackNextStepIndex; 669 // Map from WhileStep index to the associated WhileState. 670 std::unordered_map<size_t, WhileState> mWhileState; 671 // The sync fence fd of the last step. 672 int mLastStepSyncFd; 673 }; 674 675 std::vector<SharedBurst> makeBursts() const; 676 677 // Only legal to call when mState == COMPOUND. 678 std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder, 679 const BurstBuilder* burstBuilder) const; 680 681 // Sets up a new StepExecutor and burstController (if applicable) if there 682 // is a step to execute. See ExecutionPlan::Controller. 683 // Handles control flow. See LogicalStep. 684 // burstController is nullptr if we are not to do burst execution. 685 // mainModelOutputShapes may be nullptr if the only main model outputs that are step model 686 // inputs are of fully specified shape. 687 // syncFdOfLastStep is the sync fence fd generated by the most recently processed step. 688 // Only legal to call when mState == COMPOUND. 689 int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor, 690 SharedBurst* burstController, const std::vector<OutputShape>* mainModelOutputShapes, 691 int syncFdOfLastStep = -1) const; 692 693 // Create the same executor as the last one created by next(). 694 int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor, 695 SharedBurst* burstController, 696 const std::vector<OutputShape>* mainModelOutputShapes) const; 697 698 // Only legal to call when mState == SIMPLE. 699 // See the constructor of StepExecutor for the semantics of "reusable". 700 std::shared_ptr<StepExecutor> makeStepExecutor(bool reusable, 701 ExecutionBuilder* executionBuilder) const; 702 703 ExecutionStep* createNewExecutionStep(uint32_t sourceModelIndex, 704 const std::shared_ptr<Device> device); 705 IfStep* createNewIfStep(); 706 WhileStep* createNewWhileStep(); 707 GotoStep* createNewGotoStep(); 708 709 // Only legal to call when mState == COMPOUND. getNextStepIndex()710 size_t getNextStepIndex() const { return compound()->mSteps.size(); } 711 712 void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model); 713 714 // simulateFailureResultCode == ANEURALNETWORKS_NO_ERROR means behave normally. 715 int finish(int32_t executionPreference, int32_t priority, const OptionalTimePoint& deadline, 716 const std::vector<TokenValuePair>& metaData, int simulateFailureResultCode); 717 718 void recordOutputDef(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex); 719 void recordTemporaryDef(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex); 720 721 void dump() const; 722 723 void reset(); 724 isValid()725 bool isValid() const { return mState != EMPTY && mBody != nullptr && mBody->mSuccessfulFinish; } isSimple()726 bool isSimple() const { return mState == SIMPLE; } isCompound()727 bool isCompound() const { return mState == COMPOUND; } 728 bool isSimpleCpu() const; 729 setCaching(const CacheInfo * cacheInfo,const uint8_t * token)730 void setCaching(const CacheInfo* cacheInfo, const uint8_t* token) { 731 mCacheInfo = cacheInfo; 732 mToken = token; 733 } getCacheInfo()734 const CacheInfo* getCacheInfo() const { return mCacheInfo; } getCacheToken()735 const uint8_t* getCacheToken() const { return mToken; } 736 737 // The caller is responsible for making sure the index is within range. forEachStepRoleOfInput(uint32_t index,const StepRoleCallback & callback)738 void forEachStepRoleOfInput(uint32_t index, const StepRoleCallback& callback) const { 739 CHECK(mBody != nullptr); 740 mBody->forEachStepRoleOfInput(index, callback); 741 } forEachStepRoleOfOutput(uint32_t index,const StepRoleCallback & callback)742 void forEachStepRoleOfOutput(uint32_t index, const StepRoleCallback& callback) const { 743 CHECK(mBody != nullptr); 744 mBody->forEachStepRoleOfOutput(index, callback); 745 } 746 747 // "type" specifies input or output, and "index" is the main model input or output index. 748 // The caller is responsible for making sure the index is within range. 749 MemoryPreference getMemoryPreference(IOType type, uint32_t index) const; 750 getSourceModels()751 SourceModels& getSourceModels() { return mSourceModels; } getSourceModels()752 const SourceModels& getSourceModels() const { return mSourceModels; } 753 754 // "index" is the main model input or output index. 755 // The caller is responsible for making sure the index is within range. 756 SourceOperandIndex getInputSourceOperand(uint32_t index) const; 757 SourceOperandIndex getOutputSourceOperand(uint32_t index) const; 758 759 bool hasDynamicTemporaries() const; 760 761 // These functions are solely intended for use by unit tests of 762 // the partitioning algorithm. 763 enum class Kind { 764 ERROR, 765 EMPTY, 766 SIMPLE, 767 COMPOUND 768 }; // See operator<< defined outside this class 769 Kind forTest_getKind() const; 770 std::shared_ptr<const Device> forTest_simpleGetDevice() const; 771 const std::vector<std::shared_ptr<LogicalStep>>& forTest_compoundGetSteps() const; forTest_compoundForEachStepRoleOfSourceOperand(SourceOperandIndex index,const StepRoleCallback & callback)772 void forTest_compoundForEachStepRoleOfSourceOperand(SourceOperandIndex index, 773 const StepRoleCallback& callback) const { 774 compound()->forEachStepRoleOfSourceOperand(index, callback); 775 } 776 // The "flat" in the name signifies that this method requires that the 777 // model not contain any control flow operations. 778 std::set<uint32_t> forTest_flatGetDynamicTemporaries() const; 779 const uint8_t* forTest_simpleGetCacheToken() const; 780 bool forTest_hasStepModelWithNoInputsOrNoOutputs() const; 781 782 private: 783 // Becomes a new COMPOUND step if mState == EMPTY, otherwise does nothing. 784 // Illegal to call for when mState == SIMPLE. 785 void becomeCompoundIfEmpty(); 786 getSourceOperand(const std::pair<uint32_t,uint32_t> & sourceOperandIndex)787 const Operand& getSourceOperand(const std::pair<uint32_t, uint32_t>& sourceOperandIndex) const { 788 return getSourceModels() 789 .getModel(sourceOperandIndex.first) 790 ->getOperand(sourceOperandIndex.second); 791 } 792 793 class Buffer { 794 public: 795 Buffer(void* pointer, uint32_t size); 796 Buffer(RunTimePoolInfo info, uint32_t offset); 797 void* getPointer() const; 798 uint32_t getSize() const; 799 void flush() const; 800 801 private: 802 RunTimePoolInfo mInfo; 803 uint32_t mOffset; 804 }; 805 806 // Returns the buffer associated with a partition boundary operand. 807 std::optional<Buffer> getBuffer(std::shared_ptr<Controller> controller, 808 SourceOperandIndex operandIndex) const; 809 std::optional<Buffer> getBufferFromModelArgumentInfo( 810 const ModelArgumentInfo& info, const ExecutionBuilder* executionBuilder) const; 811 // Reads the value of a partition boundary boolean condition operand. 812 int readConditionValue(std::shared_ptr<Controller> controller, SourceOperandIndex operandIndex, 813 bool* value) const; 814 815 // Handles control flow. See LogicalStep. 816 int nextCompound(std::shared_ptr<Controller> controller, 817 std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController, 818 const std::vector<OutputShape>* mainModelOutputShapes) const; 819 int nextCompound(const ExecutionStep* step, std::shared_ptr<Controller> controller, 820 std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController, 821 const std::vector<OutputShape>* mainModelOutputShapes) const; 822 int nextCompound(const IfStep* step, std::shared_ptr<Controller> controller, 823 std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController, 824 const std::vector<OutputShape>* mainModelOutputShapes) const; 825 int nextCompound(const WhileStep* step, std::shared_ptr<Controller> controller, 826 std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController, 827 const std::vector<OutputShape>* mainModelOutputShapes) const; 828 int nextCompound(const GotoStep* step, std::shared_ptr<Controller> controller, 829 std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController, 830 const std::vector<OutputShape>* mainModelOutputShapes) const; 831 832 struct Body { ~BodyBody833 virtual ~Body() {} 834 virtual void dump() const = 0; 835 virtual int finish(const SourceModels* sourceModels, int32_t executionPreference, 836 int32_t priority, const OptionalTimePoint& deadline, 837 const std::vector<TokenValuePair>& metadata, 838 int simulateFailureResultCode) = 0; 839 virtual bool hasDynamicTemporaries() const = 0; 840 virtual bool hasStepModelWithNoInputsOrNoOutputs() const = 0; 841 virtual void forEachStepRoleOfInput(uint32_t index, 842 const StepRoleCallback& callback) const = 0; 843 virtual void forEachStepRoleOfOutput(uint32_t index, 844 const StepRoleCallback& callback) const = 0; 845 bool mSuccessfulFinish = false; 846 }; 847 848 struct SimpleBody : Body { SimpleBodySimpleBody849 SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model, 850 const CacheInfo* cacheInfo, const uint8_t* token) 851 : mDevice(device), mModel(model), mCacheInfo(cacheInfo), mToken(token) {} 852 853 void dump() const override; 854 int finish(const SourceModels* sourceModels, int32_t executionPreference, int32_t priority, 855 const OptionalTimePoint& deadline, const std::vector<TokenValuePair>& metadata, 856 int simulateFailureResultCode) override; hasDynamicTemporariesSimpleBody857 bool hasDynamicTemporaries() const override { return false; } hasStepModelWithNoInputsOrNoOutputsSimpleBody858 bool hasStepModelWithNoInputsOrNoOutputs() const override { return false; } 859 void forEachStepRoleOfInput(uint32_t index, 860 const StepRoleCallback& callback) const override; 861 void forEachStepRoleOfOutput(uint32_t index, 862 const StepRoleCallback& callback) const override; 863 864 std::shared_ptr<Device> mDevice; 865 const ModelBuilder* mModel; 866 std::shared_ptr<RuntimePreparedModel> mPreparedModel; 867 868 const CacheInfo* mCacheInfo; 869 TokenHasher mToken; 870 }; 871 872 struct CompoundBody : Body { CompoundBodyCompoundBody873 CompoundBody(const ExecutionPlan* plan) : mPlan(plan) { CHECK(plan != nullptr); } 874 875 void dump() const override; 876 int finish(const SourceModels* sourceModels, int32_t executionPreference, int32_t priority, 877 const OptionalTimePoint& deadline, const std::vector<TokenValuePair>& metadata, 878 int simulateFailureResultCode) override; hasDynamicTemporariesCompoundBody879 bool hasDynamicTemporaries() const override { return mHasDynamicTemporaries; } 880 bool hasStepModelWithNoInputsOrNoOutputs() const override; 881 void forEachStepRoleOfInput(uint32_t index, 882 const StepRoleCallback& callback) const override; 883 void forEachStepRoleOfOutput(uint32_t index, 884 const StepRoleCallback& callback) const override; 885 // Supported for any legal source operand index. For a source operand that doesn't have a 886 // step role, the callback will not be invoked at all. 887 void forEachStepRoleOfSourceOperand(const SourceOperandIndex& index, 888 const StepRoleCallback& callback) const; 889 // Supported for any legal source operand index. 890 MemoryPreference getMemoryPreferenceOfSourceOperand(const SourceOperandIndex& index) const; 891 892 // TODO: Some of the data is working state information that 893 // shouldn't be needed after we've constructed but not 894 // executed the plan. 895 896 std::vector<std::shared_ptr<LogicalStep>> mSteps; 897 898 // Map from source operand index to defining ExecutionStep index. 899 // Used for all (and only) SUBGRAPH_OUTPUTs that are defined by 900 // ExecutionSteps. Those defined by IfSteps and WhileSteps are not in 901 // the map. 902 std::map<SourceOperandIndex, uint32_t> mOutputToDefiningExecutionStep; 903 904 // Map from source operand index to defining ExecutionStep index. 905 // Used for all (and only) TEMPORARY_VARIABLEs that are defined by 906 // ExecutionSteps. Those defined by IfSteps and WhileSteps are not in 907 // the map. 908 std::map<SourceOperandIndex, uint32_t> mTemporaryToDefiningExecutionStep; 909 910 // Map from source operand index to input index of the main model. 911 // This map only contains SUBGRAPH_INPUTs of the main model and is used 912 // to initialize ExecutionPlan::Controller::mSourceOperandToInputIndex; 913 std::map<SourceOperandIndex, uint32_t> mSourceOperandToInputIndex; 914 915 // Map from source operand index to output index of the main model. 916 // This map only contains SUBGRAPH_OUTPUTs of the main model and is used 917 // to initialize ExecutionPlan::Controller::mSourceOperandToOutputIndex; 918 std::map<SourceOperandIndex, uint32_t> mSourceOperandToOutputIndex; 919 920 // Map from source operand index to location of a CONSTANT_COPY or 921 // POINTER operand. 922 // This map only contains constant partition boundary IF and WHILE 923 // operands and is used to create a ExecutionPlan::Controller. 924 std::map<SourceOperandIndex, ConstantCopyLocation> mSourceOperandToBoundaryConstantCopy; 925 926 // Map from source operand index to location of a CONSTANT_REFERENCE 927 // operand. This map only contains constant partition boundary IF and 928 // WHILE operands and is used to initialize 929 // ExecutionPlan::Controller::mSourceOperandToConstantReference. 930 std::map<SourceOperandIndex, ConstantReferenceLocation> 931 mSourceOperandToBoundaryConstantReference; 932 933 // Map from source operand index of a boundary operand to the step roles that its memory 934 // may be used for. 935 // This map only contains partition boundary operands that have ExecutionStep roles, that 936 // is, SUBGRAPH_INPUTs, SUBGRAPH_OUTPUTs, and partition boundary static and dynamic 937 // temporaries. If a partition boundary operand is not found in the map, then the operand 938 // does not have any ExecutionStep role (this may happen with interpreted control flow). 939 std::map<SourceOperandIndex, std::set<StepRole>> mSourceOperandToStepRoles; 940 941 bool mHasDynamicTemporaries = false; 942 943 private: 944 void findTempsAsStepModelOutputs(); 945 946 void findModelOutputsThatAreDownstreamInputs(); 947 948 // Constant values that are inputs to IF and WHILE operations and lie on 949 // a partition boundary ("control flow boundary constants") require 950 // special treatment. We need to be able to dynamically associate those 951 // values with the corresponding SUBGRAPH_INPUT operands in a referenced 952 // model. 953 // 954 // For CONSTANT_COPY and POINTER boundary operands, we copy those to 955 // temporary memory and treat them similarly to TEMPORARY_VARIABLE 956 // operands in Controller. 957 // 958 // For CONSTANT_REFERENCE boundary operands, we keep track of them in 959 // ExecutionPlan::Controller::mSourceOperandToConstantReference. 960 // 961 // Note that for IF inputs and input-only WHILE inputs that are boundary 962 // constants, we could embed those inside the referenced model, but we 963 // currently don't do so. See b/148216514. 964 void findControlFlowBoundaryConstants(const SourceModels* sourceModels); 965 966 // This method will set mSourceOperandToStepRoles. 967 void findMemoryStepRoles(); 968 969 const ExecutionPlan* mPlan; 970 }; 971 972 enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY; 973 Body* mBody = nullptr; simple()974 SimpleBody* simple() { 975 CHECK(mState == SIMPLE); 976 CHECK(mBody != nullptr); 977 return static_cast<SimpleBody*>(mBody); 978 } simple()979 const SimpleBody* simple() const { 980 CHECK(mState == SIMPLE); 981 CHECK(mBody != nullptr); 982 return static_cast<const SimpleBody*>(mBody); 983 } compound()984 CompoundBody* compound() { 985 CHECK(mState == COMPOUND); 986 CHECK(mBody != nullptr); 987 return static_cast<CompoundBody*>(mBody); 988 } compound()989 const CompoundBody* compound() const { 990 CHECK(mState == COMPOUND); 991 CHECK(mBody != nullptr); 992 return static_cast<const CompoundBody*>(mBody); 993 } 994 995 void forEachDynamicTemporary(const std::function<void(SourceOperandIndex, const Operand&, 996 uint32_t definingStepIndex)>&) const; 997 998 // Pointers to compilation caching information in CompilationBuilder. 999 const CacheInfo* mCacheInfo = nullptr; 1000 const uint8_t* mToken = nullptr; 1001 1002 SourceModels mSourceModels; 1003 }; 1004 1005 inline std::ostream& operator<<(std::ostream& out, ExecutionPlan::Kind kind) { 1006 const int intKind = static_cast<int>(kind); 1007 if (kind < ExecutionPlan::Kind::ERROR || kind > ExecutionPlan::Kind::COMPOUND) { 1008 return out << "<UNK(" << intKind << ")>"; 1009 } 1010 static const char* name[] = {"ERROR", "EMPTY", "SIMPLE", "COMPOUND"}; 1011 return out << name[intKind]; 1012 } 1013 1014 } // namespace nn 1015 } // namespace android 1016 1017 #endif // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_EXECUTION_PLAN_H 1018