1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Classes used to plan how to execute a model across multiple devices.
18 
19 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_EXECUTION_PLAN_H
20 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_EXECUTION_PLAN_H
21 
22 #include <LegacyUtils.h>
23 #include <TokenHasher.h>
24 #include <android-base/logging.h>
25 #include <nnapi/IBurst.h>
26 #include <nnapi/Types.h>
27 
28 #include <algorithm>
29 #include <chrono>
30 #include <functional>
31 #include <map>
32 #include <memory>
33 #include <ostream>
34 #include <set>
35 #include <string>
36 #include <tuple>
37 #include <unordered_map>
38 #include <utility>
39 #include <variant>
40 #include <vector>
41 
42 #include "Memory.h"
43 #include "ModelArgumentInfo.h"
44 #include "ModelBuilder.h"
45 #include "NeuralNetworks.h"
46 
47 namespace android {
48 namespace nn {
49 
50 class BurstBuilder;
51 class CompilationBuilder;
52 class Device;
53 class ExecutionBuilder;
54 class ExecutionPlan;
55 class RuntimeMemory;
56 class RuntimePreparedModel;
57 class StepExecutor;
58 
59 struct ConstantReferenceLocation;
60 struct CacheInfo;
61 
62 // NNAPI Control Flow allows referring to an NNAPI model inside another NNAPI
63 // model using OperandType::SUBGRAPH. For example, an IF operation within a
64 // model mey refer to two other models corresponding to then and else branches.
65 //
66 // The partitioning process transforms this nested representation into a list
67 // of LogicalSteps.
68 //
69 // The following terms are used:
70 // - The main model is the top-level model being compiled (not referenced by any
71 //   OperandType::SUBGRAPH operand within the compilation).
72 // - A referenced model is a non-top-level model being compiled (referenced by
73 //   at least one OperandType::SUBGRAPH operand within the set of models being
74 //   compiled).
75 // - A source model is either the main model or a referenced model.
76 // - A step model is a model excerpted from a source model during the
77 //   partitioning process.
78 // - A partition is a LogicalStep representing at least one operation of a
79 //   source model. In particular, ExecutionStep represents a step model, IfStep
80 //   represents an IF operation, WhileStep represents a WHILE operation.
81 //   A GotoStep is not a partition.
82 // - A partition boundary operand is a source model operand that is an input or
83 //   output of a partition. For ExecutionStep, the inputs and outputs of the
84 //   step model are boundary operands; for IfStep and WhileStep, the inputs and
85 //   outputs of the corresponding operation are boundary operands.
86 // - A partition boundary static temporary is a partition boundary
87 //   operand which is of lifetime TEMPORARY_VARIABLE in the source model and
88 //   whose dimensions are fully specified.
89 // - A partition boundary dynamic temporary is a partition boundary
90 //   operand which is of lifetime TEMPORARY_VARIABLE in the source model and
91 //   whose dimensions are not fully specified.
92 // - A main execution is the execution of a main model.
93 //
94 // Referenced models can be sources of partition boundary operands. For example,
95 // this happens when a referenced model is partitioned into one or more
96 // LogicalSteps.
97 //
98 // (model index, operand index within model)
99 typedef std::pair<uint32_t, uint32_t> SourceOperandIndex;
100 
101 // A collection of source models.
102 class SourceModels {
103    public:
addModel(const ModelBuilder * model)104     uint32_t addModel(const ModelBuilder* model) {
105         uint32_t modelIndex = mModels.size();
106         mModels.push_back(model);
107         return modelIndex;
108     }
109 
getModel(uint32_t index)110     const ModelBuilder* getModel(uint32_t index) const { return mModels[index]; }
111 
size()112     uint32_t size() const { return mModels.size(); }
113 
114    private:
115     std::vector<const ModelBuilder*> mModels;
116 };
117 
118 // Represents all partition boundary dynamic temporaries for a particular main
119 // execution.
120 //
121 // Usage pattern:
122 // - declare() every partition boundary dynamic temporary.
123 // - endDeclarations().  After this point, lookup() is permitted.
124 // - Before executing an ExecutionStep, call allocate().
125 // - After executing an ExecutionStep, call redeclare() for every partition
126 //   boundary dynamic temporary for which we've learned or guessed more about
127 //   the dimensions or length.
128 //
129 // Each partition boundary temporary has a location assigned by allocate() for
130 // its defining step (see declare() and allocate()).  That location remains
131 // valid until redeclare() increases the length of some temporary in its defining
132 // step or allocate() is called again for its defining step.
133 class DynamicTemporaries {
134     DISALLOW_COPY_AND_ASSIGN(DynamicTemporaries);
135 
136    public:
137     DynamicTemporaries() = default;
138     DynamicTemporaries(DynamicTemporaries&&) = default;
139     DynamicTemporaries& operator=(DynamicTemporaries&&) = default;
140 
141     // Declare a dynamic temporary.  stepIndex is the step that defines the
142     // temporary (i.e., in which the temporary appears as an operation output
143     // operand).  initialDimensions and initialLength indicate what we know or
144     // (in the case of length) guess about those properties.
145     void declare(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex,
146                  const Dimensions& initialDimensions, uint32_t initialLength, uint32_t alignment,
147                  uint32_t padding);
148 
149     // Indicate that we've finished declaring all dynamic temporaries.
endDeclarations()150     void endDeclarations() {
151         CHECK(!mDeclared);
152         mDeclared = true;
153     }
154 
155     // Redeclare a dynamic temporary, indicating what we've learned about it.
156     // This may invalidate the location of temporaries defined by its step.
157     // Returns true if dimensions or length changed, false otherwise.
158     bool redeclare(SourceOperandIndex sourceOperandIndex, const Dimensions& newDimensions,
159                    uint32_t newLength);
160 
161     // Ensure that all dynamic temporaries defined by the specified step have
162     // locations.  The return value is a ResultCode (e.g.,
163     // ANEURALNETWORKS_NO_ERROR).
164     //
165     // Even if dynamic temporaries have already been allocated for this step,
166     // this call may reallocate them.  A reallocation is not guaranteed to
167     // preserve location (LocationAndShape.memory, LocationAndShape.offset) or
168     // contents of temporaries.
169     int allocate(uint32_t stepIndex);
170 
171     // Do the dynamic temporaries defined by this step have valid allocations?
172     // (Will be true if there are no dynamic temporaries defined by this step.)
173     bool allocated(uint32_t stepIndex) const;
174 
175     // Dump information to VLOG(EXECUTION).
176     void vlogDump(const char* context = nullptr) const;
177 
178     // If the specified operand is a dynamic temporary, return location and
179     // shape information; otherwise, return std::nullopt.
180     //
181     // If temporary exists but does not have a valid allocation, then:
182     //  - If mustBeAllocated == true, then trigger a failed CHECK().
183     //  - If mustBeAllocated == false, then memory == nullptr and offset == ~0.
184     struct LocationAndShape {
185         const RuntimeMemory* memory;
186         uint32_t offset;
187         const Dimensions* dimensions;
188         uint32_t paddedLength;
189     };
190     std::optional<LocationAndShape> lookup(SourceOperandIndex sourceOperandIndex,
191                                            bool mustBeAllocated = true) const;
192 
193     // Have any dynamic temporaries been declared?
empty()194     bool empty() const { return mSourceOperandToTemporary.empty(); }
195 
196    private:
197     // The same as LocationAndShape, except that:
198     // - the base of the location is represented not by memory but by defining stepIndex
199     // - it additionally contains information about the preferred alignment and padding
200     struct InternalLocationAndShape {
201         uint32_t stepIndex;
202         uint32_t offset;
203         Dimensions dimensions;
204         uint32_t paddedLength;
205         uint32_t alignment;
206         uint32_t padding;
207     };
208     std::map<SourceOperandIndex, InternalLocationAndShape> mSourceOperandToTemporary;
209 
210     // Every dynamic temporary defined at a given stepIndex.
211     std::map<uint32_t, std::vector<SourceOperandIndex>> mStepIndexToSourceOperandIndexes;
212 
213     std::map<uint32_t, std::unique_ptr<MemoryAshmem>> mStepIndexToMemory;
214 
215     // For a given defining stepIndex, we consider either all its dynamic
216     // temporaries to be allocated (have valid locations) or none of them to be.
217     std::set<uint32_t> mAllocatedStepIndexes;
218 
219     // Has endDeclarations() been called?
220     bool mDeclared = false;
221 };
222 
223 // The location of a static temporary.
224 struct StaticTemporaryLocation {
225     // The offset relative to ExecutionPlan::Controller::mTemporaries during execution.
226     uint32_t offset;
227     uint32_t paddedLength;
228 };
229 
230 // An excerpt of a source model to be run by a specific device.
231 class ExecutionStep {
232    public:
233     typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType;
234     typedef std::set<std::pair<uint32_t, uint32_t>> StepModelOutputSetType;
235 
236     enum OperandKind { INPUT, OUTPUT };
237 
238     ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, uint32_t sourceModelIndex,
239                   std::shared_ptr<Device> device);
240 
241     int addOperation(int operationIndex);
242     int addOperand(uint32_t sourceOperandIndex, uint32_t* stepOperandIndex, OperandKind kind);
243 
244     // Each container entry is of the form (source model operand index, step model operand index)
getStepModelInputs()245     const RemapVectorType& getStepModelInputs() const { return mStepModelInputs; }
getStepModelOutputs()246     const RemapVectorType& getStepModelOutputs() const { return mStepModelOutputs; }
getModelInputs()247     const RemapVectorType& getModelInputs() const { return mModelInputs; }
getModelOutputs()248     const RemapVectorType& getModelOutputs() const { return mModelOutputs; }
getTempsAsStepModelInputs()249     const RemapVectorType& getTempsAsStepModelInputs() const { return mTempsAsStepModelInputs; }
getTempsAsStepModelOutputs()250     const StepModelOutputSetType& getTempsAsStepModelOutputs() const {
251         return mTempsAsStepModelOutputs;
252     }
getOutputsAsStepModelInputs()253     const RemapVectorType& getOutputsAsStepModelInputs() const { return mOutputsAsStepModelInputs; }
getInputIndexStepModelToMainModel()254     const std::vector<uint32_t>& getInputIndexStepModelToMainModel() const {
255         return mInputIndexStepModelToMainModel;
256     }
getOutputIndexStepModelToMainModel()257     const std::vector<uint32_t>& getOutputIndexStepModelToMainModel() const {
258         return mOutputIndexStepModelToMainModel;
259     }
getOutputsAsStepModelInputsIndexToMainModel()260     const std::vector<uint32_t>& getOutputsAsStepModelInputsIndexToMainModel() const {
261         return mOutputsAsStepModelInputsIndexToMainModel;
262     }
263 
getModelOutputsThatAreDownstreamInputs()264     const std::set<uint32_t>& getModelOutputsThatAreDownstreamInputs() const {
265         return mModelOutputsThatAreDownstreamInputs;
266     }
267 
getIndex()268     uint32_t getIndex() const { return mIndex; }
getSourceModelIndex()269     uint32_t getSourceModelIndex() const { return mSourceModelIndex; }
270 
271     void declareModelOutputIsDownstreamInput(uint32_t mainModelOutputIndex);
272     void recordTempAsStepModelOutput(uint32_t stepOperandIndex);
273 
274     // If this step has a step model output of unknown size, sets
275     // *hasOutputOfUnknownSize to true; otherwise, leaves it
276     // unchanged.
277     int finishStepModel(const ModelBuilder* mainModel, bool* hasOutputOfUnknownSize,
278                         int32_t executionPreference, int32_t priority);
279 
getStepModel()280     const ModelBuilder* getStepModel() const { return &mStepModel; }
getDevice()281     std::shared_ptr<Device> getDevice() const { return mDevice; }
282 
283     // only available after calling finishStepModel()
getPreparedStepModel()284     std::shared_ptr<RuntimePreparedModel> getPreparedStepModel() const {
285         return mPreparedStepModel;
286     }
287 
288     // Map inputs and outputs from ExecutionBuilder to StepExecutor.
289     //
290     // This method only reads map entries for which the first element of
291     // SourceOperandIndex is mSourceModelIndex.
292     //
293     // mainModelOutputShapes may be nullptr if the only main model outputs that are
294     //     inputs of this step are of fully specified shape.
295     void mapInputsAndOutputs(
296             std::shared_ptr<StepExecutor> stepExecutor,
297             const std::vector<OutputShape>* mainModelOutputShapes,
298             const RuntimeMemory* temporaryMemory,  // for static temporaries
299             const std::map<SourceOperandIndex, StaticTemporaryLocation>&
300                     sourceOperandToLocationOfTemporary,  // for static temporaries
301             const DynamicTemporaries& dynamicTemporaries,
302             const std::map<SourceOperandIndex, uint32_t>& sourceOperandToInputIndex,
303             const std::map<SourceOperandIndex, uint32_t>& sourceOperandToOutputIndex,
304             const std::map<SourceOperandIndex, ConstantReferenceLocation>&
305                     sourceOperandToConstantReference) const;
306 
hasNoInputsOrNoOutputs()307     bool hasNoInputsOrNoOutputs() const {
308         return mStepModelInputs.empty() || mStepModelOutputs.empty();
309     }
310 
311     void dump() const;
312 
313     // For test only, get the transformed cache token.
forTest_getCacheToken()314     const uint8_t* forTest_getCacheToken() const { return mToken.getCacheToken(); }
315 
316    private:
317     void logStepModel() const;
318     const ModelBuilder* getSourceModel() const;
319 
320     // TODO: Some of the data is working state information that
321     // shouldn't be needed after we've constructed but not executed
322     // the step.
323 
324     ExecutionPlan* mPlan;
325     uint32_t mIndex;  // index of step within plan
326     uint32_t mSourceModelIndex;
327     ModelBuilder mStepModel;  // An excerpt of a source model to be run by one device.
328     std::shared_ptr<Device> mDevice;
329     std::shared_ptr<RuntimePreparedModel> mPreparedStepModel;
330 
331     // All inputs of this step model:
332     //     (source model operand index, step model operand index)
333     //
334     // Depending on whether the source operand is an input or output of the main
335     // model, the memory should be mapped using
336     // ExecutionPlan::CompoundBody::mSourceOperandToInputIndex,
337     // ExecutionPlan::Controller::mSourceOperandToLocationOfTemporary, or
338     // ExecutionPlan::Controller::mDynamicTemporaries, or
339     // ExecutionPlan::CompoundBody::mSourceOperandToOutputIndex.
340     RemapVectorType mStepModelInputs;
341     // All outputs of this step model:
342     //     (source model operand index, step model operand index)
343     //
344     // Depending on whether the source operand is an output of the main model,
345     // the memory should be mapped using
346     // ExecutionPlan::CompoundBody::mSourceOperandToOutputIndex,
347     // ExecutionPlan::Controller::mSourceOperandToLocationOfTemporary, or
348     // ExecutionPlan::Controller::mDynamicTemporaries.
349     //
350     // mOutputIndexStepModelToMainModel and declareModelOutputIsDownstreamInput()
351     // rely on mModelOutputs being a prefix of mStepModelOutputs.
352     RemapVectorType mStepModelOutputs;
353     // Inputs of main model that are also inputs of this step model:
354     //     (main model operand index, step model operand index)
355     RemapVectorType mModelInputs;
356     // Outputs of main model that are also outputs of this step model:
357     //     (main model operand index, step model operand index)
358     RemapVectorType mModelOutputs;
359     // Temporaries of source model that are inputs of this step model:
360     //     (source model operand index, step model operand index)
361     RemapVectorType mTempsAsStepModelInputs;
362     // Temporaries of source model that are outputs of this step model:
363     //     (source model operand index, step model operand index)
364     StepModelOutputSetType mTempsAsStepModelOutputs;
365     // Outputs of main model that are inputs of this step model:
366     //     (main model operand index, step model operand index)
367     RemapVectorType mOutputsAsStepModelInputs;
368     // Converts operand indexes from the source model to the step model.
369     std::unordered_map<uint32_t, uint32_t> mOperandMap;
370     // Converts input indexes from the step model to the main model
371     // (these are input indexes, not operand indexes).  This vector
372     // only describes inputs of the step model that are also inputs of
373     // the main model -- that is, mModelInputs but not mTempsAsStepModelInputs.
374     std::vector<uint32_t> mInputIndexStepModelToMainModel;
375     // Converts output indexes from the step model to the main model
376     // (these are output indexes, not operand indexes).  This vector
377     // only describes outputs of the step model that are also outputs of
378     // the main model -- that is, mModelOutputs but not
379     // mTempsAsStepModelOutputs.
380     std::vector<uint32_t> mOutputIndexStepModelToMainModel;
381     // Converts indexes into mOutputsAsStepModelInputs to indexes into
382     // main model outputs (these are input and output indexes, not
383     // operand indexes).  To be specific, if the main model outputs
384     // are mainModelOutputs,
385     //
386     //     mOutputsAsStepModelInputsIndexToMainModel.size() ==
387     //     mOutputsAsStepModelInputs.size()
388     //
389     // and when (0 <= i < mOutputsAsStepModelInputs.size()),
390     //
391     //     mainModelOutputs[mOutputsAsStepModelInputsIndexToMainModel[i]] ==
392     //     mOutputsAsStepModelInputs[i].first
393     std::vector<uint32_t> mOutputsAsStepModelInputsIndexToMainModel;
394 
395     // Step model output indexes (not operand indexes) that are outputs of the
396     // main model used as inputs to some other partition.
397     std::set<uint32_t> mModelOutputsThatAreDownstreamInputs;
398 
399     // The compilation caching token.
400     TokenHasher mToken;
401 };
402 
403 // An IF operation to be run on the ExecutionPlan::next() interpreter. The
404 // branch models might run on devices. See LogicalStep.
405 //
406 // Execution plan structure:
407 // Index  Step
408 //   i    if then=(i + 1) else=(j + 1)
409 //  ...   (then model steps)
410 //   j    goto k
411 //  ...   (else model steps)
412 //   k    (steps after the IF)
413 struct IfStep {
414     // The index of this step.
415     size_t index = ~size_t(0);
416     // The index of the first step of the "then" branch.
417     size_t thenStepIndex = ~size_t(0);
418     // The index of the first step of the "else" branch.
419     size_t elseStepIndex = ~size_t(0);
420     // The boolean condition input of the IF operation. The value of this
421     // operand determines the branch of the IF operation to be executed.
422     SourceOperandIndex conditionOperandIndex = {~uint32_t(0), ~uint32_t(0)};
423     // Input operands of the IF operation to be passed to a branch model.
424     std::vector<SourceOperandIndex> outerInputOperands;
425     // Output operands of the IF operation.
426     std::vector<SourceOperandIndex> outerOutputOperands;
427     // Input operands of the "then" branch model.
428     std::vector<SourceOperandIndex> thenBranchInputOperands;
429     // Output operands of the "then" branch model.
430     std::vector<SourceOperandIndex> thenBranchOutputOperands;
431     // Input operands of the "else" branch model.
432     std::vector<SourceOperandIndex> elseBranchInputOperands;
433     // Output operands of the "else" branch model.
434     std::vector<SourceOperandIndex> elseBranchOutputOperands;
435 };
436 
437 // A WHILE operation to be run on the ExecutionPlan::next() interpreter. The
438 // condition and body models might run other devices. See LogicalStep.
439 //
440 // Execution plan structure:
441 // Index  Step
442 //   i    while cond=(i + 1) body=(j + 1) exit=(k + 1)
443 //  ...   (cond model steps)
444 //   j    goto i
445 //  ...   (body model steps)
446 //   k    goto i
447 //  ...   (steps after the WHILE)
448 //
449 //  Note that WhileStep has WhileState associated with it.
450 struct WhileStep {
451     // The index of this step.
452     size_t index = ~size_t(0);
453     // The index of the first step of the condition model.
454     size_t condStepIndex = ~size_t(0);
455     // The index of the first step of the body model.
456     size_t bodyStepIndex = ~size_t(0);
457     // The index of the first step after the loop.
458     size_t exitStepIndex = ~size_t(0);
459     // Input operands of the WHILE operation to be passed to the condition and
460     // body models.
461     std::vector<SourceOperandIndex> outerInputOperands;
462     // Output operands of the WHILE operation.
463     std::vector<SourceOperandIndex> outerOutputOperands;
464     // Input operands of the condition model.
465     std::vector<SourceOperandIndex> condInputOperands;
466     // Output operand of the condition model. The value of this operand
467     // determines whether to continue execution or exit the loop.
468     SourceOperandIndex condOutputOperand = {~uint32_t(0), ~uint32_t(0)};
469     // Input operands of the body model.
470     std::vector<SourceOperandIndex> bodyInputOperands;
471     // Output operands of the body model.
472     std::vector<SourceOperandIndex> bodyOutputOperands;
473 };
474 
475 // A helper step. See LogicalStep.
476 struct GotoStep {
477     // The index of this step.
478     size_t index = ~size_t(0);
479     // The index of the step to go to.
480     size_t gotoStepIndex = ~size_t(0);
481 };
482 
483 // One of ExecutionStep, IfStep, WhileStep, or GotoStep.
484 //
485 // When ExecutionPlan::next() is called, it interprets logical steps until it
486 // encounters an ExecutionStep ("interpreted execution").
487 // - For an IfStep, it decides which branch to take and proceeds to the
488 //   corresponding step.
489 // - For a WhileStep, it decides whether to execute the condition or body (based
490 //   on WhileState), or exit the loop (based on the condition model output), and
491 //   proceeds to the corresponding step.
492 // - For a GotoStep, it proceeds to the indicated step unconditionally.
493 class LogicalStep {
494    public:
495     template <typename... Args>
LogicalStep(Args &&...args)496     explicit LogicalStep(Args&&... args) : mStep(std::forward<Args>(args)...) {}
497 
isExecution()498     bool isExecution() const { return std::holds_alternative<ExecutionStep>(mStep); }
isIf()499     bool isIf() const { return std::holds_alternative<IfStep>(mStep); }
isWhile()500     bool isWhile() const { return std::holds_alternative<WhileStep>(mStep); }
isGoto()501     bool isGoto() const { return std::holds_alternative<GotoStep>(mStep); }
502 
503     // Returns a non-null pointer or crashes.
executionStep()504     ExecutionStep* executionStep() { return &std::get<ExecutionStep>(mStep); }
ifStep()505     IfStep* ifStep() { return &std::get<IfStep>(mStep); }
whileStep()506     WhileStep* whileStep() { return &std::get<WhileStep>(mStep); }
gotoStep()507     GotoStep* gotoStep() { return &std::get<GotoStep>(mStep); }
508 
509     // Returns a non-null pointer or crashes.
executionStep()510     const ExecutionStep* executionStep() const { return &std::get<ExecutionStep>(mStep); }
ifStep()511     const IfStep* ifStep() const { return &std::get<IfStep>(mStep); }
whileStep()512     const WhileStep* whileStep() const { return &std::get<WhileStep>(mStep); }
gotoStep()513     const GotoStep* gotoStep() const { return &std::get<GotoStep>(mStep); }
514 
515     // May return nullptr.
tryExecutionStep()516     ExecutionStep* tryExecutionStep() { return std::get_if<ExecutionStep>(&mStep); }
tryIfStep()517     IfStep* tryIfStep() { return std::get_if<IfStep>(&mStep); }
tryWhileStep()518     WhileStep* tryWhileStep() { return std::get_if<WhileStep>(&mStep); }
tryGotoStep()519     GotoStep* tryGotoStep() { return std::get_if<GotoStep>(&mStep); }
520 
521     // May return nullptr.
tryExecutionStep()522     const ExecutionStep* tryExecutionStep() const { return std::get_if<ExecutionStep>(&mStep); }
tryIfStep()523     const IfStep* tryIfStep() const { return std::get_if<IfStep>(&mStep); }
tryWhileStep()524     const WhileStep* tryWhileStep() const { return std::get_if<WhileStep>(&mStep); }
tryGotoStep()525     const GotoStep* tryGotoStep() const { return std::get_if<GotoStep>(&mStep); }
526 
527     void dump() const;
528 
529    private:
530     std::variant<ExecutionStep, IfStep, WhileStep, GotoStep> mStep;
531 };
532 
533 std::ostream& operator<<(std::ostream& os, const IfStep& step);
534 std::ostream& operator<<(std::ostream& os, const WhileStep& step);
535 std::ostream& operator<<(std::ostream& os, const GotoStep& step);
536 
537 // Describes the state of WhileStep.
538 struct WhileState {
539     // A pseudo iteration number indicating the loop is not being executed.
540     static constexpr uint64_t kOutsideLoop = ~uint64_t(0);
541     // Whether we need to evaluate the condition or body next.
542     enum Stage { EVALUATE_CONDITION, EVALUATE_BODY } stage = EVALUATE_CONDITION;
543     // Current iteration number. Must be set to kOutsideLoop when exiting the
544     // loop.
545     uint64_t iteration = kOutsideLoop;
546     // Time point when the loop started executing.
547     TimePoint startTime;
548 };
549 
550 struct ConstantCopyLocation {
551     const uint8_t* buffer;
552     uint32_t length;
553 };
554 
555 struct ConstantReferenceLocation {
556     const RuntimeMemory* memory;
557     uint32_t offset;
558     uint32_t length;
559 };
560 
561 // A tuple of {execution_step_index, io_type, io_index} specifying an input/output role of an
562 // ExecutionStep.
563 using StepRole = std::tuple<uint32_t, IOType, uint32_t>;
564 
565 // A callback function that takes the prepared_model, io_type, and io_index of a step role.
566 using StepRoleCallback = std::function<void(const RuntimePreparedModel*, IOType, uint32_t)>;
567 
568 class ExecutionPlan {
569    public:
570     ExecutionPlan(const ExecutionPlan&) = delete;
571     ExecutionPlan& operator=(const ExecutionPlan&) = delete;
572 
ExecutionPlan()573     ExecutionPlan() {}
~ExecutionPlan()574     ~ExecutionPlan() { delete mBody; }
575 
576     // Controller is part of the interface to a mechanism for performing a
577     // main execution in N steps.
578     //
579     // The value of N may not be known beforehand if the model contains WHILE
580     // loops. See LogicalStep.
581     //
582     // Usage pattern:
583     // - Instantiate Controller with ExecutionPlan::makeController().
584     // - Call ExecutionPlan::next() on Controller N+1 times.  The first N times,
585     //   *executor is set to point to a new StepExecutor corresponding
586     //   to that step.  The N+1st time, *executor is set to nullptr,
587     //   signifying there are no more steps.
588     // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR,
589     //   a problem has occurred.
590     class Controller {
591         friend class ExecutionPlan;
592 
593        private:
594         Controller(const Controller&) = delete;
595         Controller& operator=(const Controller&) = delete;
596 
597         static const size_t kBadStepIndex = ~size_t(0);
598 
599         // A constructor for mState == COMPOUND.
600         Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder,
601                    const BurstBuilder* burstBuilder,
602 
603                    // static temporaries
604                    uint32_t totalSizeOfTemporaries,
605                    std::map<SourceOperandIndex, StaticTemporaryLocation>
606                            sourceOperandToLocationOfTemporary,
607                    std::map<SourceOperandIndex, StaticTemporaryLocation>
608                            sourceOperandToLocationOfTemporary2,
609 
610                    std::map<SourceOperandIndex, uint32_t> sourceOperandToInputIndex,
611                    std::map<SourceOperandIndex, uint32_t> sourceOperandToOutputIndex,
612                    const std::map<SourceOperandIndex, ConstantCopyLocation>&
613                            sourceOperandToConstantCopy,
614                    std::map<SourceOperandIndex, ConstantReferenceLocation>
615                            sourceOperandToConstantReference,
616                    DynamicTemporaries dynamicTemporaries);
617 
618         // Sets the location of innerOperand to be the same as the location of outerOperand.
619         void setInput(const SourceOperandIndex& outerOperand,
620                       const SourceOperandIndex& innerOperand);
621         void setOutput(const SourceOperandIndex& outerOperand,
622                        const SourceOperandIndex& innerOperand);
623 
624         // Wait for mLastStepSyncFd to signal.
625         // No-op if mLastStepSyncFd is -1 which the mLastStepSyncFd is initialized to.
626         // mLastStepSyncFd will also be set to -1 when the most recently processed step
627         // does not generate a sync fence.
628         int waitForLastStepSyncFence() const;
629 
630         [[maybe_unused]] const ExecutionPlan* mPlan;
631         ExecutionBuilder* mExecutionBuilder;
632         const BurstBuilder* mBurstBuilder;
633         // Map from source operand index to an offset into mTemporaries used
634         // to represent that operand as an inter-partition input or output.
635         //
636         // The four maps
637         // - mSourceOperandToLocationOfTemporary
638         // - mSourceOperandToInputIndex
639         // - mSourceOperandToOutputIndex
640         // - mSourceOperandToConstantReference
641         // are initialized from similarly named fields of ExecutionPlan::CompoundBody.
642         //
643         // A particular key appears in at most one map at any given time. This
644         // restriction does not apply to mSourceOperandToLocationOfTemporary2.
645         //
646         // The maps are modified during the execution of IfStep and WhileStep.
647         // See ExecutionPlan::nextCompound().
648         std::map<SourceOperandIndex, StaticTemporaryLocation> mSourceOperandToLocationOfTemporary;
649         // Map from source operand index to an additional offset into
650         // mTemporaries used for double buffering of WHILE loop output operands.
651         std::map<SourceOperandIndex, StaticTemporaryLocation> mSourceOperandToLocationOfTemporary2;
652         // Map from source operand index to an input index of the main model.
653         std::map<SourceOperandIndex, uint32_t> mSourceOperandToInputIndex;
654         // Map from source operand index to an output index of the main model.
655         std::map<SourceOperandIndex, uint32_t> mSourceOperandToOutputIndex;
656         // Map from source operand index to a constant reference location.
657         // Used for WHILE loop operand initializers that are constant references.
658         std::map<SourceOperandIndex, ConstantReferenceLocation> mSourceOperandToConstantReference;
659 
660         // static temporaries
661         std::unique_ptr<MemoryAshmem> mTemporaries;
662 
663         DynamicTemporaries mDynamicTemporaries;
664 
665         // Index of the next step to be processed by ExecutionPlan::next().
666         size_t mNextStepIndex;
667         // The value to reset mNextStepIndex to for partial CPU fallback.
668         size_t mFallbackNextStepIndex;
669         // Map from WhileStep index to the associated WhileState.
670         std::unordered_map<size_t, WhileState> mWhileState;
671         // The sync fence fd of the last step.
672         int mLastStepSyncFd;
673     };
674 
675     std::vector<SharedBurst> makeBursts() const;
676 
677     // Only legal to call when mState == COMPOUND.
678     std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder,
679                                                const BurstBuilder* burstBuilder) const;
680 
681     // Sets up a new StepExecutor and burstController (if applicable) if there
682     // is a step to execute. See ExecutionPlan::Controller.
683     // Handles control flow. See LogicalStep.
684     // burstController is nullptr if we are not to do burst execution.
685     // mainModelOutputShapes may be nullptr if the only main model outputs that are step model
686     //     inputs are of fully specified shape.
687     // syncFdOfLastStep is the sync fence fd generated by the most recently processed step.
688     // Only legal to call when mState == COMPOUND.
689     int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor,
690              SharedBurst* burstController, const std::vector<OutputShape>* mainModelOutputShapes,
691              int syncFdOfLastStep = -1) const;
692 
693     // Create the same executor as the last one created by next().
694     int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor,
695                  SharedBurst* burstController,
696                  const std::vector<OutputShape>* mainModelOutputShapes) const;
697 
698     // Only legal to call when mState == SIMPLE.
699     // See the constructor of StepExecutor for the semantics of "reusable".
700     std::shared_ptr<StepExecutor> makeStepExecutor(bool reusable,
701                                                    ExecutionBuilder* executionBuilder) const;
702 
703     ExecutionStep* createNewExecutionStep(uint32_t sourceModelIndex,
704                                           const std::shared_ptr<Device> device);
705     IfStep* createNewIfStep();
706     WhileStep* createNewWhileStep();
707     GotoStep* createNewGotoStep();
708 
709     // Only legal to call when mState == COMPOUND.
getNextStepIndex()710     size_t getNextStepIndex() const { return compound()->mSteps.size(); }
711 
712     void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);
713 
714     // simulateFailureResultCode == ANEURALNETWORKS_NO_ERROR means behave normally.
715     int finish(int32_t executionPreference, int32_t priority, const OptionalTimePoint& deadline,
716                const std::vector<TokenValuePair>& metaData, int simulateFailureResultCode);
717 
718     void recordOutputDef(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex);
719     void recordTemporaryDef(SourceOperandIndex sourceOperandIndex, uint32_t stepIndex);
720 
721     void dump() const;
722 
723     void reset();
724 
isValid()725     bool isValid() const { return mState != EMPTY && mBody != nullptr && mBody->mSuccessfulFinish; }
isSimple()726     bool isSimple() const { return mState == SIMPLE; }
isCompound()727     bool isCompound() const { return mState == COMPOUND; }
728     bool isSimpleCpu() const;
729 
setCaching(const CacheInfo * cacheInfo,const uint8_t * token)730     void setCaching(const CacheInfo* cacheInfo, const uint8_t* token) {
731         mCacheInfo = cacheInfo;
732         mToken = token;
733     }
getCacheInfo()734     const CacheInfo* getCacheInfo() const { return mCacheInfo; }
getCacheToken()735     const uint8_t* getCacheToken() const { return mToken; }
736 
737     // The caller is responsible for making sure the index is within range.
forEachStepRoleOfInput(uint32_t index,const StepRoleCallback & callback)738     void forEachStepRoleOfInput(uint32_t index, const StepRoleCallback& callback) const {
739         CHECK(mBody != nullptr);
740         mBody->forEachStepRoleOfInput(index, callback);
741     }
forEachStepRoleOfOutput(uint32_t index,const StepRoleCallback & callback)742     void forEachStepRoleOfOutput(uint32_t index, const StepRoleCallback& callback) const {
743         CHECK(mBody != nullptr);
744         mBody->forEachStepRoleOfOutput(index, callback);
745     }
746 
747     // "type" specifies input or output, and "index" is the main model input or output index.
748     // The caller is responsible for making sure the index is within range.
749     MemoryPreference getMemoryPreference(IOType type, uint32_t index) const;
750 
getSourceModels()751     SourceModels& getSourceModels() { return mSourceModels; }
getSourceModels()752     const SourceModels& getSourceModels() const { return mSourceModels; }
753 
754     // "index" is the main model input or output index.
755     // The caller is responsible for making sure the index is within range.
756     SourceOperandIndex getInputSourceOperand(uint32_t index) const;
757     SourceOperandIndex getOutputSourceOperand(uint32_t index) const;
758 
759     bool hasDynamicTemporaries() const;
760 
761     // These functions are solely intended for use by unit tests of
762     // the partitioning algorithm.
763     enum class Kind {
764         ERROR,
765         EMPTY,
766         SIMPLE,
767         COMPOUND
768     };  // See operator<< defined outside this class
769     Kind forTest_getKind() const;
770     std::shared_ptr<const Device> forTest_simpleGetDevice() const;
771     const std::vector<std::shared_ptr<LogicalStep>>& forTest_compoundGetSteps() const;
forTest_compoundForEachStepRoleOfSourceOperand(SourceOperandIndex index,const StepRoleCallback & callback)772     void forTest_compoundForEachStepRoleOfSourceOperand(SourceOperandIndex index,
773                                                         const StepRoleCallback& callback) const {
774         compound()->forEachStepRoleOfSourceOperand(index, callback);
775     }
776     //     The "flat" in the name signifies that this method requires that the
777     //     model not contain any control flow operations.
778     std::set<uint32_t> forTest_flatGetDynamicTemporaries() const;
779     const uint8_t* forTest_simpleGetCacheToken() const;
780     bool forTest_hasStepModelWithNoInputsOrNoOutputs() const;
781 
782    private:
783     // Becomes a new COMPOUND step if mState == EMPTY, otherwise does nothing.
784     // Illegal to call for when mState == SIMPLE.
785     void becomeCompoundIfEmpty();
786 
getSourceOperand(const std::pair<uint32_t,uint32_t> & sourceOperandIndex)787     const Operand& getSourceOperand(const std::pair<uint32_t, uint32_t>& sourceOperandIndex) const {
788         return getSourceModels()
789                 .getModel(sourceOperandIndex.first)
790                 ->getOperand(sourceOperandIndex.second);
791     }
792 
793     class Buffer {
794        public:
795         Buffer(void* pointer, uint32_t size);
796         Buffer(RunTimePoolInfo info, uint32_t offset);
797         void* getPointer() const;
798         uint32_t getSize() const;
799         void flush() const;
800 
801        private:
802         RunTimePoolInfo mInfo;
803         uint32_t mOffset;
804     };
805 
806     // Returns the buffer associated with a partition boundary operand.
807     std::optional<Buffer> getBuffer(std::shared_ptr<Controller> controller,
808                                     SourceOperandIndex operandIndex) const;
809     std::optional<Buffer> getBufferFromModelArgumentInfo(
810             const ModelArgumentInfo& info, const ExecutionBuilder* executionBuilder) const;
811     // Reads the value of a partition boundary boolean condition operand.
812     int readConditionValue(std::shared_ptr<Controller> controller, SourceOperandIndex operandIndex,
813                            bool* value) const;
814 
815     // Handles control flow. See LogicalStep.
816     int nextCompound(std::shared_ptr<Controller> controller,
817                      std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController,
818                      const std::vector<OutputShape>* mainModelOutputShapes) const;
819     int nextCompound(const ExecutionStep* step, std::shared_ptr<Controller> controller,
820                      std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController,
821                      const std::vector<OutputShape>* mainModelOutputShapes) const;
822     int nextCompound(const IfStep* step, std::shared_ptr<Controller> controller,
823                      std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController,
824                      const std::vector<OutputShape>* mainModelOutputShapes) const;
825     int nextCompound(const WhileStep* step, std::shared_ptr<Controller> controller,
826                      std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController,
827                      const std::vector<OutputShape>* mainModelOutputShapes) const;
828     int nextCompound(const GotoStep* step, std::shared_ptr<Controller> controller,
829                      std::shared_ptr<StepExecutor>* executor, SharedBurst* burstController,
830                      const std::vector<OutputShape>* mainModelOutputShapes) const;
831 
832     struct Body {
~BodyBody833         virtual ~Body() {}
834         virtual void dump() const = 0;
835         virtual int finish(const SourceModels* sourceModels, int32_t executionPreference,
836                            int32_t priority, const OptionalTimePoint& deadline,
837                            const std::vector<TokenValuePair>& metadata,
838                            int simulateFailureResultCode) = 0;
839         virtual bool hasDynamicTemporaries() const = 0;
840         virtual bool hasStepModelWithNoInputsOrNoOutputs() const = 0;
841         virtual void forEachStepRoleOfInput(uint32_t index,
842                                             const StepRoleCallback& callback) const = 0;
843         virtual void forEachStepRoleOfOutput(uint32_t index,
844                                              const StepRoleCallback& callback) const = 0;
845         bool mSuccessfulFinish = false;
846     };
847 
848     struct SimpleBody : Body {
SimpleBodySimpleBody849         SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model,
850                    const CacheInfo* cacheInfo, const uint8_t* token)
851             : mDevice(device), mModel(model), mCacheInfo(cacheInfo), mToken(token) {}
852 
853         void dump() const override;
854         int finish(const SourceModels* sourceModels, int32_t executionPreference, int32_t priority,
855                    const OptionalTimePoint& deadline, const std::vector<TokenValuePair>& metadata,
856                    int simulateFailureResultCode) override;
hasDynamicTemporariesSimpleBody857         bool hasDynamicTemporaries() const override { return false; }
hasStepModelWithNoInputsOrNoOutputsSimpleBody858         bool hasStepModelWithNoInputsOrNoOutputs() const override { return false; }
859         void forEachStepRoleOfInput(uint32_t index,
860                                     const StepRoleCallback& callback) const override;
861         void forEachStepRoleOfOutput(uint32_t index,
862                                      const StepRoleCallback& callback) const override;
863 
864         std::shared_ptr<Device> mDevice;
865         const ModelBuilder* mModel;
866         std::shared_ptr<RuntimePreparedModel> mPreparedModel;
867 
868         const CacheInfo* mCacheInfo;
869         TokenHasher mToken;
870     };
871 
872     struct CompoundBody : Body {
CompoundBodyCompoundBody873         CompoundBody(const ExecutionPlan* plan) : mPlan(plan) { CHECK(plan != nullptr); }
874 
875         void dump() const override;
876         int finish(const SourceModels* sourceModels, int32_t executionPreference, int32_t priority,
877                    const OptionalTimePoint& deadline, const std::vector<TokenValuePair>& metadata,
878                    int simulateFailureResultCode) override;
hasDynamicTemporariesCompoundBody879         bool hasDynamicTemporaries() const override { return mHasDynamicTemporaries; }
880         bool hasStepModelWithNoInputsOrNoOutputs() const override;
881         void forEachStepRoleOfInput(uint32_t index,
882                                     const StepRoleCallback& callback) const override;
883         void forEachStepRoleOfOutput(uint32_t index,
884                                      const StepRoleCallback& callback) const override;
885         // Supported for any legal source operand index. For a source operand that doesn't have a
886         // step role, the callback will not be invoked at all.
887         void forEachStepRoleOfSourceOperand(const SourceOperandIndex& index,
888                                             const StepRoleCallback& callback) const;
889         // Supported for any legal source operand index.
890         MemoryPreference getMemoryPreferenceOfSourceOperand(const SourceOperandIndex& index) const;
891 
892         // TODO: Some of the data is working state information that
893         // shouldn't be needed after we've constructed but not
894         // executed the plan.
895 
896         std::vector<std::shared_ptr<LogicalStep>> mSteps;
897 
898         // Map from source operand index to defining ExecutionStep index.
899         // Used for all (and only) SUBGRAPH_OUTPUTs that are defined by
900         // ExecutionSteps. Those defined by IfSteps and WhileSteps are not in
901         // the map.
902         std::map<SourceOperandIndex, uint32_t> mOutputToDefiningExecutionStep;
903 
904         // Map from source operand index to defining ExecutionStep index.
905         // Used for all (and only) TEMPORARY_VARIABLEs that are defined by
906         // ExecutionSteps. Those defined by IfSteps and WhileSteps are not in
907         // the map.
908         std::map<SourceOperandIndex, uint32_t> mTemporaryToDefiningExecutionStep;
909 
910         // Map from source operand index to input index of the main model.
911         // This map only contains SUBGRAPH_INPUTs of the main model and is used
912         // to initialize ExecutionPlan::Controller::mSourceOperandToInputIndex;
913         std::map<SourceOperandIndex, uint32_t> mSourceOperandToInputIndex;
914 
915         // Map from source operand index to output index of the main model.
916         // This map only contains SUBGRAPH_OUTPUTs of the main model and is used
917         // to initialize ExecutionPlan::Controller::mSourceOperandToOutputIndex;
918         std::map<SourceOperandIndex, uint32_t> mSourceOperandToOutputIndex;
919 
920         // Map from source operand index to location of a CONSTANT_COPY or
921         // POINTER operand.
922         // This map only contains constant partition boundary IF and WHILE
923         // operands and is used to create a ExecutionPlan::Controller.
924         std::map<SourceOperandIndex, ConstantCopyLocation> mSourceOperandToBoundaryConstantCopy;
925 
926         // Map from source operand index to location of a CONSTANT_REFERENCE
927         // operand.  This map only contains constant partition boundary IF and
928         // WHILE operands and is used to initialize
929         // ExecutionPlan::Controller::mSourceOperandToConstantReference.
930         std::map<SourceOperandIndex, ConstantReferenceLocation>
931                 mSourceOperandToBoundaryConstantReference;
932 
933         // Map from source operand index of a boundary operand to the step roles that its memory
934         // may be used for.
935         // This map only contains partition boundary operands that have ExecutionStep roles, that
936         // is, SUBGRAPH_INPUTs, SUBGRAPH_OUTPUTs, and partition boundary static and dynamic
937         // temporaries. If a partition boundary operand is not found in the map, then the operand
938         // does not have any ExecutionStep role (this may happen with interpreted control flow).
939         std::map<SourceOperandIndex, std::set<StepRole>> mSourceOperandToStepRoles;
940 
941         bool mHasDynamicTemporaries = false;
942 
943        private:
944         void findTempsAsStepModelOutputs();
945 
946         void findModelOutputsThatAreDownstreamInputs();
947 
948         // Constant values that are inputs to IF and WHILE operations and lie on
949         // a partition boundary ("control flow boundary constants") require
950         // special treatment. We need to be able to dynamically associate those
951         // values with the corresponding SUBGRAPH_INPUT operands in a referenced
952         // model.
953         //
954         // For CONSTANT_COPY and POINTER boundary operands, we copy those to
955         // temporary memory and treat them similarly to TEMPORARY_VARIABLE
956         // operands in Controller.
957         //
958         // For CONSTANT_REFERENCE boundary operands, we keep track of them in
959         // ExecutionPlan::Controller::mSourceOperandToConstantReference.
960         //
961         // Note that for IF inputs and input-only WHILE inputs that are boundary
962         // constants, we could embed those inside the referenced model, but we
963         // currently don't do so. See b/148216514.
964         void findControlFlowBoundaryConstants(const SourceModels* sourceModels);
965 
966         // This method will set mSourceOperandToStepRoles.
967         void findMemoryStepRoles();
968 
969         const ExecutionPlan* mPlan;
970     };
971 
972     enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY;
973     Body* mBody = nullptr;
simple()974     SimpleBody* simple() {
975         CHECK(mState == SIMPLE);
976         CHECK(mBody != nullptr);
977         return static_cast<SimpleBody*>(mBody);
978     }
simple()979     const SimpleBody* simple() const {
980         CHECK(mState == SIMPLE);
981         CHECK(mBody != nullptr);
982         return static_cast<const SimpleBody*>(mBody);
983     }
compound()984     CompoundBody* compound() {
985         CHECK(mState == COMPOUND);
986         CHECK(mBody != nullptr);
987         return static_cast<CompoundBody*>(mBody);
988     }
compound()989     const CompoundBody* compound() const {
990         CHECK(mState == COMPOUND);
991         CHECK(mBody != nullptr);
992         return static_cast<const CompoundBody*>(mBody);
993     }
994 
995     void forEachDynamicTemporary(const std::function<void(SourceOperandIndex, const Operand&,
996                                                           uint32_t definingStepIndex)>&) const;
997 
998     // Pointers to compilation caching information in CompilationBuilder.
999     const CacheInfo* mCacheInfo = nullptr;
1000     const uint8_t* mToken = nullptr;
1001 
1002     SourceModels mSourceModels;
1003 };
1004 
1005 inline std::ostream& operator<<(std::ostream& out, ExecutionPlan::Kind kind) {
1006     const int intKind = static_cast<int>(kind);
1007     if (kind < ExecutionPlan::Kind::ERROR || kind > ExecutionPlan::Kind::COMPOUND) {
1008         return out << "<UNK(" << intKind << ")>";
1009     }
1010     static const char* name[] = {"ERROR", "EMPTY", "SIMPLE", "COMPOUND"};
1011     return out << name[intKind];
1012 }
1013 
1014 }  // namespace nn
1015 }  // namespace android
1016 
1017 #endif  // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_RUNTIME_EXECUTION_PLAN_H
1018