1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Classes used to plan how to execute a model across multiple devices.
18 
19 #ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
20 #define ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
21 
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "Utils.h"

#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <set>
#include <unordered_map>
#include <utility>
#include <vector>
29 
30 namespace android {
31 namespace nn {
32 
33 class CompilationBuilder;
34 class Device;
35 class ExecutionBuilder;
36 class ExecutionPlan;
37 class Memory;
38 class StepExecutor;
39 
40 class ExecutionStep {
41 public:
42     typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType;
43     typedef std::set<std::pair<uint32_t, uint32_t>> SubModelOutputSetType;
44 
45     enum OperandKind { INPUT, OUTPUT };
46 
47     ExecutionStep(ExecutionPlan* plan,
48                   uint32_t stepIndex,
49                   std::shared_ptr<Device> device);
50     int addOperation(int operationIndex, const ModelBuilder& fromModel);
51     int addOperand(uint32_t fromOperandIndex, uint32_t* toOperandIndex,
52                    const ModelBuilder& fromModel, OperandKind kind);
53 
54     // Each container entry is of the form (fromModel index, subModel index)
getModelInputs()55     const RemapVectorType& getModelInputs() const {
56         return mModelInputs;
57     }
getModelOutputs()58     const RemapVectorType& getModelOutputs() const {
59         return mModelOutputs;
60     }
getTempsAsSubModelInputs()61     const RemapVectorType& getTempsAsSubModelInputs() const {
62         return mTempsAsSubModelInputs;
63     }
getTempsAsSubModelOutputs()64     const SubModelOutputSetType& getTempsAsSubModelOutputs() const {
65         return mTempsAsSubModelOutputs;
66     }
getOutputsAsSubModelInputs()67     const RemapVectorType& getOutputsAsSubModelInputs() const {
68         return mOutputsAsSubModelInputs;
69     }
getOutputsAsSubModelInputsIndexToFromModel()70     const std::vector<uint32_t>& getOutputsAsSubModelInputsIndexToFromModel() const {
71         return mOutputsAsSubModelInputsIndexToFromModel;
72     }
73 
recordTempAsSubModelOutput(uint32_t fromModelIndex)74     void recordTempAsSubModelOutput(uint32_t fromModelIndex) {
75         const auto it = mOperandMap.find(fromModelIndex);
76         nnAssert(it != mOperandMap.end());
77         mTempsAsSubModelOutputs.insert(std::make_pair(fromModelIndex, it->second));
78     }
79 
80     // If this step has a submodel output of unknown size, sets
81     // *hasOutputOfUnknownSize to true; otherwise, leaves it
82     // unchanged.
83     int finishSubModel(const ModelBuilder* fromModel, bool* hasOutputOfUnknownSize,
84                        int32_t executionPreference);
85 
getSubModel()86     const ModelBuilder* getSubModel() const { return &mSubModel; }
getDevice()87     std::shared_ptr<Device> getDevice() const { return mDevice; }
88 
89     // only available after calling finishSubModel()
getPreparedSubModel()90     sp<IPreparedModel> getPreparedSubModel() const { return mPreparedSubModel; }
91 
92     // Map inputs and outputs from ExecutionBuilder to StepExecutor.
93     void mapInputsAndOutputs(std::shared_ptr<StepExecutor> stepExecutor) const;
94 
95     void dump() const;
96 
97 private:
98     void logSubModel() const;
99 
100     // TODO: Some of the data is working state information that
101     // shouldn't be needed after we've constructed but not executed
102     // the step.
103 
104     ExecutionPlan* mPlan;
105     uint32_t mIndex;  // index of step within plan
106     ModelBuilder mSubModel;
107     std::shared_ptr<Device> mDevice;  // nullptr signifies CPU
108     sp<IPreparedModel> mPreparedSubModel;  // not used for CPU
109 
110     // Inputs of original model that are also inputs of this submodel:
111     //     (fromModel index, subModel index)
112     RemapVectorType mModelInputs;
113     // Outputs of original model that are also outputs of this submodel:
114     //     (fromModel index, subModel index)
115     RemapVectorType mModelOutputs;
116     // Temporaries of original model that are inputs of this submodel:
117     //     (fromModel index, subModel index)
118     RemapVectorType mTempsAsSubModelInputs;
119     // Temporaries of original model that are outputs of this submodel:
120     //     (fromModel index, subModel index)
121     SubModelOutputSetType mTempsAsSubModelOutputs;
122     // Outputs of original model that are inputs of this submodel:
123     //     (fromModel index, subModel index)
124     RemapVectorType mOutputsAsSubModelInputs;
125     // Converts operand indexes from the main model to the submodel.
126     std::unordered_map<uint32_t, uint32_t> mOperandMap;
127     // Converts input indexes from the submodel to the main model
128     // (these are input indexes, not operand indexes).  This vector
129     // only describes inputs of the submodel that are also inputs of
130     // the main model -- that is, mModelInputs but not mTempsAsSubModelInputs.
131     std::vector<uint32_t> mInputIndexSubModelToFromModel;
132     // Converts output indexes from the submodel to the main model
133     // (these are output indexes, not operand indexes).  This vector
134     // only describes outputs of the submodel that are also outputs of
135     // the main model -- that is, mModelOutputs but not mTempsAsSubModelOutputs.
136     std::vector<uint32_t> mOutputIndexSubModelToFromModel;
137     // Converts indexes into mOutputsAsSubModelInputs to indexes into
138     // main model outputs (these are input and output indexes, not
139     // operand indexes).  To be specific, if the main model outputs
140     // are mainModelOutputs,
141     //
142     //     mOutputsAsSubModelInputsIndexToFromModel.size() ==
143     //     mOutputsAsSubModelInputs.size()
144     //
145     // and when (0 <= i < mOutputsAsSubModelInputs.size()),
146     //
147     //     mainModelOutputs[mOutputsAsSubModelInputsIndexToFromModel[i]] ==
148     //     mOutputsAsSubModelInputs[i].first
149     std::vector<uint32_t> mOutputsAsSubModelInputsIndexToFromModel;
150 };
151 
152 class ExecutionPlan {
153 public:
154     ExecutionPlan(const ExecutionPlan&) = delete;
155     ExecutionPlan& operator=(const ExecutionPlan&) = delete;
156 
ExecutionPlan()157     ExecutionPlan() { }
~ExecutionPlan()158     ~ExecutionPlan() { delete mBody; }
159 
160     // Controller is part of the interface to a mechanism for
161     // performing an execution in N steps.
162     //
163     // Usage pattern:
164     // - Instantiate Controller with ExecutionPlan::makeController().
165     // - Call ExecutionPlan::next() on Controller N+1 times.  The first N times,
166     //   *executor is set to point to a new StepExecutor corresponding
167     //   to that step.  The N+1st time, *executor is set to nullptr,
168     //   signifying there are no more steps.
169     // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR,
170     //   a problem has occurred.
171     class Controller {
172         friend class ExecutionPlan;
173     private:
174         Controller(const Controller&) = delete;
175         Controller& operator=(const Controller&) = delete;
176 
177         // Map from the operand index of a TEMPORARY in the original
178         // model to an offset into mTemporaries used to represent that
179         // TEMPORARY as an inter-partition input or output.
180         typedef std::map<uint32_t, uint32_t> SubModelInputsAndOutputsType;
181 
182         static const size_t kBadStepIndex = ~size_t(0);
183 
184         Controller(const ExecutionPlan* plan, const ExecutionBuilder* executionBuilder,
185                    std::shared_ptr<const SubModelInputsAndOutputsType> subModelInputsAndOutputs,
186                    uint32_t totalSizeOfTemporaries);
187 
188         const ExecutionPlan* mPlan;
189         const ExecutionBuilder* mExecutionBuilder;
190         std::shared_ptr<const SubModelInputsAndOutputsType> mSubModelInputsAndOutputs;  // may be nullptr
191         Memory mTemporaries;
192         size_t mNextStepIndex;
193     };
194 
195     std::shared_ptr<Controller> makeController(const ExecutionBuilder* executionBuilder) const;
196 
197     int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const;
198 
199     // Create the same executor as the last one created by next().
200     int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const;
201 
202     std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device);
203 
204     void becomeSingleStep(const std::shared_ptr<Device> device,
205                           const ModelBuilder* model);
206 
207     int finish(const ModelBuilder* fromModel, int32_t executionPreference);
208 
recordTemporaryDef(uint32_t fromModelIndex,uint32_t stepIndex)209     void recordTemporaryDef(uint32_t fromModelIndex, uint32_t stepIndex) {
210         auto& temporaryToDefiningStep = compound()->mTemporaryToDefiningStep;
211         nnAssert(temporaryToDefiningStep.count(fromModelIndex) == 0);
212         temporaryToDefiningStep.insert(std::make_pair(fromModelIndex, stepIndex));
213     }
214 
215     void dump() const;
216 
217     // These functions are solely intended for use by unit tests of
218     // the partitioning algorithm.
219     enum class Kind { ERROR, EMPTY, SIMPLE, COMPOUND };
220     Kind forTest_getKind() const;
221     std::shared_ptr<const Device> forTest_simpleGetDevice() const;
222     const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const;
223     bool forTest_hasSubModelOutputsOfUnknownSize() const;
224 
225 private:
226     void findTempsAsSubModelOutputs();
227 
228     struct Body {
~BodyBody229         virtual ~Body() {}
230         virtual void dump() const = 0;
231         virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0;
232         virtual bool hasSubModelOutputsOfUnknownSize() const = 0;
233         bool mSuccessfulFinish = false;
234     };
235 
236     struct SimpleBody : Body {
SimpleBodySimpleBody237         SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model) :
238                 mDevice(device), mModel(model) {}
239 
240         void dump() const override;
241         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
hasSubModelOutputsOfUnknownSizeSimpleBody242         virtual bool hasSubModelOutputsOfUnknownSize() const override { return false; }
243 
244         std::shared_ptr<Device> mDevice;  // nullptr signifies CPU
245         const ModelBuilder* mModel;
246         sp<IPreparedModel> mPreparedModel;  // not used for CPU
247     };
248 
249     struct CompoundBody : Body {
250         void dump() const override;
251         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
hasSubModelOutputsOfUnknownSizeCompoundBody252         virtual bool hasSubModelOutputsOfUnknownSize() const override {
253             return mHasSubModelOutputOfUnknownSize;
254         }
255 
256         // TODO: Some of the data is working state information that
257         // shouldn't be needed after we've constructed but not
258         // executed the plan.
259 
260         std::vector<std::shared_ptr<ExecutionStep>> mSteps;
261 
262         // Map from original operand index to defining step index.
263         // Used for all (and only) TEMPORARY_VARIABLEs.
264         std::unordered_map<uint32_t, uint32_t> mTemporaryToDefiningStep;
265 
266         bool mHasSubModelOutputOfUnknownSize = false;
267     private:
268         void findTempsAsSubModelOutputs();
269     };
270 
271     enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY;
272     Body* mBody = nullptr;
compound()273     CompoundBody* compound() {
274         nnAssert(mState == COMPOUND);
275         return static_cast<CompoundBody*>(mBody);
276     }
compound()277     const CompoundBody* compound() const {
278         nnAssert(mState == COMPOUND);
279         return static_cast<const CompoundBody*>(mBody);
280     }
281 };
282 
283 }  // namespace nn
284 }  // namespace android
285 
286 #endif  // ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
287