1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Classes used to plan how to execute a model across multiple devices.
18 
19 #ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
20 #define ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
21 
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "TokenHasher.h"
#include "Utils.h"
#include "VersionedInterfaces.h"

#include <openssl/sha.h>

#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
34 
35 namespace android {
36 namespace nn {
37 
// Forward declarations, kept in alphabetical order.
class BurstBuilder;
class CompilationBuilder;
class Device;
class ExecutionBuilder;
class ExecutionBurstController;
class ExecutionPlan;
class Memory;
class StepExecutor;
46 
47 class ExecutionStep {
48 public:
49     typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType;
50     typedef std::set<std::pair<uint32_t, uint32_t>> SubModelOutputSetType;
51 
52     enum OperandKind { INPUT, OUTPUT };
53 
54     ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, std::shared_ptr<Device> device);
55     int addOperation(int operationIndex, const ModelBuilder& fromModel);
56     int addOperand(uint32_t fromOperandIndex, uint32_t* toOperandIndex,
57                    const ModelBuilder& fromModel, OperandKind kind);
58 
59     // Each container entry is of the form (fromModel index, subModel index)
getModelInputs()60     const RemapVectorType& getModelInputs() const {
61         return mModelInputs;
62     }
getModelOutputs()63     const RemapVectorType& getModelOutputs() const {
64         return mModelOutputs;
65     }
getTempsAsSubModelInputs()66     const RemapVectorType& getTempsAsSubModelInputs() const {
67         return mTempsAsSubModelInputs;
68     }
getTempsAsSubModelOutputs()69     const SubModelOutputSetType& getTempsAsSubModelOutputs() const {
70         return mTempsAsSubModelOutputs;
71     }
getOutputsAsSubModelInputs()72     const RemapVectorType& getOutputsAsSubModelInputs() const {
73         return mOutputsAsSubModelInputs;
74     }
getOutputIndexSubModelToFromModel()75     const std::vector<uint32_t>& getOutputIndexSubModelToFromModel() const {
76         return mOutputIndexSubModelToFromModel;
77     }
getOutputsAsSubModelInputsIndexToFromModel()78     const std::vector<uint32_t>& getOutputsAsSubModelInputsIndexToFromModel() const {
79         return mOutputsAsSubModelInputsIndexToFromModel;
80     }
81 
recordTempAsSubModelOutput(uint32_t fromModelIndex)82     void recordTempAsSubModelOutput(uint32_t fromModelIndex) {
83         const auto it = mOperandMap.find(fromModelIndex);
84         nnAssert(it != mOperandMap.end());
85         mTempsAsSubModelOutputs.insert(std::make_pair(fromModelIndex, it->second));
86     }
87 
88     // If this step has a submodel output of unknown size, sets
89     // *hasOutputOfUnknownSize to true; otherwise, leaves it
90     // unchanged.
91     int finishSubModel(const ModelBuilder* fromModel, bool* hasOutputOfUnknownSize,
92                        int32_t executionPreference);
93 
getSubModel()94     const ModelBuilder* getSubModel() const { return &mSubModel; }
getDevice()95     std::shared_ptr<Device> getDevice() const { return mDevice; }
96 
97     // only available after calling finishSubModel()
getPreparedSubModel()98     std::shared_ptr<VersionedIPreparedModel> getPreparedSubModel() const {
99         return mPreparedSubModel;
100     }
101 
102     // Map inputs and outputs from ExecutionBuilder to StepExecutor.
103     void mapInputsAndOutputs(std::shared_ptr<StepExecutor> stepExecutor) const;
104 
105     void dump() const;
106 
107     // For test only, get the transformed cache token.
forTest_getCacheToken()108     const uint8_t* forTest_getCacheToken() const { return mToken.getCacheToken(); }
109 
110    private:
111     void logSubModel() const;
112 
113     // TODO: Some of the data is working state information that
114     // shouldn't be needed after we've constructed but not executed
115     // the step.
116 
117     ExecutionPlan* mPlan;
118     uint32_t mIndex;  // index of step within plan
119     ModelBuilder mSubModel;
120     std::shared_ptr<Device> mDevice;
121     std::shared_ptr<VersionedIPreparedModel> mPreparedSubModel;  // not used for CPU
122 
123     // Inputs of original model that are also inputs of this submodel:
124     //     (fromModel index, subModel index)
125     RemapVectorType mModelInputs;
126     // Outputs of original model that are also outputs of this submodel:
127     //     (fromModel index, subModel index)
128     RemapVectorType mModelOutputs;
129     // Temporaries of original model that are inputs of this submodel:
130     //     (fromModel index, subModel index)
131     RemapVectorType mTempsAsSubModelInputs;
132     // Temporaries of original model that are outputs of this submodel:
133     //     (fromModel index, subModel index)
134     SubModelOutputSetType mTempsAsSubModelOutputs;
135     // Outputs of original model that are inputs of this submodel:
136     //     (fromModel index, subModel index)
137     RemapVectorType mOutputsAsSubModelInputs;
138     // Converts operand indexes from the main model to the submodel.
139     std::unordered_map<uint32_t, uint32_t> mOperandMap;
140     // Converts input indexes from the submodel to the main model
141     // (these are input indexes, not operand indexes).  This vector
142     // only describes inputs of the submodel that are also inputs of
143     // the main model -- that is, mModelInputs but not mTempsAsSubModelInputs.
144     std::vector<uint32_t> mInputIndexSubModelToFromModel;
145     // Converts output indexes from the submodel to the main model
146     // (these are output indexes, not operand indexes).  This vector
147     // only describes outputs of the submodel that are also outputs of
148     // the main model -- that is, mModelOutputs but not mTempsAsSubModelOutputs.
149     std::vector<uint32_t> mOutputIndexSubModelToFromModel;
150     // Converts indexes into mOutputsAsSubModelInputs to indexes into
151     // main model outputs (these are input and output indexes, not
152     // operand indexes).  To be specific, if the main model outputs
153     // are mainModelOutputs,
154     //
155     //     mOutputsAsSubModelInputsIndexToFromModel.size() ==
156     //     mOutputsAsSubModelInputs.size()
157     //
158     // and when (0 <= i < mOutputsAsSubModelInputs.size()),
159     //
160     //     mainModelOutputs[mOutputsAsSubModelInputsIndexToFromModel[i]] ==
161     //     mOutputsAsSubModelInputs[i].first
162     std::vector<uint32_t> mOutputsAsSubModelInputsIndexToFromModel;
163 
164     // The compilation caching token.
165     TokenHasher mToken;
166 };
167 
168 class ExecutionPlan {
169 public:
170     ExecutionPlan(const ExecutionPlan&) = delete;
171     ExecutionPlan& operator=(const ExecutionPlan&) = delete;
172 
ExecutionPlan()173     ExecutionPlan() { }
~ExecutionPlan()174     ~ExecutionPlan() { delete mBody; }
175 
176     // Controller is part of the interface to a mechanism for
177     // performing an execution in N steps.
178     //
179     // Usage pattern:
180     // - Instantiate Controller with ExecutionPlan::makeController().
181     // - Call ExecutionPlan::next() on Controller N+1 times.  The first N times,
182     //   *executor is set to point to a new StepExecutor corresponding
183     //   to that step.  The N+1st time, *executor is set to nullptr,
184     //   signifying there are no more steps.
185     // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR,
186     //   a problem has occurred.
187     class Controller {
188         friend class ExecutionPlan;
189     private:
190         Controller(const Controller&) = delete;
191         Controller& operator=(const Controller&) = delete;
192 
193         // Map from the operand index of a TEMPORARY in the original
194         // model to an offset into mTemporaries used to represent that
195         // TEMPORARY as an inter-partition input or output.
196         typedef std::map<uint32_t, uint32_t> SubModelInputsAndOutputsType;
197 
198         static const size_t kBadStepIndex = ~size_t(0);
199 
200         Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder,
201                    const BurstBuilder* burstBuilder,
202                    std::shared_ptr<const SubModelInputsAndOutputsType> subModelInputsAndOutputs,
203                    uint32_t totalSizeOfTemporaries);
204 
205         const ExecutionPlan* mPlan;
206         ExecutionBuilder* mExecutionBuilder;
207         const BurstBuilder* mBurstBuilder;
208         std::shared_ptr<const SubModelInputsAndOutputsType> mSubModelInputsAndOutputs;  // may be nullptr
209         Memory mTemporaries;
210         size_t mNextStepIndex;
211     };
212 
213     std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const;
214 
215     std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder,
216                                                const BurstBuilder* burstBuilder) const;
217 
218     int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor,
219              std::shared_ptr<ExecutionBurstController>* burstController = nullptr) const;
220 
221     // Create the same executor as the last one created by next().
222     int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const;
223 
224     std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device);
225 
226     void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);
227 
228     int finish(const ModelBuilder* fromModel, int32_t executionPreference);
229 
recordTemporaryDef(uint32_t fromModelIndex,uint32_t stepIndex)230     void recordTemporaryDef(uint32_t fromModelIndex, uint32_t stepIndex) {
231         auto& temporaryToDefiningStep = compound()->mTemporaryToDefiningStep;
232         nnAssert(temporaryToDefiningStep.count(fromModelIndex) == 0);
233         temporaryToDefiningStep.insert(std::make_pair(fromModelIndex, stepIndex));
234     }
235 
236     void dump() const;
237 
238     void reset();
239 
isValid()240     bool isValid() const { return mState != EMPTY && mBody != nullptr && mBody->mSuccessfulFinish; }
241 
setCaching(const std::string * cacheDir,const uint8_t * token)242     void setCaching(const std::string* cacheDir, const uint8_t* token) {
243         mCacheDir = cacheDir;
244         mToken = token;
245     }
getCacheDir()246     const std::string* getCacheDir() const { return mCacheDir; }
getCacheToken()247     const uint8_t* getCacheToken() const { return mToken; }
248 
249     // These functions are solely intended for use by unit tests of
250     // the partitioning algorithm.
251     enum class Kind { ERROR, EMPTY, SIMPLE, COMPOUND };
252     Kind forTest_getKind() const;
253     std::shared_ptr<const Device> forTest_simpleGetDevice() const;
254     const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const;
255     bool forTest_hasSubModelOutputsOfUnknownSize() const;
256     const uint8_t* forTest_simpleGetCacheToken() const;
257 
258    private:
259     void findTempsAsSubModelOutputs();
260 
261     struct Body {
~BodyBody262         virtual ~Body() {}
263         virtual void dump() const = 0;
264         virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0;
265         virtual bool hasSubModelOutputsOfUnknownSize() const = 0;
266         bool mSuccessfulFinish = false;
267     };
268 
269     struct SimpleBody : Body {
SimpleBodySimpleBody270         SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model,
271                    const std::string* cacheDir, const uint8_t* token)
272             : mDevice(device), mModel(model), mCacheDir(cacheDir), mToken(token) {}
273 
274         void dump() const override;
275         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
hasSubModelOutputsOfUnknownSizeSimpleBody276         virtual bool hasSubModelOutputsOfUnknownSize() const override { return false; }
277 
278         std::shared_ptr<Device> mDevice;
279         const ModelBuilder* mModel;
280         std::shared_ptr<VersionedIPreparedModel> mPreparedModel;  // not used for CPU
281 
282         const std::string* mCacheDir;
283         TokenHasher mToken;
284     };
285 
286     struct CompoundBody : Body {
287         void dump() const override;
288         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
hasSubModelOutputsOfUnknownSizeCompoundBody289         virtual bool hasSubModelOutputsOfUnknownSize() const override {
290             return mHasSubModelOutputOfUnknownSize;
291         }
292 
293         // TODO: Some of the data is working state information that
294         // shouldn't be needed after we've constructed but not
295         // executed the plan.
296 
297         std::vector<std::shared_ptr<ExecutionStep>> mSteps;
298 
299         // Map from original operand index to defining step index.
300         // Used for all (and only) TEMPORARY_VARIABLEs.
301         std::unordered_map<uint32_t, uint32_t> mTemporaryToDefiningStep;
302 
303         bool mHasSubModelOutputOfUnknownSize = false;
304     private:
305         void findTempsAsSubModelOutputs();
306     };
307 
308     enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY;
309     Body* mBody = nullptr;
compound()310     CompoundBody* compound() {
311         nnAssert(mState == COMPOUND);
312         return static_cast<CompoundBody*>(mBody);
313     }
compound()314     const CompoundBody* compound() const {
315         nnAssert(mState == COMPOUND);
316         return static_cast<const CompoundBody*>(mBody);
317     }
318 
319     // Pointers to compilation caching information in CompilationBuilder.
320     const std::string* mCacheDir = nullptr;
321     const uint8_t* mToken = nullptr;
322 };
323 
324 }  // namespace nn
325 }  // namespace android
326 
327 #endif  // ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
328