/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

#include <unordered_map>
#include <vector>

using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;

namespace android {
namespace nn {

class CompilationBuilder;
class ExecutionPlan;
class Memory;
class ModelBuilder;
class StepExecutor;
class VersionedIDevice;

// TODO move length out of DataLocation
struct ModelArgumentInfo {
    // Whether the argument was specified as being in a Memory, as a pointer,
    // has no value, or has not been specified.
    // If POINTER then:
    //   locationAndLength.length is valid.
    //   dimensions is valid.
    //   buffer is valid.
    // If MEMORY then:
    //   locationAndLength.{poolIndex, offset, length} is valid.
    //   dimensions is valid.
    enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
    DataLocation locationAndLength;
    std::vector<uint32_t> dimensions;
    void* buffer;

    int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
                       uint32_t length);
    int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                      uint32_t poolIndex, uint32_t offset, uint32_t length);
    int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset);
    int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
};
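
// A hedged sketch of how the states above are typically reached (the operand
// and sizes here are hypothetical, not taken from any real model).  At the
// NNAPI level an omitted optional argument is expressed as a null buffer,
// which is presumably what HAS_NO_VALUE captures:
//
//     ModelArgumentInfo fromPtr, fromMem;            // state == UNSPECIFIED
//     float data[4] = {};
//     // state == POINTER: buffer and locationAndLength.length become valid
//     fromPtr.setFromPointer(operand, /*type=*/nullptr, data, sizeof(data));
//     // state == MEMORY: locationAndLength.{poolIndex, offset, length} valid
//     fromMem.setFromMemory(operand, /*type=*/nullptr, /*poolIndex=*/0,
//                           /*offset=*/0, /*length=*/sizeof(data));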

class ExecutionBuilder {
    friend class StepExecutor;
public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    const ModelBuilder* getModel() const { return mModel; }

private:
    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // This is a DeviceManager::kPartitioning* value captured from
    // CompilationBuilder when the ExecutionBuilder is constructed.
    uint32_t mPartitioning;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
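
// For orientation, a sketch of the public NNAPI calls (declared in
// NeuralNetworks.h, included above) that drive an ExecutionBuilder; each
// set*() call below records a ModelArgumentInfo in mInputs/mOutputs, and
// startCompute triggers step 2 of the argument handling described above.
// The compilation object and the in/out buffers are assumed to exist:
//
//     ANeuralNetworksExecution* execution = nullptr;
//     ANeuralNetworksExecution_create(compilation, &execution);
//     ANeuralNetworksExecution_setInput(execution, 0, nullptr, inBuf, inSize);
//     ANeuralNetworksExecution_setOutput(execution, 0, nullptr, outBuf, outSize);
//     ANeuralNetworksEvent* event = nullptr;
//     ANeuralNetworksExecution_startCompute(execution, &event);
//     ANeuralNetworksEvent_wait(event);
//     ANeuralNetworksEvent_free(event);
//     ANeuralNetworksExecution_free(execution);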

// class StepExecutor is used to execute a single "step" in a
// potentially multiple step execution process.  The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(const ExecutionBuilder* executionBuilder,
                 const ModelBuilder* model,
                 VersionedIDevice* driver, sp<IPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex],
                         &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mOutputs[executorIndex]);
    }
    void mapOutputToInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mInputs[executorIndex]);
    }

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex),
                                                   memory, offset,
                                                   &mInputs.at(inputIndex));
    }
    int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex),
                                                   memory, offset,
                                                   &mOutputs.at(outputIndex));
    }
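
    // A hedged sketch (hypothetical step objects and index values, not taken
    // from ExecutionPlan) of how a partitioned execution could wire two steps
    // together using only the setters declared above: the temporary that one
    // step produces is handed to the next step through the same Memory, so
    // the intermediate value never round-trips through the ExecutionBuilder:
    //
    //     step1->setOutputFromTemporaryMemory(0, scratch, /*offset=*/0);
    //     step2->setInputFromTemporaryMemory(0, scratch, /*offset=*/0);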

    // Executes using the (driver, preparedModel) specified at construction time.
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    // Executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    bool isCpu() const { return mDriver == nullptr; }

private:
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);

    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                            const Memory* memory, uint32_t offset,
                                            ModelArgumentInfo* inputOrOutputInfo);

    // describes the full (possibly multiple-"step") execution
    const ExecutionBuilder* mExecutionBuilder;

    // model to be executed on the executor, in both original and
    // compiled forms; and device on which to execute it
    const ModelBuilder* mModel;
    VersionedIDevice* mDriver;          // nullptr if CPU execution
    sp<IPreparedModel> mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
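
// A minimal sketch, under assumed names, of driving a StepExecutor for the
// trivial single-step case using only the interface declared above; error
// checking and the CPU-versus-driver decision are omitted:
//
//     StepExecutor executor(executionBuilder, executionBuilder->getModel(),
//                           driver, preparedModel);
//     executor.mapInputsAndOutputsTrivially();
//     sp<ExecutionCallback> callback;
//     int n = executor.startCompute(&callback);  // presumably
//                                                // ANEURALNETWORKS_NO_ERROR
//                                                // on success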

} // namespace nn
} // namespace android

#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H