1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_CPU_EXECUTOR_H
18 #define ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_CPU_EXECUTOR_H
19
#include <android-base/macros.h>
#include <nnapi/Types.h>

#include <algorithm>
#include <cstring>
#include <memory>
#include <optional>
#include <type_traits>
#include <vector>

#include "ControlFlow.h"
#include "LegacyUtils.h"
#include "OperationResolver.h"
#include "OperationsExecutionUtils.h"
32
33 namespace android {
34 namespace nn {
35
36 // Information we maintain about each operand during execution that
37 // may change during execution.
38 struct RunTimeOperandInfo {
39 // TODO Storing the type here is redundant, as it won't change during execution.
40 OperandType type;
41 // The type and dimensions of the operand. The dimensions can
42 // change at runtime. We include the type because it's useful
43 // to pass together with the dimension to the functions implementing
44 // the operators.
45 //
46 // A dimension being zero has different meanings for different operands at different stages:
47 // - Model inputs:
48 // * Specified in model: implies "dynamic", and must be fully-specified in request.
49 // * Specified in request: illegal.
50 // - Constant operands: illegal.
51 // - Model outputs and internal operands:
52 // * Before evaluation: implies unknown and to be deduced from execution.
53 // * After evaluation:
54 // - If isSufficient reports true: the tensor is zero-sized.
55 // - Otherwise: implies unknown.
56 std::vector<uint32_t> dimensions;
57
58 float scale;
59 int32_t zeroPoint;
60 // Where the operand's data is stored. Check the corresponding
61 // location information in the model to figure out if this points
62 // to memory we have allocated for an temporary operand.
63 uint8_t* buffer; // TODO(b/148273353): Change the type to void*.
64 // The length of the buffer.
65 uint32_t length;
66 // Whether this is a temporary variable, a model input, a constant, etc.
67 Operand::LifeTime lifetime;
68 // Keeps track of how many operations have yet to make use
69 // of this temporary variable. When the count is decremented to 0,
70 // we free the buffer. For non-temporary variables, this count is
71 // always 0.
72 uint32_t numberOfUsesLeft;
73
74 Operand::ExtraParams extraParams;
75
shapeRunTimeOperandInfo76 Shape shape() const {
77 return {
78 .type = type,
79 .dimensions = dimensions,
80 .scale = scale,
81 .offset = zeroPoint,
82 .extraParams = extraParams,
83 };
84 }
85
isSufficientRunTimeOperandInfo86 bool isSufficient() const {
87 if (isExtension(type)) {
88 // We don't know sizes of extension types.
89 return true;
90 }
91 return length >= nonExtensionOperandSizeOfData(type, dimensions);
92 }
93 };
94
95 // Used to keep a pointer to each of the memory pools.
96 //
97 // RunTimePoolInfo references a region of memory. Other RunTimePoolInfo objects
98 // may reference the same region of memory by either:
99 // (1) copying an existing RunTimePoolInfo object, or
100 // (2) creating multiple RunTimePoolInfo objects from the same memory resource
101 // (e.g., "createFromMemory" or "createFromExistingBuffer")
102 //
103 // If the underlying region of memory is mapped by "createFromMemory", the
104 // mapping will be sustained until it is no longer referenced by any
105 // RunTimePoolInfo objects.
106 class RunTimePoolInfo {
107 public:
108 static std::optional<RunTimePoolInfo> createFromMemory(const SharedMemory& memory);
109 static RunTimePoolInfo createFromExistingBuffer(uint8_t* buffer, uint32_t size = 0);
110
111 uint8_t* getBuffer() const;
112 bool flush() const;
113 const SharedMemory& getMemory() const;
114 uint32_t getSize() const;
115
116 private:
117 class RunTimePoolInfoImpl;
118 RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl);
119
120 std::shared_ptr<const RunTimePoolInfoImpl> mImpl;
121 };
122
123 bool setRunTimePoolInfosFromCanonicalMemories(std::vector<RunTimePoolInfo>* poolInfos,
124 const std::vector<SharedMemory>& pools);
125
126 bool setRunTimePoolInfosFromMemoryPools(std::vector<RunTimePoolInfo>* poolInfos,
127 const std::vector<Request::MemoryPool>& pools);
128
129 // This class is used to execute a model on the CPU.
130 class CpuExecutor {
131 public:
132 // This constructor allows clients of CpuExecutor to provide custom CPU
133 // operation implementations. It is used by a sample driver to test
134 // extension support.
135 //
136 // Note that it is not possible to provide custom CPU implementations for
137 // non-OperationResolver operations (b/124041202).
138 //
139 // The operation resolver must outlive the executor.
CpuExecutor(const IOperationResolver * operationResolver)140 explicit CpuExecutor(const IOperationResolver* operationResolver)
141 : mOperationResolver(operationResolver) {}
142
CpuExecutor()143 CpuExecutor() : CpuExecutor(BuiltinOperationResolver::get()) {}
144
145 // Executes the model. The results will be stored at the locations
146 // specified in the constructor.
147 // The model must outlive the executor. We prevent it from being modified
148 // while this is executing.
149 int run(const Model& model, const Request& request,
150 const std::vector<RunTimePoolInfo>& modelPoolInfos,
151 const std::vector<RunTimePoolInfo>& requestPoolInfos);
152
getOutputShapes()153 const std::vector<OutputShape>& getOutputShapes() const {
154 CHECK(mFinished) << "getOutputShapes() called by an unfinished CpuExecutor.";
155 return mOutputShapes;
156 }
157
setDeadline(const TimePoint & deadline)158 void setDeadline(const TimePoint& deadline) { mDeadline = deadline; }
setLoopTimeout(uint64_t duration)159 void setLoopTimeout(uint64_t duration) { mLoopTimeoutDuration = duration; }
160
161 private:
162 // Creates runtime info from what's in the model.
163 std::vector<RunTimeOperandInfo> initializeRunTimeInfo(const Model::Subgraph& subgraph);
164 // Adjusts the runtime info for the arguments passed to the model,
165 // modifying the buffer location, and possibly the dimensions.
166 void updateForArguments(const std::vector<uint32_t>& indexes,
167 const std::vector<Request::Argument>& arguments,
168 const std::vector<RunTimePoolInfo>& requestPoolInfos,
169 RunTimeOperandInfo* operands);
170 // Runs one subgraph.
171 int executeSubgraph(const Model::Subgraph& subgraph, RunTimeOperandInfo* operands);
172 // Runs one operation of the graph.
173 int executeOperation(const Operation& operation, RunTimeOperandInfo* operands);
174 int executeIfOperation(const Operation& operation, RunTimeOperandInfo* operands);
175 int executeWhileOperation(const Operation& operation, RunTimeOperandInfo* operands);
176
177 void setOutputShapes(const std::vector<uint32_t>& outputIndexes,
178 const std::vector<RunTimeOperandInfo>& operands);
179
180 // Compile-time operand value information used by initializeRunTimeInfo.
181 // The fields are only valid while run() is being executed.
182 const uint8_t* mModelOperandValues = nullptr;
183 const std::vector<RunTimePoolInfo>* mModelPoolInfos = nullptr;
184 const std::vector<Model::Subgraph>* mReferencedSubgraphs = nullptr;
185
186 // The output operand shapes returning to the runtime.
187 std::vector<OutputShape> mOutputShapes;
188
189 // Whether execution is finished and mOutputShapes is ready
190 bool mFinished = false;
191
192 // The deadline hint for the maximum amount of time the client expects the
193 // execution will take. If this deadline is exceeded, the CpuExecutor will
194 // abort the execution if there are remaining ops to execute.
195 OptionalTimePoint mDeadline;
196
197 // The maximum amount of time in nanoseconds that can be spent executing a
198 // WHILE loop.
199 uint64_t mLoopTimeoutDuration = operation_while::kTimeoutNsDefault;
200
201 [[maybe_unused]] const IOperationResolver* mOperationResolver;
202 };
203
// Class for setting reasonable OpenMP threading settings. (OpenMP is used by
// the Eigen matrix library.)
//
// Currently sets a low blocktime: the time OpenMP threads busy-wait for more
// work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577.
// The default is 200ms, we set to 20ms here, see b/109645291. This keeps the
// cores enabled throughout inference computation without too much extra power
// consumption afterwards.
//
// The OpenMP settings are thread-local (applying only to worker threads formed
// from that thread), see https://software.intel.com/en-us/node/522688 and
// http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class
// ensures that within the scope in which an object is instantiated we use the
// right settings (scopes may be nested), as long as no other library changes
// them. (Note that in current NNAPI usage only one instance is used in the
// CpuExecutor thread.)
//
// TODO(mikie): consider also setting the number of threads used. Using as many
// threads as there are cores results in more variable performance: if we don't
// get all cores for our threads, the latency is doubled as we wait for one core
// to do twice the amount of work. Reality is complicated though as not all
// cores are the same. Decision to be based on benchmarking against a
// representative set of workloads and devices. I'm keeping the code here for
// reference.
// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
class ScopedOpenmpSettings {
   public:
    ScopedOpenmpSettings();
    ~ScopedOpenmpSettings();

    // Not copyable: spelled out instead of using DISALLOW_COPY_AND_ASSIGN.
    ScopedOpenmpSettings(const ScopedOpenmpSettings&) = delete;
    void operator=(const ScopedOpenmpSettings&) = delete;

   private:
    // NOTE(review): presumably the blocktime in effect before construction,
    // restored by the destructor -- confirm against the implementation.
    int mBlocktimeInitial;
#if NNAPI_LIMIT_CPU_THREADS
    // NOTE(review): presumably the max thread count in effect before
    // construction -- confirm against the implementation.
    int mMaxThreadsInitial;
#endif
};
#endif  // NNAPI_OPENMP
243
244 namespace {
245
246 template <typename T>
getScalarData(const RunTimeOperandInfo & info)247 T getScalarData(const RunTimeOperandInfo& info) {
248 CHECK_GE(info.length, sizeof(T)) << "Cannot get scalar data: buffer too short";
249 T* data = reinterpret_cast<T*>(info.buffer);
250 return data[0];
251 }
252
253 template <typename T>
getScalarDataWithDefault(const RunTimeOperandInfo & info,T defaultValue)254 T getScalarDataWithDefault(const RunTimeOperandInfo& info, T defaultValue) {
255 if (info.length < sizeof(T)) {
256 return defaultValue;
257 }
258 return getScalarData<T>(info);
259 }
260
IsNullInput(const RunTimeOperandInfo * input)261 inline bool IsNullInput(const RunTimeOperandInfo* input) {
262 return input->lifetime == Operand::LifeTime::NO_VALUE;
263 }
264
NumInputsWithValues(const Operation & operation,const RunTimeOperandInfo * operands)265 inline int NumInputsWithValues(const Operation& operation, const RunTimeOperandInfo* operands) {
266 const std::vector<uint32_t>& inputs = operation.inputs;
267 return std::count_if(inputs.begin(), inputs.end(),
268 [&operands](uint32_t i) { return !IsNullInput(&operands[i]); });
269 }
270
NumOutputs(const Operation & operation)271 inline int NumOutputs(const Operation& operation) {
272 return operation.outputs.size();
273 }
274
NumDimensions(const RunTimeOperandInfo * operand)275 inline size_t NumDimensions(const RunTimeOperandInfo* operand) {
276 return operand->shape().dimensions.size();
277 }
278
SizeOfDimension(const RunTimeOperandInfo * operand,int i)279 inline uint32_t SizeOfDimension(const RunTimeOperandInfo* operand, int i) {
280 return operand->shape().dimensions[i];
281 }
282
GetInput(const Operation & operation,RunTimeOperandInfo * operands,int index)283 inline RunTimeOperandInfo* GetInput(const Operation& operation, RunTimeOperandInfo* operands,
284 int index) {
285 return &operands[operation.inputs[index]];
286 }
287
GetOutput(const Operation & operation,RunTimeOperandInfo * operands,int index)288 inline RunTimeOperandInfo* GetOutput(const Operation& operation, RunTimeOperandInfo* operands,
289 int index) {
290 return &operands[operation.outputs[index]];
291 }
292
293 } // anonymous namespace
294
295 } // namespace nn
296 } // namespace android
297
298 #endif // ANDROID_PACKAGES_MODULES_NEURALNETWORKS_COMMON_CPU_EXECUTOR_H
299