1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
18 #define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
19
20 #include "HalInterfaces.h"
21 #include "OperationsUtils.h"
22 #include "Utils.h"
23
24 #include <algorithm>
25 #include <android-base/macros.h>
26 #include <vector>
27
28 namespace android {
29 namespace nn {
30
// Per-operand information maintained during execution; unlike the model's
// static operand description, this state may change as execution proceeds.
struct RunTimeOperandInfo {
    // TODO Storing the type here is redundant, as it won't change during execution.
    OperandType type;
    // The type and dimensions of the operand. The dimensions can
    // change at runtime. We include the type because it's useful
    // to pass together with the dimension to the functions implementing
    // the operators.
    std::vector<uint32_t> dimensions;

    // Quantization parameters, forwarded into shape() as .scale/.offset.
    // NOTE(review): presumably only meaningful for quantized operand types —
    // confirm against the operand validation code.
    float scale;
    int32_t zeroPoint;
    // Where the operand's data is stored. Check the corresponding
    // location information in the model to figure out if this points
    // to memory we have allocated for a temporary operand.
    uint8_t* buffer;
    // The length of the buffer.
    uint32_t length;
    // Whether this is a temporary variable, a model input, a constant, etc.
    OperandLifeTime lifetime;
    // Keeps track of how many operations have yet to make use
    // of this temporary variable. When the count is decremented to 0,
    // we free the buffer. For non-temporary variables, this count is
    // always 0.
    uint32_t numberOfUsesLeft;

    // Bundles type, dimensions, and quantization parameters into the Shape
    // struct consumed by the operation implementations. Note that this
    // copies the |dimensions| vector on every call.
    Shape shape() const {
        return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint};
    }
};
62
63 // Used to keep a pointer to each of the memory pools.
64 //
65 // In the case of an "mmap_fd" pool, owns the mmap region
66 // returned by getBuffer() -- i.e., that region goes away
67 // when the RunTimePoolInfo is destroyed or is assigned to.
class RunTimePoolInfo {
public:
    // Maps the given HIDL memory pool.
    // If "fail" is not nullptr, and construction fails, then set *fail = true.
    // If construction succeeds, leave *fail unchanged.
    // getBuffer() == nullptr IFF construction fails.
    explicit RunTimePoolInfo(const hidl_memory& hidlMemory, bool* fail);

    // Wraps a caller-supplied buffer. NOTE(review): presumably non-owning —
    // the class comment only claims ownership for "mmap_fd" pools; confirm
    // against release() in the .cpp.
    explicit RunTimePoolInfo(uint8_t* buffer);

    // Implement move
    RunTimePoolInfo(RunTimePoolInfo&& other);
    RunTimePoolInfo& operator=(RunTimePoolInfo&& other);

    // Forbid copy
    RunTimePoolInfo(const RunTimePoolInfo&) = delete;
    RunTimePoolInfo& operator=(const RunTimePoolInfo&) = delete;

    // Releases any resources held for the pool (see release()).
    ~RunTimePoolInfo() { release(); }

    // Base address of the mapped pool, or nullptr if construction failed.
    uint8_t* getBuffer() const { return mBuffer; }

    // NOTE(review): presumably commits/synchronizes writes back to the
    // underlying shared memory; semantics live in the .cpp — confirm there.
    bool update() const;

private:
    void release();
    void moveFrom(RunTimePoolInfo&& other);

    hidl_memory mHidlMemory;     // always used
    uint8_t* mBuffer = nullptr;  // always used
    sp<IMemory> mMemory;         // only used when hidlMemory.name() == "ashmem"
};
99
// Builds one RunTimePoolInfo per hidl_memory in |pools| into *poolInfos.
// NOTE(review): presumably returns false when any pool fails to map — the
// failure contract is defined in the .cpp; confirm before relying on it.
bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools);
102
103 // This class is used to execute a model on the CPU.
class CpuExecutor {
public:
    // Executes the model. The results will be stored at the locations
    // specified in the constructor.
    // NOTE(review): no such constructor is declared here — output locations
    // are presumably described by |request|; this comment looks stale.
    // The model must outlive the executor. We prevent it from being modified
    // while this is executing.
    int run(const V1_0::Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);
    int run(const V1_1::Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);

private:
    // Sets up per-operand runtime state (mOperands) from the model and
    // request pools before execution; returns false on failure.
    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
    // Runs one operation of the graph.
    int executeOperation(const Operation& entry);
    // Decrement the usage count for the operands listed. Frees the memory
    // allocated for any temporary variable with a count of zero.
    void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);

    // The model and the request that we'll execute. Only valid while run()
    // is being executed.
    const Model* mModel = nullptr;
    const Request* mRequest = nullptr;

    // We're copying the list of all the dimensions from the model, as
    // these may be modified when we run the operations. Since we're
    // making a full copy, the indexes used in the operand description
    // stay valid.
    // std::vector<uint32_t> mDimensions;
    // Runtime information about all the operands.
    std::vector<RunTimeOperandInfo> mOperands;
};
139
140 // Class for setting reasonable OpenMP threading settings. (OpenMP is used by
141 // the Eigen matrix library.)
142 //
143 // Currently sets a low blocktime: the time OpenMP threads busy-wait for more
144 // work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577.
145 // The default is 200ms, we set to 20ms here, see b/109645291. This keeps the
146 // cores enabled throughout inference computation without too much extra power
147 // consumption afterwards.
148 //
149 // The OpenMP settings are thread-local (applying only to worker threads formed
150 // from that thread), see https://software.intel.com/en-us/node/522688 and
151 // http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class
152 // ensures that within the scope in which an object is instantiated we use the
153 // right settings (scopes may be nested), as long as no other library changes
154 // them. (Note that in current NNAPI usage only one instance is used in the
155 // CpuExecutor thread).
156 //
157 // TODO(mikie): consider also setting the number of threads used. Using as many
158 // threads as there are cores results in more variable performance: if we don't
159 // get all cores for our threads, the latency is doubled as we wait for one core
160 // to do twice the amount of work. Reality is complicated though as not all
161 // cores are the same. Decision to be based on benchmarking against a
162 // representative set of workloads and devices. I'm keeping the code here for
163 // reference.
class ScopedOpenmpSettings {
public:
    // Saves the current OpenMP settings and applies the NNAPI ones
    // (low blocktime; see the comment block above).
    ScopedOpenmpSettings();
    // Restores the settings captured by the constructor.
    ~ScopedOpenmpSettings();
    DISALLOW_COPY_AND_ASSIGN(ScopedOpenmpSettings);
private:
    // Blocktime in effect before construction; restored on destruction.
    int mBlocktimeInitial;
#if NNAPI_LIMIT_CPU_THREADS
    // Max-threads setting in effect before construction.
    int mMaxThreadsInitial;
#endif
};
175
176
177 namespace {
178
179 template <typename T>
getScalarData(const RunTimeOperandInfo & info)180 T getScalarData(const RunTimeOperandInfo& info) {
181 // TODO: Check buffer is at least as long as size of data.
182 T* data = reinterpret_cast<T*>(info.buffer);
183 return data[0];
184 }
185
IsNullInput(const RunTimeOperandInfo * input)186 inline bool IsNullInput(const RunTimeOperandInfo *input) {
187 return input->lifetime == OperandLifeTime::NO_VALUE;
188 }
189
NumInputsWithValues(const Operation & operation,std::vector<RunTimeOperandInfo> & operands)190 inline int NumInputsWithValues(const Operation &operation,
191 std::vector<RunTimeOperandInfo> &operands) {
192 const std::vector<uint32_t> &inputs = operation.inputs;
193 return std::count_if(inputs.begin(), inputs.end(),
194 [&operands](uint32_t i) {
195 return !IsNullInput(&operands[i]);
196 });
197 }
198
NumOutputs(const Operation & operation)199 inline int NumOutputs(const Operation &operation) {
200 return operation.outputs.size();
201 }
202
NumDimensions(const RunTimeOperandInfo * operand)203 inline size_t NumDimensions(const RunTimeOperandInfo *operand) {
204 return operand->shape().dimensions.size();
205 }
206
SizeOfDimension(const RunTimeOperandInfo * operand,int i)207 inline uint32_t SizeOfDimension(const RunTimeOperandInfo *operand, int i) {
208 return operand->shape().dimensions[i];
209 }
210
GetInput(const Operation & operation,std::vector<RunTimeOperandInfo> & operands,int index)211 inline RunTimeOperandInfo *GetInput(const Operation &operation,
212 std::vector<RunTimeOperandInfo> &operands,
213 int index) {
214 return &operands[operation.inputs[index]];
215 }
216
GetOutput(const Operation & operation,std::vector<RunTimeOperandInfo> & operands,int index)217 inline RunTimeOperandInfo *GetOutput(const Operation &operation,
218 std::vector<RunTimeOperandInfo> &operands,
219 int index) {
220 return &operands[operation.outputs[index]];
221 }
222
223 } // anonymous namespace
224
225 } // namespace nn
226 } // namespace android
227
228 #endif // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
229