1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "ExecutionBuilder"
18
19 #include "ExecutionBuilder.h"
20
21 #include "CompilationBuilder.h"
22 #include "CpuExecutor.h"
23 #include "ExecutionBurstController.h"
24 #include "HalInterfaces.h"
25 #include "Manager.h"
26 #include "ModelBuilder.h"
27 #include "Tracing.h"
28 #include "TypeManager.h"
29 #include "Utils.h"
30
31 #include <mutex>
32 #include <optional>
33 #include <thread>
34 #include <vector>
35
36 namespace android {
37 namespace nn {
38
39 using HidlToken = hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
40
41 const Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
42
43 static MeasureTiming measureTiming(const ExecutionBuilder* execution) {
44 return execution->measureTiming() ? MeasureTiming::YES : MeasureTiming::NO;
45 }
46
47 static bool checkDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType,
48 const char* tag, bool allowUnspecified) {
49 if (newType != nullptr) {
50 const Extension::OperandTypeInformation* info = nullptr;
51 if (isExtensionOperandType(operand.type)) {
52 NN_RET_CHECK(TypeManager::get()->getExtensionOperandTypeInfo(operand.type, &info));
53 }
54 if (validateOperandType(*newType, info, tag, allowUnspecified) !=
55 ANEURALNETWORKS_NO_ERROR) {
56 LOG(ERROR) << tag << ": Invalid newType";
57 return false;
58 }
59 if (operand.dimensions.size() == 0) {
60 return true;
61 }
62 if (operand.dimensions.size() != newType->dimensionCount) {
63 LOG(ERROR) << tag << ": Setting with incompatible dimension count";
64 return false;
65 }
66 for (uint32_t i = 0; i < newType->dimensionCount; i++) {
67 if (operand.dimensions[i] != newType->dimensions[i] && operand.dimensions[i] != 0) {
68 LOG(ERROR) << tag << ": Overriding a fully specified dimension is disallowed";
69 return false;
70 }
71 }
72 } else {
73 if (!allowUnspecified && TypeManager::get()->isTensorType(operand.type) &&
74 tensorHasUnspecifiedDimensions(operand)) {
75 LOG(ERROR) << tag << ": Setting with operand type that is not fully specified";
76 return false;
77 }
78 }
79 return true;
80 }
81
82 int ModelArgumentInfo::setFromPointer(const Operand& operand,
83 const ANeuralNetworksOperandType* type, void* data,
84 uint32_t length) {
85 if ((data == nullptr) != (length == 0)) {
86 const char* dataPtrMsg = data ? "NOT_NULLPTR" : "NULLPTR";
87 LOG(ERROR) << "Data pointer must be nullptr if and only if length is zero (data = "
88 << dataPtrMsg << ", length = " << length << ")";
89 return ANEURALNETWORKS_BAD_DATA;
90 }
91 if (data == nullptr) {
92 state = ModelArgumentInfo::HAS_NO_VALUE;
93 } else {
94 NN_RETURN_IF_ERROR(updateDimensionInfo(operand, type));
95 if (operand.type != OperandType::OEM) {
96 uint32_t neededLength = TypeManager::get()->getSizeOfData(operand.type, dimensions);
97 if (neededLength != length && neededLength != 0) {
98 LOG(ERROR) << "Setting argument with invalid length: " << length
99 << ", expected length: " << neededLength;
100 return ANEURALNETWORKS_BAD_DATA;
101 }
102 }
103 state = ModelArgumentInfo::POINTER;
104 }
105 buffer = data;
106 locationAndLength = {.poolIndex = 0, .offset = 0, .length = length};
107 return ANEURALNETWORKS_NO_ERROR;
108 }
109
110 int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
111 uint32_t poolIndex, uint32_t offset, uint32_t length) {
112 NN_RETURN_IF_ERROR(updateDimensionInfo(operand, type));
113 if (operand.type != OperandType::OEM) {
114 uint32_t neededLength = TypeManager::get()->getSizeOfData(operand.type, dimensions);
115 if (neededLength != length && neededLength != 0) {
116 LOG(ERROR) << "Setting argument with invalid length: " << length
117 << ", expected length: " << neededLength;
118 return ANEURALNETWORKS_BAD_DATA;
119 }
120 }
121
122 state = ModelArgumentInfo::MEMORY;
123 locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length};
124 buffer = nullptr;
125 return ANEURALNETWORKS_NO_ERROR;
126 }
127
128 int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex,
129 uint32_t offset, uint32_t length) {
130 NN_RETURN_IF_ERROR(updateDimensionInfo(operand, nullptr));
131 if (operand.type != OperandType::OEM) {
132 uint32_t neededLength = TypeManager::get()->getSizeOfData(operand.type, dimensions);
133 if (neededLength != length) {
134 LOG(ERROR) << "Setting argument with invalid length: " << length
135 << ", expected length: " << neededLength;
136 return ANEURALNETWORKS_BAD_DATA;
137 }
138 }
139
140 state = ModelArgumentInfo::MEMORY;
141 locationAndLength = {
142 .poolIndex = poolIndex,
143 .offset = offset,
144 .length = length,
145 };
146 buffer = nullptr;
147 return ANEURALNETWORKS_NO_ERROR;
148 }
149
150 int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
151 const ANeuralNetworksOperandType* newType) {
152 if (newType == nullptr) {
153 dimensions = operand.dimensions;
154 } else {
155 const uint32_t count = newType->dimensionCount;
156 dimensions = hidl_vec<uint32_t>(count);
157 std::copy(&newType->dimensions[0], &newType->dimensions[count], dimensions.begin());
158 }
159 return ANEURALNETWORKS_NO_ERROR;
160 }
161
162 ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation)
163 : mCompilation(compilation),
164 mModel(compilation->mModel),
165 mPlan(&compilation->mPlan),
166 mPartitioning(compilation->mPartitioning),
167 mInputs(mModel->inputCount()),
168 mOutputs(mModel->outputCount()) {
169 VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
170 }
171
172 int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
173 const void* buffer, size_t length) {
174 if (mStarted) {
175 LOG(ERROR) << "ANeuralNetworksExecution_setInput called after the "
176 "execution has started.";
177 return ANEURALNETWORKS_BAD_STATE;
178 }
179 uint32_t count = static_cast<uint32_t>(mInputs.size());
180 if (index >= count) {
181 LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
182 return ANEURALNETWORKS_BAD_DATA;
183 }
184 if (!checkDimensionInfo(mModel->getInputOperand(index), type,
185 "ANeuralNetworksExecution_setInput", buffer == nullptr)) {
186 return ANEURALNETWORKS_BAD_DATA;
187 }
188 if (length > 0xFFFFFFFF) {
189 LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
190 return ANEURALNETWORKS_BAD_DATA;
191 }
192 uint32_t l = static_cast<uint32_t>(length);
193 return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
194 const_cast<void*>(buffer), l);
195 }
196
197 int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
198 const Memory* memory, size_t offset, size_t length) {
199 // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
200
201 if (mStarted) {
202 LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory called after the "
203 "execution has started.";
204 return ANEURALNETWORKS_BAD_STATE;
205 }
206 uint32_t count = static_cast<uint32_t>(mInputs.size());
207 if (index >= count) {
208 LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
209 << count;
210 return ANEURALNETWORKS_BAD_DATA;
211 }
212 if (!checkDimensionInfo(mModel->getInputOperand(index), type,
213 "ANeuralNetworksExecution_setInputFromMemory", false)) {
214 return ANEURALNETWORKS_BAD_DATA;
215 }
216 // Both offset & length must be zero for Non-BLOB format AHardwareBuffer.
217 if (memory->getHidlMemory().name() == "hardware_buffer" && (offset != 0 || length != 0)) {
218 LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory has non-zero offset and length"
219 << " for Non-BLOB format AHardwareBuffer.";
220 return ANEURALNETWORKS_BAD_DATA;
221 } else if (!memory->validateSize(offset, length)) {
222 return ANEURALNETWORKS_BAD_DATA;
223 }
224 // TODO validate the rest
225 uint32_t poolIndex = mMemories.add(memory);
226 return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
227 length);
228 }
229
230 int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type,
231 void* buffer, size_t length) {
232 if (mStarted) {
233 LOG(ERROR) << "ANeuralNetworksExecution_setOutput called after the "
234 "execution has started.";
235 return ANEURALNETWORKS_BAD_STATE;
236 }
237 uint32_t count = static_cast<uint32_t>(mOutputs.size());
238 if (index >= count) {
239 LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
240 return ANEURALNETWORKS_BAD_DATA;
241 }
242 if (!checkDimensionInfo(mModel->getOutputOperand(index), type,
243 "ANeuralNetworksExecution_setOutput", true)) {
244 return ANEURALNETWORKS_BAD_DATA;
245 }
246 if (length > 0xFFFFFFFF) {
247 LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length;
248 return ANEURALNETWORKS_BAD_DATA;
249 }
250 uint32_t l = static_cast<uint32_t>(length);
251 return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l);
252 }
253
254 int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
255 const Memory* memory, size_t offset, size_t length) {
256 // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
257
258 if (mStarted) {
259 LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory called after the "
260 "execution has started.";
261 return ANEURALNETWORKS_BAD_STATE;
262 }
263 uint32_t count = static_cast<uint32_t>(mOutputs.size());
264 if (index >= count) {
265 LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
266 << count;
267 return ANEURALNETWORKS_BAD_DATA;
268 }
269 if (!checkDimensionInfo(mModel->getOutputOperand(index), type,
270 "ANeuralNetworksExecution_setOutputFromMemory", true)) {
271 return ANEURALNETWORKS_BAD_DATA;
272 }
273 // Both offset & length must be zero for Non-BLOB format AHardwareBuffer.
274 if (memory->getHidlMemory().name() == "hardware_buffer" && (offset != 0 || length != 0)) {
275 LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory has non-zero offset and length"
276 << " for Non-BLOB format AHardwareBuffer.";
277 return ANEURALNETWORKS_BAD_DATA;
278 } else if (!memory->validateSize(offset, length)) {
279 return ANEURALNETWORKS_BAD_DATA;
280 }
281 // TODO validate the rest
282 uint32_t poolIndex = mMemories.add(memory);
283 return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
284 length);
285 }
286
287 int ExecutionBuilder::setMeasureTiming(bool measure) {
288 if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
289 LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called on "
290 << "an ANeuralNetworksExecution created from an ANeuralNetworksCompilation "
291 << "that was not created by ANeuralNetworksCompilation_createForDevices "
292 << "with numDevices = 1";
293 return ANEURALNETWORKS_BAD_DATA;
294 }
295 if (mStarted) {
296 LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called after the "
297 "execution has started.";
298 return ANEURALNETWORKS_BAD_STATE;
299 }
300 mMeasureTiming = measure;
301 return ANEURALNETWORKS_NO_ERROR;
302 }
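// Rough NDK-level usage of the timing API above (client-side sketch, not part of this
// file; assumes the compilation came from ANeuralNetworksCompilation_createForDevices
// with a single device):
//   ANeuralNetworksExecution_setMeasureTiming(execution, true);
//   ANeuralNetworksExecution_compute(execution);
//   uint64_t nanos = 0;
//   ANeuralNetworksExecution_getDuration(execution, ANEURALNETWORKS_DURATION_ON_HARDWARE,
//                                        &nanos);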
303
304 int ExecutionBuilder::getDuration(int32_t durationCode, uint64_t* duration) const {
305 if (!mFinished) {
306 LOG(ERROR) << "ANeuralNetworksExecution_getDuration called before the "
307 "execution has finished.";
308 return ANEURALNETWORKS_BAD_STATE;
309 }
310
311 // NOTE: At the HAL level, timing is in microseconds. At the NDK level, nanoseconds.
312 const uint64_t kNanoPerMicro = 1000;
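    // For example, a driver-reported duration of 2500 microseconds is returned to the
    // caller below as 2'500'000 nanoseconds.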
313
314 if (!mMeasureTiming) {
315 *duration = UINT64_MAX;
316 return ANEURALNETWORKS_BAD_STATE;
317 }
318
319 uint64_t microDuration = UINT64_MAX;
320 switch (durationCode) {
321 case ANEURALNETWORKS_DURATION_ON_HARDWARE:
322 microDuration = mTiming.timeOnDevice;
323 break;
324 case ANEURALNETWORKS_DURATION_IN_DRIVER:
325 microDuration = mTiming.timeInDriver;
326 break;
327 default:
328 CHECK(!"unexpected");
329 }
330 *duration = (microDuration == UINT64_MAX) ? UINT64_MAX : kNanoPerMicro * microDuration;
331
332 VLOG(EXECUTION) << "getDuration(" << durationCode << "): " << *duration;
333 return ANEURALNETWORKS_NO_ERROR;
334 }
335
336 int ExecutionBuilder::getOutputOperandDimensions(uint32_t index, uint32_t* dimensions) {
337 if (!mFinished) {
338 LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called before the "
339 "execution has finished.";
340 return ANEURALNETWORKS_BAD_STATE;
341 }
342 uint32_t count = static_cast<uint32_t>(mOutputs.size());
343 if (index >= count) {
344 LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions bad index " << index
345 << " " << count;
346 return ANEURALNETWORKS_BAD_DATA;
347 }
348 const auto& dims = mOutputs[index].dimensions;
349 if (dims.empty()) {
350 LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions can not query "
351 "dimensions of a scalar";
352 return ANEURALNETWORKS_BAD_DATA;
353 }
354 std::copy(dims.begin(), dims.end(), dimensions);
355 return mOutputs[index].isSufficient ? ANEURALNETWORKS_NO_ERROR
356 : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
357 }
358
359 int ExecutionBuilder::getOutputOperandRank(uint32_t index, uint32_t* rank) {
360 if (!mFinished) {
361 LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank called before the "
362 "execution has finished.";
363 return ANEURALNETWORKS_BAD_STATE;
364 }
365 uint32_t count = static_cast<uint32_t>(mOutputs.size());
366 if (index >= count) {
367 LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank bad index " << index << " "
368 << count;
369 return ANEURALNETWORKS_BAD_DATA;
370 }
371 *rank = static_cast<uint32_t>(mOutputs[index].dimensions.size());
372 return mOutputs[index].isSufficient ? ANEURALNETWORKS_NO_ERROR
373 : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
374 }
375
376 // Attempt synchronous execution of full model on CPU.
377 // Ensure that executionCallback->notify() is called.
378 // TODO: How should we handle timing in this case?
379 // For Q this is irrelevant: We only support timing in conjunction
380 // with an explicit device list; and we do not support CPU fallback
381 // with an explicit device list. See CompilationBuilder::mExplicitDeviceList.
382 static void cpuFallbackFull(ExecutionBuilder* executionBuilder,
383 const sp<ExecutionCallback>& executionCallback) {
384 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackFull");
385 VLOG(EXECUTION) << "cpuFallbackFull";
386 StepExecutor executor(executionBuilder, executionBuilder->getModel(),
387 DeviceManager::getCpuDevice(), /*preparedModel=*/nullptr);
388 executor.mapInputsAndOutputsTrivially();
389 sp<ExecutionCallback> fallbackCallback;
390 int n = executor.startCompute(&fallbackCallback);
391 if (n != ANEURALNETWORKS_NO_ERROR) {
392 executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
393 return;
394 }
395 fallbackCallback->wait();
396 executionCallback->notify(fallbackCallback->getStatus(), fallbackCallback->getOutputShapes(),
397 fallbackCallback->getTiming());
398 }
399
400 // Attempt synchronous execution on CPU.
401 // (1) First, attempt to execute this step on CPU. If successful,
402 // return true. (Do not call executionCallback->notify().)
403 // (2) If unsuccessful, attempt to execute the full model on CPU,
404 // ensure that executionCallback->notify() is called, and return
405 // false.
406 // TODO: How should we handle timing in this case?
407 // For Q this is irrelevant: We only support timing in conjunction
408 // with an explicit device list; and we do not support CPU fallback
409 // with an explicit device list. See CompilationBuilder::mExplicitDeviceList.
410 static bool cpuFallbackPartial(ExecutionBuilder* executionBuilder, const ExecutionPlan* plan,
411 std::shared_ptr<ExecutionPlan::Controller> controller,
412 const sp<ExecutionCallback>& executionCallback,
413 std::vector<OutputShape>* outputShapes) {
414 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial");
415 VLOG(EXECUTION) << "cpuFallbackPartial";
416 std::shared_ptr<StepExecutor> executor;
417 int n = plan->fallback(controller, &executor);
418 if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
419 cpuFallbackFull(executionBuilder, executionCallback);
420 return false;
421 }
422 sp<ExecutionCallback> fallbackCallback;
423 if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
424 cpuFallbackFull(executionBuilder, executionCallback);
425 return false;
426 }
427 fallbackCallback->wait();
428 ErrorStatus status = fallbackCallback->getStatus();
429 const auto& stepOutputShapes = fallbackCallback->getOutputShapes();
430 if (!executor->updateOutputShapes(stepOutputShapes, outputShapes)) {
431 status = ErrorStatus::GENERAL_FAILURE;
432 }
433 if (status != ErrorStatus::NONE) {
434 // OUTPUT_INSUFFICIENT_SIZE is not recoverable
435 if (status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
436 executionCallback->notify(status, *outputShapes, kNoTiming);
437 } else {
438 cpuFallbackFull(executionBuilder, executionCallback);
439 }
440 return false;
441 }
442 return true;
443 }
444
445 static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
446 const ExecutionPlan* plan,
447 std::shared_ptr<ExecutionPlan::Controller> controller,
448 bool allowFallback,
449 const sp<ExecutionCallback>& executionCallback) {
450 VLOG(EXECUTION) << "ExecutionBuilder::compute (from plan, iteratively)";
451 std::vector<OutputShape> outputShapes;
452 Timing timing = kNoTiming;
453 executionBuilder->initializeOutputShapes(&outputShapes);
454 while (true) {
455 std::shared_ptr<StepExecutor> executor;
456 VLOG(EXECUTION) << "looking for next StepExecutor";
457 std::shared_ptr<ExecutionBurstController> burstController = nullptr;
458 int n = plan->next(controller, &executor, &burstController);
459 if (n != ANEURALNETWORKS_NO_ERROR) {
460 if (allowFallback) {
461 cpuFallbackFull(executionBuilder, executionCallback);
462 } else {
463 executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
464 }
465 return;
466 }
467 if (executor == nullptr) {
468 executionCallback->notify(ErrorStatus::NONE, outputShapes, timing);
469 return;
470 }
471
472 sp<ExecutionCallback> stepCallback;
473 n = executor->startCompute(&stepCallback, burstController);
474 if (n != ANEURALNETWORKS_NO_ERROR) {
475 if (allowFallback) {
476 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback,
477 &outputShapes)) {
478 // Successfully executed one step on CPU.
479 continue;
480 } else {
481 // Either successfully executed entire plan on
482 // CPU, or tried and failed to do so.
483 return;
484 }
485 } else {
486 executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
487 return;
488 }
489 }
490 stepCallback->wait();
491 ErrorStatus status = stepCallback->getStatus();
492 const auto& stepOutputShapes = stepCallback->getOutputShapes();
493 if (!executor->updateOutputShapes(stepOutputShapes, &outputShapes)) {
494 status = ErrorStatus::GENERAL_FAILURE;
495 }
496 if (status == ErrorStatus::NONE) {
497 // We only support collection of timing information in the case of a
498 // single step, so it's safe to just keep track of the last step's
499 // timing information.
500 timing = stepCallback->getTiming();
501 } else {
502 // OUTPUT_INSUFFICIENT_SIZE is not recoverable
503 if (allowFallback && status != ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
504 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback,
505 &outputShapes)) {
506 // Successfully executed one step on CPU.
507 continue;
508 } else {
509 // Either successfully executed entire plan on
510 // CPU, or tried and failed to do so.
511 return;
512 }
513 } else if (status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
514 executionCallback->notify(status, outputShapes, kNoTiming);
515 return;
516 } else {
517 executionCallback->notify(status, {}, kNoTiming);
518 return;
519 }
520 }
521 }
522 }
523
compute(sp<ExecutionCallback> * synchronizationCallback,BurstBuilder * burstBuilder)524 int ExecutionBuilder::compute(sp<ExecutionCallback>* synchronizationCallback,
525 BurstBuilder* burstBuilder) {
526 CHECK(synchronizationCallback == nullptr || burstBuilder == nullptr)
527 << "synchronizationCallback and burstBuilder cannot simultaneously be used";
528
529 const bool synchronous = (synchronizationCallback == nullptr);
530
531 if (!synchronous) {
532 *synchronizationCallback = nullptr;
533 }
534
535 // TODO validate that we have full types for all inputs and outputs,
536 // that the graph is not cyclic,
537
538 auto name = [synchronous, burstBuilder] {
539 return burstBuilder ? "burstCompute" : synchronous ? "compute" : "startCompute";
540 };
541 if (mStarted) {
542 LOG(ERROR) << "ANeuralNetworksExecution_" << name()
543 << " called on an execution that has already started";
544 return ANEURALNETWORKS_BAD_STATE;
545 }
546 for (auto& p : mInputs) {
547 if (p.state == ModelArgumentInfo::UNSPECIFIED) {
548 LOG(ERROR) << "ANeuralNetworksExecution_" << name() << " not all inputs specified";
549 return ANEURALNETWORKS_BAD_DATA;
550 }
551 }
552 for (auto& p : mOutputs) {
553 if (p.state == ModelArgumentInfo::UNSPECIFIED) {
554 LOG(ERROR) << "ANeuralNetworksExecution_" << name() << " not all outputs specified";
555 return ANEURALNETWORKS_BAD_DATA;
556 }
557 }
558
559 auto wrappedFinish = [this](ErrorStatus error, const std::vector<OutputShape>& outputShapes) {
560 return finish(error, outputShapes);
561 };
562
563 // TODO: For asynchronous execution, entire plan-based-path should run in an
564 // asynchronous thread -- take the asynchronous thread logic out of
565 // startComputeOnCpu() and use it to wrap the plan-based-path.
566 mStarted = true;
567 const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
568 std::shared_ptr<ExecutionPlan::Controller> controller =
569 mPlan->makeController(this, burstBuilder);
570 if (synchronous) {
571 VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API)";
572 sp<ExecutionCallback> localSynchronizationCallback = new ExecutionCallback();
573 localSynchronizationCallback->setOnFinish(wrappedFinish);
574 asyncStartComputePartitioned(this, mPlan, controller, allowFallback,
575 localSynchronizationCallback);
576 localSynchronizationCallback->wait();
577 if (mMeasureTiming) {
578 mTiming = localSynchronizationCallback->getTiming();
579 }
580 return convertErrorStatusToResultCode(localSynchronizationCallback->getStatus());
581 } else /* asynchronous */ {
582 // TODO: use a thread pool
583
584 // Prepare the callback for asynchronous execution.
585 // sp<ExecutionCallback> object is returned when the
586 // execution has been successfully launched, otherwise a
587 // nullptr is returned. The executionCallback is
588 // abstracted in the NN API as an "event".
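        // At the NDK level this asynchronous path corresponds roughly to (client-side
        // sketch, not part of this file):
        //   ANeuralNetworksEvent* event = nullptr;
        //   ANeuralNetworksExecution_startCompute(execution, &event);
        //   ANeuralNetworksEvent_wait(event);
        //   ANeuralNetworksEvent_free(event);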
589 sp<ExecutionCallback> executionCallback = new ExecutionCallback();
590 executionCallback->setOnFinish(wrappedFinish);
591 if (DeviceManager::get()->syncExecRuntime()) {
592 VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API, non-threaded)";
593 asyncStartComputePartitioned(this, mPlan, controller, allowFallback, executionCallback);
594 } else {
595 VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API)";
596 std::thread thread(asyncStartComputePartitioned, this, mPlan, controller, allowFallback,
597 executionCallback);
598 executionCallback->bindThread(std::move(thread));
599 }
600 *synchronizationCallback = executionCallback;
601 return ANEURALNETWORKS_NO_ERROR;
602 }
603 }
604
605 void ExecutionBuilder::initializeOutputShapes(std::vector<OutputShape>* outputShapes) const {
606 outputShapes->resize(mOutputs.size());
607 for (uint32_t i = 0; i < mOutputs.size(); i++) {
608 (*outputShapes)[i].dimensions = mOutputs[i].dimensions;
609 (*outputShapes)[i].isSufficient = true;
610 }
611 }
612
613 // Check if the dimensions "to" can be updated by the dimensions "from", where "from" must
614 // be at least as fully specified as "to".
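// For example, {0, 3} or {} is updatable by {2, 3}; but {2, 3} is not updatable by
// {2, 4} (a fully specified dimension may not change) nor by {2, 3, 1} (rank mismatch).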
615 static bool isUpdatable(const std::vector<uint32_t>& to, const std::vector<uint32_t>& from) {
616 if (to.size() == 0) return true;
617 NN_RET_CHECK_EQ(to.size(), from.size());
618 for (uint32_t i = 0; i < to.size(); i++) {
619 NN_RET_CHECK(to[i] == from[i] || to[i] == 0);
620 }
621 return true;
622 }
623
624 bool ExecutionBuilder::updateOutputShapes(const std::vector<OutputShape>& outputShapes) {
625 if (outputShapes.size() == 0) {
626 return true;
627 }
628 NN_RET_CHECK_EQ(outputShapes.size(), mOutputs.size());
629 for (uint32_t i = 0; i < outputShapes.size(); i++) {
630 // Check if only unspecified dimensions or rank are overwritten.
631 NN_RET_CHECK(isUpdatable(mOutputs[i].dimensions, outputShapes[i].dimensions));
632 }
633 for (uint32_t i = 0; i < outputShapes.size(); i++) {
634 mOutputs[i].dimensions = outputShapes[i].dimensions;
635 mOutputs[i].isSufficient = outputShapes[i].isSufficient;
636 }
637 return true;
638 }
639
640 ErrorStatus ExecutionBuilder::finish(ErrorStatus, const std::vector<OutputShape>& outputShapes) {
641 CHECK(!mFinished) << "ExecutionBuilder::finish is called twice";
642 mFinished = true;
643 if (!updateOutputShapes(outputShapes)) {
644 return ErrorStatus::GENERAL_FAILURE;
645 }
646 return ErrorStatus::NONE;
647 }
648
649 bool StepExecutor::updateOutputShapes(const std::vector<OutputShape>& from,
650 std::vector<OutputShape>* to) {
651 if (from.size() == 0) {
652 return true;
653 }
654 if (mExecutionStep != nullptr) {
655 const auto& indexMapping = mExecutionStep->getOutputIndexSubModelToFromModel();
656 NN_RET_CHECK_LE(indexMapping.size(), from.size());
657 for (uint32_t i = 0, e = indexMapping.size(); i < e; i++) {
658 uint32_t toIndex = indexMapping[i];
659 NN_RET_CHECK_GT(to->size(), toIndex);
660 NN_RET_CHECK(isUpdatable(to->at(toIndex).dimensions, from[i].dimensions));
661 (*to)[toIndex] = from[i];
662 }
663 } else {
664 NN_RET_CHECK_EQ(from.size(), to->size());
665 for (uint32_t i = 0, e = from.size(); i < e; i++) {
666 NN_RET_CHECK(isUpdatable(to->at(i).dimensions, from[i].dimensions));
667 (*to)[i] = from[i];
668 }
669 }
670 return true;
671 }
672
673 // Figures out how to place each of the inputs or outputs in a buffer. This just does the
674 // layout; it does not copy data. Each argument's offset is aligned as needed.
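// For example (assuming 4-byte alignment for arguments of four or more bytes), a 6-byte
// POINTER argument is placed at offset 0 and a following 12-byte argument at offset 8,
// for a total pool size of 20 bytes.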
675 int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
676 Memory* memory) {
677 uint32_t nextPoolIndex = mMemories.size();
678 int64_t total = 0;
679 for (auto& info : *args) {
680 if (info.state == ModelArgumentInfo::POINTER) {
681 DataLocation& loc = info.locationAndLength;
682 // TODO Good enough alignment?
683 total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
684 loc.poolIndex = nextPoolIndex;
685 loc.offset = static_cast<uint32_t>(total);
686 total += loc.length;
687 }
688 }
689 if (total > 0xFFFFFFFF) {
690 LOG(ERROR) << "StepExecutor::allocatePointerArgumentsToPool: ANeuralNetworksExecution: "
691 "Size of all inputs or outputs exceeds 2^32.";
692 return ANEURALNETWORKS_BAD_DATA;
693 }
694 hidl_memory hidlMemory;
695 if (total > 0) {
696 memory->create(total); // TODO check error
697 mMemories.add(memory);
698 }
699 return ANEURALNETWORKS_NO_ERROR;
700 }
701
702 static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
703 hidl_vec<RequestArgument>* ioInfos) {
704 size_t count = argumentInfos.size();
705 ioInfos->resize(count);
706 for (size_t i = 0; i < count; i++) {
707 const auto& info = argumentInfos[i];
708 (*ioInfos)[i] = {
709 .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
710 .location = info.locationAndLength,
711 .dimensions = info.dimensions,
712 };
713 }
714 }
715
716 StepExecutor::StepExecutor(ExecutionBuilder* executionBuilder, const ModelBuilder* model,
717 std::shared_ptr<Device> device,
718 std::shared_ptr<VersionedIPreparedModel> preparedModel)
719 : mExecutionBuilder(executionBuilder),
720 mModel(model),
721 mDevice(device),
722 mPreparedModel(preparedModel),
723 mInputs(model->inputCount()),
724 mOutputs(model->outputCount()) {
725 CHECK(mDevice != nullptr);
726 }
727
728 void StepExecutor::mapInputsAndOutputsTrivially() {
729 mInputs = mExecutionBuilder->mInputs;
730 mOutputs = mExecutionBuilder->mOutputs;
731 mMemories = mExecutionBuilder->mMemories;
732 }
733
734 void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
735 ModelArgumentInfo* executorInputOrOutput) {
736 *executorInputOrOutput = builderInputOrOutput;
737 switch (executorInputOrOutput->state) {
738 default:
739 nnAssert(!"unexpected ModelArgumentInfo::state");
740 break;
741 case ModelArgumentInfo::HAS_NO_VALUE:
742 case ModelArgumentInfo::POINTER:
743 case ModelArgumentInfo::UNSPECIFIED:
744 break;
745 case ModelArgumentInfo::MEMORY: {
746 const uint32_t builderPoolIndex = builderInputOrOutput.locationAndLength.poolIndex;
747 const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
748 const uint32_t executorPoolIndex = mMemories.add(memory);
749 executorInputOrOutput->locationAndLength.poolIndex = executorPoolIndex;
750 break;
751 }
752 }
753 }
754
755 int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
756 const Memory* memory, uint32_t offset,
757 ModelArgumentInfo* inputOrOutputInfo) {
758 // Should be similar to
759 // ExecutionBuilder::setInputFromMemory()
760 // ExecutionBuilder::setOutputFromMemory()
761
762 uint32_t poolIndex = mMemories.add(memory);
763 uint32_t length = TypeManager::get()->getSizeOfData(inputOrOutputOperand);
764 return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset,
765 length);
766 }
767
768 static void logArguments(const char* kind, const std::vector<ModelArgumentInfo>& args) {
769 for (unsigned i = 0; i < args.size(); i++) {
770 const auto& arg = args[i];
771 std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
772 switch (arg.state) {
773 case ModelArgumentInfo::POINTER:
774 VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer) << ")";
775 break;
776 case ModelArgumentInfo::MEMORY:
777 VLOG(EXECUTION) << prefix << "MEMORY("
778 << "pool=" << arg.locationAndLength.poolIndex << ", "
779 << "off=" << arg.locationAndLength.offset << ")";
780 break;
781 case ModelArgumentInfo::HAS_NO_VALUE:
782 VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
783 break;
784 case ModelArgumentInfo::UNSPECIFIED:
785 VLOG(EXECUTION) << prefix << "UNSPECIFIED";
786 break;
787 default:
788 VLOG(EXECUTION) << prefix << "state(" << arg.state << ")";
789 break;
790 }
791 }
792 }
793
794 bool StepExecutor::isCpu() const {
795 return mDevice->getInterface() == nullptr;
796 }
797
startCompute(sp<ExecutionCallback> * synchronizationCallback,const std::shared_ptr<ExecutionBurstController> & burstController)798 int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback,
799 const std::shared_ptr<ExecutionBurstController>& burstController) {
800 if (VLOG_IS_ON(EXECUTION)) {
801 logArguments("input", mInputs);
802 logArguments("output", mOutputs);
803 }
804 if (isCpu()) {
805 return startComputeOnCpu(synchronizationCallback);
806 } else {
807 return startComputeOnDevice(synchronizationCallback, burstController);
808 }
809 }
810
811 int StepExecutor::startComputeOnDevice(
812 sp<ExecutionCallback>* synchronizationCallback,
813 const std::shared_ptr<ExecutionBurstController>& burstController) {
814 CHECK(!isCpu());
815
816 // Initialize timing information in case we take an error path to exit.
817 mExecutionBuilder->reportTiming(kNoTiming);
818
819 *synchronizationCallback = nullptr;
820
821 // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated
822 // ExecutionPlan with the compilation and execution phases of the NN API
823 if (mPreparedModel == nullptr) {
824 Model model;
825 mModel->setHidlModel(&model);
826
827 // TODO(butlermichael): Propagate user preference to this point instead of
828 // using default value of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, or
829 // remove this entire block of code since it is a stale path that is only
830 // reached through code that has been #if'd out.
831 ExecutionPreference preference =
832 static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
833
834 ErrorStatus status = ErrorStatus::GENERAL_FAILURE;
835 std::tie(status, mPreparedModel) =
836 mDevice->getInterface()->prepareModel(model, preference, {}, {}, {});
837 if (status != ErrorStatus::NONE) {
838 return convertErrorStatusToResultCode(status);
839 }
840 if (mPreparedModel == nullptr) {
841 return ANEURALNETWORKS_OP_FAILED;
842 }
843 }
844
845 NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "StepExecutor::startComputeOnDevice");
846 // We separate the input and output pools to reduce the copying done if we
847 // do an eventual remoting (hidl_memory->update()). We could also use this to set
848 // protection on read-only memory, but that is not currently done.
849 Memory inputPointerArguments;
850 Memory outputPointerArguments;
851
852 // Layout the input and output data
853 int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments);
854 if (n != ANEURALNETWORKS_NO_ERROR) {
855 return n;
856 }
857 n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments);
858 if (n != ANEURALNETWORKS_NO_ERROR) {
859 return n;
860 }
861
862 // Copy the input data that was specified via a pointer.
863 // inputPointerArguments.update();
864 for (auto& info : mInputs) {
865 if (info.state == ModelArgumentInfo::POINTER) {
866 DataLocation& loc = info.locationAndLength;
867 uint8_t* data = nullptr;
868 int n = inputPointerArguments.getPointer(&data);
869 if (n != ANEURALNETWORKS_NO_ERROR) {
870 return n;
871 }
872 memcpy(data + loc.offset, info.buffer, loc.length);
873 }
874 }
875 // TODO: Add inputPointerArguments.commit() and .update() at all the right places
876
877 Request request;
878 setRequestArgumentArray(mInputs, &request.inputs);
879 setRequestArgumentArray(mOutputs, &request.outputs);
880 uint32_t count = mMemories.size();
881 request.pools.resize(count);
882 for (uint32_t i = 0; i < count; i++) {
883 request.pools[i] = mMemories[i]->getHidlMemory();
884 }
885
886 NNTRACE_FULL_SWITCH(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
887 "StepExecutor::startComputeOnDevice::execute");
888
889 // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
890 // object is returned when the execution has been successfully launched,
891 // otherwise a nullptr is returned. The executionCallback is abstracted in
892 // the NN API as an "event".
893 //
894 // The sp is used for ref-counting purposes. Without it, the HIDL service
895 // could attempt to communicate with a dead callback object.
896 //
897 // TODO: Explain the "dead callback" problem further, either here or
898 // in the design document.
899 sp<ExecutionCallback> executionCallback = new ExecutionCallback();
900
901 // compute using burst if present
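    // Client-side sketch (NDK level, not part of this file) of how a burst execution is
    // typically requested:
    //   ANeuralNetworksBurst* burst = nullptr;
    //   ANeuralNetworksBurst_create(compilation, &burst);
    //   ANeuralNetworksExecution_burstCompute(execution, burst);
    //   ANeuralNetworksBurst_free(burst);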
902 const bool burstCompute = (burstController != nullptr);
903 bool burstFallback = false;
904 if (burstCompute) {
905 std::vector<intptr_t> memoryIds;
906 memoryIds.reserve(mMemories.size());
907 for (const Memory* memory : mMemories) {
908 memory->usedBy(burstController);
909 memoryIds.push_back(memory->getKey());
910 }
911
912 VLOG(EXECUTION) << "Before ExecutionBurstController->tryCompute() "
913 << SHOW_IF_DEBUG(toString(request));
914 auto [status, outputShapes, timing, fallback] =
915 burstController->tryCompute(request, measureTiming(mExecutionBuilder), memoryIds);
916
917 burstFallback = fallback;
918 if (!fallback) {
919 executionCallback->notify(status, outputShapes, timing);
920 }
921 }
922
923 // compute from IPreparedModel if either:
924 // (1) burst was not supplied, or
925 // (2) the burst execution failed and requested a fallback execution
926 if (!burstCompute || burstFallback) {
927 if (DeviceManager::get()->syncExecHal()) {
928 VLOG(EXECUTION) << "Before mPreparedModel->executeSynchronously() "
929 << SHOW_IF_DEBUG(toString(request));
930 auto syncExecuteResult =
931 mPreparedModel->executeSynchronously(request, measureTiming(mExecutionBuilder));
932 executionCallback->notify(std::get<0>(syncExecuteResult),
933 std::get<1>(syncExecuteResult),
934 std::get<2>(syncExecuteResult));
935 } else {
936 VLOG(EXECUTION) << "Before mPreparedModel->execute() "
937 << SHOW_IF_DEBUG(toString(request));
938 // Execute.
939 // TODO: What happens to the Callback if the service dies abnormally
940 // -- won't that keep the Callback live forever, because the service
941 // never has the opportunity to bump the reference count down? Or
942 // maybe the HIDL infrastructure handles this magically? At worst,
943 // it seems like this is a small memory leak, if the Callback stays
944 // alive forever.
945 Return<ErrorStatus> executeStatus = mPreparedModel->execute(
946 request, measureTiming(mExecutionBuilder), executionCallback);
947 if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) {
948 VLOG(EXECUTION) << "**Execute launch failed**";
949 return executeStatus.isOk() ? convertErrorStatusToResultCode(executeStatus)
950 : ANEURALNETWORKS_OP_FAILED;
951 }
952 }
953 }
954
955 // TODO: Remove this synchronization point when the block of code below is
956 // removed.
957 executionCallback->wait();
958 NNTRACE_FULL_SWITCH(NNTRACE_LAYER_RUNTIME, NNTRACE_PHASE_EXECUTION,
959 "StepExecutor::startComputeOnDevice::waited");
960 Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
961 if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
962 VLOG(EXECUTION) << "**Execution failed**";
963 if (callbackStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
964 *synchronizationCallback = executionCallback;
965 return ANEURALNETWORKS_NO_ERROR;
966 }
967 return callbackStatus.isOk() ? convertErrorStatusToResultCode(callbackStatus)
968 : ANEURALNETWORKS_OP_FAILED;
969 }
970
971 mExecutionBuilder->reportTiming(executionCallback->getTiming());
972
973 // Copy the output data from shared memory to the output buffers.
974 // TODO: Move this block of code somewhere else. It should not be in the
975 // startCompute function.
976 // TODO: outputMemory->update(); outputMemory->commit()
977 NNTRACE_RT_SWITCH(NNTRACE_PHASE_RESULTS, "StepExecutor::startComputeOnDevice");
978 for (auto& info : mOutputs) {
979 if (info.state == ModelArgumentInfo::POINTER) {
980 DataLocation& loc = info.locationAndLength;
981 uint8_t* data = nullptr;
982 int n = outputPointerArguments.getPointer(&data);
983 if (n != ANEURALNETWORKS_NO_ERROR) {
984 return n;
985 }
986 memcpy(info.buffer, data + loc.offset, loc.length);
987 }
988 }
989 VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed";
990
991 *synchronizationCallback = executionCallback;
992 return ANEURALNETWORKS_NO_ERROR;
993 }
994
995 static void computeOnCpu(const Model& model, const Request& request,
996 const std::vector<RunTimePoolInfo>& modelPoolInfos,
997 const std::vector<RunTimePoolInfo>& requestPoolInfos,
998 const sp<IExecutionCallback>& executionCallback) {
999 NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
1000 CpuExecutor executor;
1001 int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
1002 const auto& outputShapes = executor.getOutputShapes();
1003 executionCallback->notify_1_2(convertResultCodeToErrorStatus(err), outputShapes, kNoTiming);
1004 }
1005
startComputeOnCpu(sp<ExecutionCallback> * synchronizationCallback)1006 int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) {
1007 // TODO: use a thread pool
1008 // TODO(mikie): this could have NNTRACE so we could measure the overhead of
1009 // spinning up a new thread.
1010
1011 Model model;
1012 mModel->setHidlModel(&model);
1013
1014 // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
1015 // object is returned when the execution has been successfully launched,
1016 // otherwise a nullptr is returned. The executionCallback is abstracted in
1017 // the NN API as an "event".
1018 sp<ExecutionCallback> executionCallback = new ExecutionCallback();
1019 *synchronizationCallback = nullptr;
1020
1021 std::vector<RunTimePoolInfo> modelPoolInfos;
1022 if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
1023 return ANEURALNETWORKS_UNMAPPABLE;
1024 }
1025
1026 std::vector<RunTimePoolInfo> requestPoolInfos;
1027 requestPoolInfos.reserve(mMemories.size());
1028 for (const Memory* mem : mMemories) {
1029 if (std::optional<RunTimePoolInfo> poolInfo =
1030 RunTimePoolInfo::createFromHidlMemory(mem->getHidlMemory())) {
1031 requestPoolInfos.emplace_back(*poolInfo);
1032 } else {
1033 return ANEURALNETWORKS_UNMAPPABLE;
1034 }
1035 }
1036 // Create an additional pool for each POINTER input or output.
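    // For example, if mMemories contributed two pools above, a POINTER input and a POINTER
    // output are assigned poolIndex 2 and 3 respectively, each at offset 0 in a pool that
    // wraps the caller-provided buffer.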
1037 auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
1038 for (ModelArgumentInfo& argumentInfo : argumentInfos) {
1039 if (argumentInfo.state == ModelArgumentInfo::POINTER) {
1040 argumentInfo.locationAndLength.poolIndex =
1041 static_cast<uint32_t>(requestPoolInfos.size());
1042 argumentInfo.locationAndLength.offset = 0;
1043 requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
1044 static_cast<uint8_t*>(argumentInfo.buffer)));
1045 }
1046 }
1047 };
1048 fixPointerArguments(mInputs);
1049 fixPointerArguments(mOutputs);
1050
1051 Request request;
1052 setRequestArgumentArray(mInputs, &request.inputs);
1053 setRequestArgumentArray(mOutputs, &request.outputs);
1054
1055 if (DeviceManager::get()->syncExecCpu()) {
1056 computeOnCpu(model, request, modelPoolInfos, requestPoolInfos, executionCallback);
1057 } else {
1058 // TODO: should model be moved with a std::cref?
1059 std::thread thread(computeOnCpu, model, std::move(request), std::move(modelPoolInfos),
1060 std::move(requestPoolInfos), executionCallback);
1061 executionCallback->bindThread(std::move(thread));
1062 }
1063
1064 *synchronizationCallback = executionCallback;
1065 return ANEURALNETWORKS_NO_ERROR;
1066 }
1067
1068 } // namespace nn
1069 } // namespace android
1070