/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "ExecutionBuilder"

#include "ExecutionBuilder.h"

#include "CompilationBuilder.h"
#include "CpuExecutor.h"
#include "HalInterfaces.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "Utils.h"

#include <mutex>
#include <thread>
#include <vector>

namespace android {
namespace nn {

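// Records a caller-provided pointer as the source or destination for an input
// or output operand. data == nullptr (with length == 0) marks the argument as
// explicitly having no value; otherwise the dimensions are validated against
// the operand and, except for OEM operands, the length is checked against the
// operand's required size.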
int ModelArgumentInfo::setFromPointer(const Operand& operand,
                                      const ANeuralNetworksOperandType* type, void* data,
                                      uint32_t length) {
    if ((data == nullptr) != (length == 0)) {
        const char* dataPtrMsg = data ? "NOT_NULLPTR" : "NULLPTR";
        LOG(ERROR) << "Data pointer must be nullptr if and only if length is zero (data = "
                   << dataPtrMsg << ", length = " << length << ")";
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (data == nullptr) {
        state = ModelArgumentInfo::HAS_NO_VALUE;
    } else {
        int n = updateDimensionInfo(operand, type);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
        uint32_t neededLength = sizeOfData(operand.type, dimensions);
        if (operand.type != OperandType::OEM && neededLength != length) {
            LOG(ERROR) << "Setting argument with invalid length: " << length
                       << ", expected length: " << neededLength;
            return ANEURALNETWORKS_BAD_DATA;
        }
        state = ModelArgumentInfo::POINTER;
    }
    buffer = data;
    locationAndLength = {.poolIndex = 0, .offset = 0, .length = length};
    return ANEURALNETWORKS_NO_ERROR;
}

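// Records a region of a caller-provided Memory pool as the source or
// destination for an input or output operand. As with setFromPointer(), the
// length must match the operand's required size except for OEM operands.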
int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                                     uint32_t poolIndex, uint32_t offset, uint32_t length) {
    int n = updateDimensionInfo(operand, type);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }
    uint32_t neededLength = sizeOfData(operand.type, dimensions);
    if (operand.type != OperandType::OEM && neededLength != length) {
        LOG(ERROR) << "Setting argument with invalid length: " << length
                   << ", expected length: " << neededLength;
        return ANEURALNETWORKS_BAD_DATA;
    }

    state = ModelArgumentInfo::MEMORY;
    locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length};
    buffer = nullptr;
    return ANEURALNETWORKS_NO_ERROR;
}

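// Points an argument at a region of runtime-owned temporary memory (used by
// partitioned execution for intermediate results passed between steps). The
// length is computed from the operand itself, so the operand's dimensions
// must be fully specified.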
int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand,
                                              uint32_t poolIndex, uint32_t offset) {
    int n = updateDimensionInfo(operand, nullptr);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }
    state = ModelArgumentInfo::MEMORY;
    locationAndLength =
            {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)};
    buffer = nullptr;
    return ANEURALNETWORKS_NO_ERROR;
}

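// Computes this argument's dimensions. With no override (newType == nullptr),
// the operand's own dimensions are used and must be fully specified. With an
// override, the new type must match the operand's type and rank, and may only
// fill in dimensions that the operand leaves unspecified (zero).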
int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
                                           const ANeuralNetworksOperandType* newType) {
    nnAssert(dimensions.empty());
    if (newType == nullptr) {
        for (auto i : operand.dimensions) {
            if (i == 0) {
                LOG(ERROR) << "Setting input/output with unspecified dimensions";
                return ANEURALNETWORKS_BAD_DATA;
            }
        }
        dimensions = operand.dimensions;
    } else {
        uint32_t count = newType->dimensionCount;
        if (static_cast<OperandType>(newType->type) != operand.type ||
            count != operand.dimensions.size()) {
            LOG(ERROR) << "Setting input/output with incompatible types";
            return ANEURALNETWORKS_BAD_DATA;
        }

        dimensions = hidl_vec<uint32_t>(count);
        for (uint32_t i = 0; i < count; i++) {
            if (operand.dimensions[i] != 0 && operand.dimensions[i] != newType->dimensions[i]) {
                LOG(ERROR) << "Overriding a fully specified dimension is disallowed";
                return ANEURALNETWORKS_BAD_DATA;
            } else {
                dimensions[i] = newType->dimensions[i];
            }
        }
    }
    return ANEURALNETWORKS_NO_ERROR;
}

ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
        mModel(compilation->mModel),
        mPlan(&compilation->mPlan),
        mPartitioning(compilation->mPartitioning),
        mInputs(mModel->inputCount()),
        mOutputs(mModel->outputCount()) {
    VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
}

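// Associates a caller buffer with the index-th model input. The optional type
// may override the operand's type, e.g. to supply dimensions that were left
// unspecified when the model was built.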
int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
                               const void* buffer, size_t length) {
    uint32_t count = static_cast<uint32_t>(mInputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (type != nullptr) {
        int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
    if (length > 0xFFFFFFFF) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
        return ANEURALNETWORKS_BAD_DATA;
    }
    uint32_t l = static_cast<uint32_t>(length);
    return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
                                         const_cast<void*>(buffer), l);
}

int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                         const Memory* memory, size_t offset, size_t length) {
    // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()

    uint32_t count = static_cast<uint32_t>(mInputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
                   << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!memory->validateSize(offset, length)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // TODO validate the rest
    uint32_t poolIndex = mMemories.add(memory);
    return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
                                        length);
}

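// The next two functions mirror setInput()/setInputFromMemory() for the
// index-th model output.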
int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                                size_t length) {
    uint32_t count = static_cast<uint32_t>(mOutputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (type != nullptr) {
        int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
    if (length > 0xFFFFFFFF) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutput output exceeds max length " << length;
        return ANEURALNETWORKS_BAD_DATA;
    }
    uint32_t l = static_cast<uint32_t>(length);
    return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l);
}

int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                                          const Memory* memory, size_t offset, size_t length) {
    // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()

    uint32_t count = static_cast<uint32_t>(mOutputs.size());
    if (index >= count) {
        LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
                   << count;
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (!memory->validateSize(offset, length)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // TODO validate the rest
    uint32_t poolIndex = mMemories.add(memory);
    return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
                                         length);
}

// Attempt synchronous execution of full model on CPU.
// Ensure that executionCallback->notify() is called.
static void cpuFallbackFull(const ExecutionBuilder* executionBuilder,
                            const sp<ExecutionCallback>& executionCallback) {
    VLOG(EXECUTION) << "cpuFallbackFull";
    StepExecutor executor(executionBuilder, executionBuilder->getModel(),
                          nullptr /* no VersionedIDevice, so CPU */,
                          nullptr /* no IPreparedModel */);
    executor.mapInputsAndOutputsTrivially();
    sp<ExecutionCallback> fallbackCallback;
    int n = executor.startCompute(&fallbackCallback);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        executionCallback->notify(convertResultCodeToErrorStatus(n));
        return;
    }
    fallbackCallback->wait();
    executionCallback->notify(fallbackCallback->getStatus());
}

// Attempt synchronous execution on CPU.
// (1) First, attempt to execute this step on CPU. If successful,
//     return true. (Do not call executionCallback->notify().)
// (2) If unsuccessful, attempt to execute the full model on CPU,
//     ensure that executionCallback->notify() is called, and return
//     false.
static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder,
                               const ExecutionPlan* plan,
                               std::shared_ptr<ExecutionPlan::Controller> controller,
                               const sp<ExecutionCallback>& executionCallback) {
    VLOG(EXECUTION) << "cpuFallbackPartial";
    std::shared_ptr<StepExecutor> executor;
    int n = plan->fallback(controller, &executor);
    if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
        cpuFallbackFull(executionBuilder, executionCallback);
        return false;
    }
    sp<ExecutionCallback> fallbackCallback;
    if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
        cpuFallbackFull(executionBuilder, executionCallback);
        return false;
    }
    fallbackCallback->wait();
    if (fallbackCallback->getStatus() != ErrorStatus::NONE) {
        cpuFallbackFull(executionBuilder, executionCallback);
        return false;
    }
    return true;
}

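// Iterates over the steps of an execution plan, running each step on its
// assigned device and falling back to CPU when allowed. Every exit path
// ensures that executionCallback->notify() has been called.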
static void asyncStartComputePartitioned(const ExecutionBuilder* executionBuilder,
                                         const ExecutionPlan* plan,
                                         std::shared_ptr<ExecutionPlan::Controller> controller,
                                         bool allowFallback,
                                         const sp<ExecutionCallback>& executionCallback) {
    VLOG(EXECUTION) << "ExecutionBuilder::startCompute (from plan, iteratively)";
    while (true) {
        std::shared_ptr<StepExecutor> executor;
        VLOG(EXECUTION) << "looking for next StepExecutor";
        int n = plan->next(controller, &executor);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            if (allowFallback) {
                cpuFallbackFull(executionBuilder, executionCallback);
            } else {
                executionCallback->notify(convertResultCodeToErrorStatus(n));
            }
            return;
        }
        if (executor == nullptr) {
            executionCallback->notify(ErrorStatus::NONE);
            return;
        }

        sp<ExecutionCallback> stepCallback;
        n = executor->startCompute(&stepCallback);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            if (allowFallback) {
                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
                    // Successfully executed one step on CPU.
                    continue;
                } else {
                    // Either successfully executed entire plan on
                    // CPU, or tried and failed to do so.
                    return;
                }
            } else {
                executionCallback->notify(convertResultCodeToErrorStatus(n));
                return;
            }
        }
        stepCallback->wait();
        ErrorStatus status = stepCallback->getStatus();
        if (status != ErrorStatus::NONE) {
            if (allowFallback) {
                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
                    // Successfully executed one step on CPU.
                    continue;
                } else {
                    // Either successfully executed entire plan on
                    // CPU, or tried and failed to do so.
                    return;
                }
            } else {
                executionCallback->notify(status);
                return;
            }
        }
    }
}

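// Launches an asynchronous execution and hands back a callback that the
// caller can wait on. Typical caller-side flow, sketched in terms of the
// public NN API entry points that wrap these methods:
//
//   ANeuralNetworksExecution_setInput(execution, 0, nullptr, inputData, inputSize);
//   ANeuralNetworksExecution_setOutput(execution, 0, nullptr, outputData, outputSize);
//   ANeuralNetworksExecution_startCompute(execution, &event);
//   ANeuralNetworksEvent_wait(event);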
int ExecutionBuilder::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
    *synchronizationCallback = nullptr;

    // TODO validate that we have full types for all inputs and outputs,
    // and that the graph is not cyclic.

    for (auto& p : mInputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }
    for (auto& p : mOutputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }

#ifndef DISABLE_PARTITIONED_EXECUTION
    {
        // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan
        // with the compilation and execution phases of the NN API? Or retain that path
        // as a fallback in the case of partitioning failure?
        //
        // TODO: The entire plan-based path should run in an asynchronous thread --
        // take the asynchronous thread logic out of startComputeOnCpu() and use
        // it to wrap the plan-based path.
        if (mPartitioning > 0) {
            const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
            std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this);
            if (controller == nullptr) {
                if (!allowFallback) {
                    return ANEURALNETWORKS_OP_FAILED;
                }
            } else {
                // TODO: use a thread pool

                // Prepare the callback for asynchronous execution.
                // The sp<ExecutionCallback> object is returned when the
                // execution has been successfully launched; otherwise a
                // nullptr is returned. The executionCallback is
                // abstracted in the NN API as an "event".
                sp<ExecutionCallback> executionCallback = new ExecutionCallback();
                std::thread thread(asyncStartComputePartitioned, this, mPlan, controller,
                                   allowFallback,
                                   executionCallback);
                executionCallback->bind_thread(std::move(thread));
                *synchronizationCallback = executionCallback;
                return ANEURALNETWORKS_NO_ERROR;
            }
        }
    }
#else
    {
        // Find a driver that can handle all the operations.
        // TODO: Does not handle CPU fallback (which is tricky because
        //       StepExecutor::startCompute() is designed as
        //       asynchronous).
        // TODO: Does not actually behave asynchronously (because
        //       StepExecutor::startCompute() isn't actually asynchronous
        //       on a device as opposed to a CPU).
        Model hidlModel;
        mModel->setHidlModel(&hidlModel);
        const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers();
        for (const auto& device : devices) {
            hidl_vec<bool> supports;
            VLOG(EXECUTION) << "Checking " << device->getName();
            device->getSupportedOperations(hidlModel, &supports);
            if (std::find(supports.begin(), supports.end(), false) == supports.end()) {
                VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on "
                                << device->getName();
                StepExecutor executor(this, mModel, device->getInterface(),
                                      nullptr /* no IPreparedModel, so compile */);
                executor.mapInputsAndOutputsTrivially();
                return executor.startCompute(synchronizationCallback);
            }
        }
    }
#endif  // DISABLE_PARTITIONED_EXECUTION

    // Run on the CPU.
    VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on CPU";
    StepExecutor executor(this, mModel,
                          nullptr /* no VersionedIDevice, so CPU */,
                          nullptr /* no IPreparedModel */);
    executor.mapInputsAndOutputsTrivially();
    return executor.startCompute(synchronizationCallback);
}

// Figures out how to place each of the inputs or outputs in a buffer. This
// just does the layout; it does not copy data. Aligns each argument as needed.
int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
                                                 Memory* memory) {
    uint32_t nextPoolIndex = mMemories.size();
    int64_t total = 0;
    for (auto& info : *args) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndLength;
            // TODO Good enough alignment?
            total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
            loc.poolIndex = nextPoolIndex;
            loc.offset = static_cast<uint32_t>(total);
            total += loc.length;
        }
    }
    if (total > 0xFFFFFFFF) {
        LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds "
                      "2^32.";
        return ANEURALNETWORKS_BAD_DATA;
    }
    if (total > 0) {
        memory->create(total);  // TODO check error
        mMemories.add(memory);
    }
    return ANEURALNETWORKS_NO_ERROR;
}

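// Flattens a vector of ModelArgumentInfo into the hidl_vec<RequestArgument>
// form expected by the HAL Request structure.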
static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
                                    hidl_vec<RequestArgument>* ioInfos) {
    size_t count = argumentInfos.size();
    ioInfos->resize(count);
    for (size_t i = 0; i < count; i++) {
        const auto& info = argumentInfos[i];
        (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
                          .location = info.locationAndLength,
                          .dimensions = info.dimensions,
                        };
    }
}

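// A StepExecutor runs a single model on a single device: either on a driver
// (identified by a VersionedIDevice and, optionally, an already prepared
// model), or -- when driver is nullptr -- on the CPU.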
StepExecutor::StepExecutor(const ExecutionBuilder* executionBuilder,
                           const ModelBuilder* model,
                           VersionedIDevice* driver, sp<IPreparedModel> preparedModel) :
        mExecutionBuilder(executionBuilder), mModel(model),
        mDriver(driver), mPreparedModel(preparedModel),
        mInputs(model->inputCount()), mOutputs(model->outputCount()) {}

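// Maps inputs and outputs 1:1 from the ExecutionBuilder; used when this
// executor runs the entire model rather than one step of a plan.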
void StepExecutor::mapInputsAndOutputsTrivially() {
    mInputs = mExecutionBuilder->mInputs;
    mOutputs = mExecutionBuilder->mOutputs;
    mMemories = mExecutionBuilder->mMemories;
}

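// Copies one argument from the ExecutionBuilder into this executor,
// translating the builder's memory pool index into this executor's pool
// index space for MEMORY arguments.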
void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                                    ModelArgumentInfo* executorInputOrOutput) {
    *executorInputOrOutput = builderInputOrOutput;
    switch (executorInputOrOutput->state) {
        default:
            nnAssert(!"unexpected ModelArgumentInfo::state");
        case ModelArgumentInfo::POINTER:
        case ModelArgumentInfo::UNSPECIFIED:
            break;
        case ModelArgumentInfo::MEMORY: {
            const uint32_t builderPoolIndex =
                    builderInputOrOutput.locationAndLength.poolIndex;
            const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
            const uint32_t executorPoolIndex = mMemories.add(memory);
            executorInputOrOutput->locationAndLength.poolIndex =
                    executorPoolIndex;
            break;
        }
    }
}

int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                                      const Memory* memory, uint32_t offset,
                                                      ModelArgumentInfo* inputOrOutputInfo) {
    // Should be similar to
    //     ExecutionBuilder::setInputFromMemory()
    //     ExecutionBuilder::setOutputFromMemory()

    uint32_t poolIndex = mMemories.add(memory);
    return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset);
}

static void logArguments(const char* kind, const std::vector<ModelArgumentInfo>& args) {
    for (unsigned i = 0; i < args.size(); i++) {
        const auto& arg = args[i];
        std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
        switch (arg.state) {
            case ModelArgumentInfo::POINTER:
                VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer) << ")";
                break;
            case ModelArgumentInfo::MEMORY:
                VLOG(EXECUTION) << prefix << "MEMORY("
                                << "pool=" << arg.locationAndLength.poolIndex
                                << ", "
                                << "off=" << arg.locationAndLength.offset
                                << ")";
                break;
            case ModelArgumentInfo::HAS_NO_VALUE:
                VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
                break;
            case ModelArgumentInfo::UNSPECIFIED:
                VLOG(EXECUTION) << prefix << "UNSPECIFIED";
                break;
            default:
                VLOG(EXECUTION) << prefix << "state(" << arg.state << ")";
                break;
        }
    }
}

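// Dispatches to the CPU path or the driver path, depending on whether a
// driver was supplied at construction time.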
int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
    if (VLOG_IS_ON(EXECUTION)) {
        logArguments("input", mInputs);
        logArguments("output", mOutputs);
    }
    if (mDriver == nullptr) {
        return startComputeOnCpu(synchronizationCallback);
    } else {
        return startComputeOnDevice(synchronizationCallback);
    }
}

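// Runs this step on a driver: prepares the model if it hasn't been prepared
// yet, gathers pointer-based arguments into shared memory pools, issues the
// execute() call to the prepared model, and copies pointer-based outputs
// back out of shared memory.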
int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback) {
    nnAssert(mDriver != nullptr);

    *synchronizationCallback = nullptr;

    // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated
    // ExecutionPlan with the compilation and execution phases of the NN API
    if (mPreparedModel == nullptr) {
        Model model;
        mModel->setHidlModel(&model);

        // TODO Dangerous! In async, the model will outlive it here. Safe for now
        sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback();
        // TODO(butlermichael): Propagate user preference to this point instead of
        // using the default value of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, or
        // remove this entire block of code since it is a stale path that is only
        // encountered in #if-removed code.
        ExecutionPreference preference =
                static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
        ErrorStatus prepareLaunchStatus = mDriver->prepareModel(model, preference,
                                                                preparedModelCallback);
        if (prepareLaunchStatus != ErrorStatus::NONE) {
            return convertErrorStatusToResultCode(prepareLaunchStatus);
        }

        // Immediately synchronize with the callback object for now
        // TODO: change to asynchronous later
        preparedModelCallback->wait();
        ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus();
        mPreparedModel = preparedModelCallback->getPreparedModel();
        if (prepareReturnStatus != ErrorStatus::NONE) {
            return convertErrorStatusToResultCode(prepareReturnStatus);
        }
        if (mPreparedModel == nullptr) {
            return ANEURALNETWORKS_OP_FAILED;
        }
    }

    // We separate the input & output pools so that we reduce the copying done if we
    // do an eventual remoting (hidl_memory->update()). We could also use it to set
    // protection on read-only memory, but that's not currently done.
    Memory inputPointerArguments;
    Memory outputPointerArguments;

    // Layout the input and output data
    int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }
    n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }

    // Copy the input data that was specified via a pointer.
    // inputPointerArguments.update();
    for (auto& info : mInputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndLength;
            uint8_t* data = nullptr;
            int n = inputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            memcpy(data + loc.offset, info.buffer, loc.length);
        }
    }
    // TODO: Add inputPointerArguments.commit() and .update() at all the right places

    Request request;
    setRequestArgumentArray(mInputs, &request.inputs);
    setRequestArgumentArray(mOutputs, &request.outputs);
    uint32_t count = mMemories.size();
    request.pools.resize(count);
    for (uint32_t i = 0; i < count; i++) {
        request.pools[i] = mMemories[i]->getHidlMemory();
    }

    // Prepare the callback for asynchronous execution. The sp<ExecutionCallback>
    // object is returned when the execution has been successfully launched;
    // otherwise a nullptr is returned. The executionCallback is abstracted in
    // the NN API as an "event".
    //
    // The sp is used for ref-counting purposes. Without it, the HIDL service
    // could attempt to communicate with a dead callback object.
    //
    // TODO: Explain the "dead callback" problem further, either here or
    // in the design document.
    sp<ExecutionCallback> executionCallback = new ExecutionCallback();

    VLOG(EXECUTION) << "Before mPreparedModel->execute() " << SHOW_IF_DEBUG(toString(request));
    // Execute.
    // TODO: What happens to the Callback if the service dies abnormally
    // -- won't that keep the Callback live forever, because the service
    // never has the opportunity to bump the reference count down? Or
    // maybe the HIDL infrastructure handles this magically? At worst,
    // it seems like this is a small memory leak, if the Callback stays
    // alive forever.
    Return<ErrorStatus> executeStatus = mPreparedModel->execute(request, executionCallback);
    if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) {
        VLOG(EXECUTION) << "**Execute failed**";
        return executeStatus.isOk()
                ? convertErrorStatusToResultCode(executeStatus)
                : ANEURALNETWORKS_OP_FAILED;
    }

    // TODO: Remove this synchronization point when the block of code below is
    // removed.
    executionCallback->wait();
    Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
    if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
        VLOG(EXECUTION) << "**Execute async failed**";
        return callbackStatus.isOk()
                ? convertErrorStatusToResultCode(callbackStatus)
                : ANEURALNETWORKS_OP_FAILED;
    }

    // Copy the output data from shared memory to the output buffers.
    // TODO: Move this block of code somewhere else. It should not be in the
    // startCompute function.
    // TODO: outputMemory->update(); outputMemory->commit()
    for (auto& info : mOutputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndLength;
            uint8_t* data = nullptr;
            int n = outputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            memcpy(info.buffer, data + loc.offset, loc.length);
        }
    }
    VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed";

    *synchronizationCallback = executionCallback;
    return ANEURALNETWORKS_NO_ERROR;
}

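// Thread entry point for CPU execution: runs the model synchronously on a
// CpuExecutor and reports the result through the callback.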
static void asyncStartComputeOnCpu(const Model& model, const Request& request,
                                   const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                   const std::vector<RunTimePoolInfo>& requestPoolInfos,
                                   const sp<IExecutionCallback>& executionCallback) {
    CpuExecutor executor;
    int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
    executionCallback->notify(convertResultCodeToErrorStatus(err));
}

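// Runs this step on the CPU: maps the model and request memory pools, wraps
// each pointer-based argument in its own pool, and launches
// asyncStartComputeOnCpu() on a new thread.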
int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) {
    // TODO: use a thread pool

    Model model;
    mModel->setHidlModel(&model);

    // Prepare the callback for asynchronous execution. The sp<ExecutionCallback>
    // object is returned when the execution has been successfully launched;
    // otherwise a nullptr is returned. The executionCallback is abstracted in
    // the NN API as an "event".
    sp<ExecutionCallback> executionCallback = new ExecutionCallback();
    *synchronizationCallback = nullptr;

    std::vector<RunTimePoolInfo> modelPoolInfos;
    if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
        return ANEURALNETWORKS_UNMAPPABLE;
    }

    std::vector<RunTimePoolInfo> requestPoolInfos;
    requestPoolInfos.reserve(mMemories.size());
    bool fail = false;
    for (const Memory* mem : mMemories) {
        requestPoolInfos.emplace_back(mem->getHidlMemory(), &fail);
    }
    if (fail) {
        return ANEURALNETWORKS_UNMAPPABLE;
    }
    // Create as many pools as there are inputs / outputs.
    auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
        for (ModelArgumentInfo& argumentInfo : argumentInfos) {
            if (argumentInfo.state == ModelArgumentInfo::POINTER) {
                argumentInfo.locationAndLength.poolIndex =
                        static_cast<uint32_t>(requestPoolInfos.size());
                argumentInfo.locationAndLength.offset = 0;
                requestPoolInfos.emplace_back(static_cast<uint8_t*>(argumentInfo.buffer));
            }
        }
    };
    fixPointerArguments(mInputs);
    fixPointerArguments(mOutputs);

    Request request;
    setRequestArgumentArray(mInputs, &request.inputs);
    setRequestArgumentArray(mOutputs, &request.outputs);

    // TODO: should model be moved with a std::cref?
    std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
                       std::move(modelPoolInfos), std::move(requestPoolInfos),
                       executionCallback);
    executionCallback->bind_thread(std::move(thread));

    *synchronizationCallback = executionCallback;
    return ANEURALNETWORKS_NO_ERROR;
}

} // namespace nn
} // namespace android