/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16 
#define LOG_TAG "ExecutionBuilder"

#include "ExecutionBuilder.h"

#include "CompilationBuilder.h"
#include "CpuExecutor.h"
#include "HalInterfaces.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "Utils.h"

#include <mutex>
#include <thread>
#include <vector>

namespace android {
namespace nn {
34 
setFromPointer(const Operand & operand,const ANeuralNetworksOperandType * type,void * data,uint32_t length)35 int ModelArgumentInfo::setFromPointer(const Operand& operand,
36                                       const ANeuralNetworksOperandType* type, void* data,
37                                       uint32_t length) {
38     if ((data == nullptr) != (length == 0)) {
39         const char* dataPtrMsg = data ? "NOT_NULLPTR" : "NULLPTR";
40         LOG(ERROR) << "Data pointer must be nullptr if and only if length is zero (data = "
41                    << dataPtrMsg << ", length = " << length << ")";
42         return ANEURALNETWORKS_BAD_DATA;
43     }
44     if (data == nullptr) {
45         state = ModelArgumentInfo::HAS_NO_VALUE;
46     } else {
47         int n = updateDimensionInfo(operand, type);
48         if (n != ANEURALNETWORKS_NO_ERROR) {
49             return n;
50         }
51         uint32_t neededLength = sizeOfData(operand.type, dimensions);
52         if (operand.type != OperandType::OEM && neededLength != length) {
53             LOG(ERROR) << "Setting argument with invalid length: " << length
54                        << ", expected length: " << neededLength;
55             return ANEURALNETWORKS_BAD_DATA;
56         }
57         state = ModelArgumentInfo::POINTER;
58     }
59     buffer = data;
60     locationAndLength = {.poolIndex = 0, .offset = 0, .length = length};
61     return ANEURALNETWORKS_NO_ERROR;
62 }
63 
setFromMemory(const Operand & operand,const ANeuralNetworksOperandType * type,uint32_t poolIndex,uint32_t offset,uint32_t length)64 int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
65                                      uint32_t poolIndex, uint32_t offset, uint32_t length) {
66     int n = updateDimensionInfo(operand, type);
67     if (n != ANEURALNETWORKS_NO_ERROR) {
68         return n;
69     }
70     uint32_t neededLength = sizeOfData(operand.type, dimensions);
71     if (operand.type != OperandType::OEM && neededLength != length) {
72         LOG(ERROR) << "Setting argument with invalid length: " << length
73                    << ", expected length: " << neededLength;
74         return ANEURALNETWORKS_BAD_DATA;
75     }
76 
77     state = ModelArgumentInfo::MEMORY;
78     locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length};
79     buffer = nullptr;
80     return ANEURALNETWORKS_NO_ERROR;
81 }
82 
setFromTemporaryMemory(const Operand & operand,uint32_t poolIndex,uint32_t offset)83 int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand,
84                                               uint32_t poolIndex, uint32_t offset) {
85     int n = updateDimensionInfo(operand, nullptr);
86     if (n != ANEURALNETWORKS_NO_ERROR) {
87         return n;
88     }
89     state = ModelArgumentInfo::MEMORY;
90     locationAndLength =
91             {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)};
92     buffer = nullptr;
93     return ANEURALNETWORKS_NO_ERROR;
94 }
95 
updateDimensionInfo(const Operand & operand,const ANeuralNetworksOperandType * newType)96 int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
97                                            const ANeuralNetworksOperandType* newType) {
98     nnAssert(dimensions.empty());
99     if (newType == nullptr) {
100         for (auto i : operand.dimensions) {
101             if (i == 0) {
102                 LOG(ERROR) << "Setting input/output with unspecified dimensions";
103                 return ANEURALNETWORKS_BAD_DATA;
104             }
105         }
106         dimensions = operand.dimensions;
107     } else {
108         uint32_t count = newType->dimensionCount;
109         if (static_cast<OperandType>(newType->type) != operand.type ||
110             count != operand.dimensions.size()) {
111             LOG(ERROR) << "Setting input/output with incompatible types";
112             return ANEURALNETWORKS_BAD_DATA;
113         }
114 
115         dimensions = hidl_vec<uint32_t>(count);
116         for (uint32_t i = 0; i < count; i++) {
117             if (operand.dimensions[i] != 0 && operand.dimensions[i] != newType->dimensions[i]) {
118                 LOG(ERROR) << "Overriding a fully specified dimension is disallowed";
119                 return ANEURALNETWORKS_BAD_DATA;
120             } else {
121                 dimensions[i] = newType->dimensions[i];
122             }
123         }
124     }
125     return ANEURALNETWORKS_NO_ERROR;
126 }
127 
// Creates an execution for the given compilation, snapshotting its model,
// plan, and partitioning setting, and pre-sizing the input/output argument
// tables to match the model's input/output counts.
ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
        mModel(compilation->mModel),
        mPlan(&compilation->mPlan),
        mPartitioning(compilation->mPartitioning),
        mInputs(mModel->inputCount()),
        mOutputs(mModel->outputCount()) {
    VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
}
136 
setInput(uint32_t index,const ANeuralNetworksOperandType * type,const void * buffer,size_t length)137 int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
138                                const void* buffer, size_t length) {
139     uint32_t count = static_cast<uint32_t>(mInputs.size());
140     if (index >= count) {
141         LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
142         return ANEURALNETWORKS_BAD_DATA;
143     }
144     if (type != nullptr) {
145         int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false);
146         if (n != ANEURALNETWORKS_NO_ERROR) {
147             return n;
148         }
149     }
150     if (length > 0xFFFFFFFF) {
151         LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
152         return ANEURALNETWORKS_BAD_DATA;
153     }
154     uint32_t l = static_cast<uint32_t>(length);
155     return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
156                                          const_cast<void*>(buffer), l);
157 }
158 
setInputFromMemory(uint32_t index,const ANeuralNetworksOperandType * type,const Memory * memory,size_t offset,size_t length)159 int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
160                                          const Memory* memory, size_t offset, size_t length) {
161     // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
162 
163     uint32_t count = static_cast<uint32_t>(mInputs.size());
164     if (index >= count) {
165         LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
166                    << count;
167         return ANEURALNETWORKS_BAD_DATA;
168     }
169     if (!memory->validateSize(offset, length)) {
170         return ANEURALNETWORKS_BAD_DATA;
171     }
172     // TODO validate the rest
173     uint32_t poolIndex = mMemories.add(memory);
174     return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
175                                         length);
176 }
177 
setOutput(uint32_t index,const ANeuralNetworksOperandType * type,void * buffer,size_t length)178 int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
179                                 size_t length) {
180     uint32_t count = static_cast<uint32_t>(mOutputs.size());
181     if (index >= count) {
182         LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
183         return ANEURALNETWORKS_BAD_DATA;
184     }
185     if (type != nullptr) {
186         int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false);
187         if (n != ANEURALNETWORKS_NO_ERROR) {
188             return n;
189         }
190     }
191     if (length > 0xFFFFFFFF) {
192         LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length;
193         return ANEURALNETWORKS_BAD_DATA;
194     }
195     uint32_t l = static_cast<uint32_t>(length);
196     return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l);
197 }
198 
setOutputFromMemory(uint32_t index,const ANeuralNetworksOperandType * type,const Memory * memory,size_t offset,size_t length)199 int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
200                                           const Memory* memory, size_t offset, size_t length) {
201     // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
202 
203     uint32_t count = static_cast<uint32_t>(mOutputs.size());
204     if (index >= count) {
205         LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
206                    << count;
207         return ANEURALNETWORKS_BAD_DATA;
208     }
209     if (!memory->validateSize(offset, length)) {
210         return ANEURALNETWORKS_BAD_DATA;
211     }
212     // TODO validate the rest
213     uint32_t poolIndex = mMemories.add(memory);
214     return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
215                                          length);
216 }
217 
218 // Attempt synchronous execution of full model on CPU.
219 // Ensure that executionCallback->notify() is called.
cpuFallbackFull(const ExecutionBuilder * executionBuilder,const sp<ExecutionCallback> & executionCallback)220 static void cpuFallbackFull(const ExecutionBuilder* executionBuilder,
221                             const sp<ExecutionCallback>& executionCallback) {
222     VLOG(EXECUTION) << "cpuFallbackFull";
223     StepExecutor executor(executionBuilder, executionBuilder->getModel(),
224                           nullptr /* no VersionedIDevice, so CPU */,
225                           nullptr /* no IPreparedModel */);
226     executor.mapInputsAndOutputsTrivially();
227     sp<ExecutionCallback> fallbackCallback;
228     int n = executor.startCompute(&fallbackCallback);
229     if (n != ANEURALNETWORKS_NO_ERROR) {
230         executionCallback->notify(convertResultCodeToErrorStatus(n));
231         return;
232     }
233     fallbackCallback->wait();
234     executionCallback->notify(fallbackCallback->getStatus());
235 }
236 
237 // Attempt synchronous execution on CPU.
238 // (1) First, attempt to execute this step on CPU.  If successful,
239 //     return true.  (Do not call executionCallback->notify().)
240 // (2) If unsuccessful, attempt to execute the full model on CPU,
241 //     ensure that executionCallback->notify() is called, and return
242 //     false.
cpuFallbackPartial(const ExecutionBuilder * executionBuilder,const ExecutionPlan * plan,std::shared_ptr<ExecutionPlan::Controller> controller,const sp<ExecutionCallback> & executionCallback)243 static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder,
244                                const ExecutionPlan* plan,
245                                std::shared_ptr<ExecutionPlan::Controller> controller,
246                                const sp<ExecutionCallback>& executionCallback) {
247     VLOG(EXECUTION) << "cpuFallbackPartial";
248     std::shared_ptr<StepExecutor> executor;
249     int n = plan->fallback(controller, &executor);
250     if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
251         cpuFallbackFull(executionBuilder, executionCallback);
252         return false;
253     }
254     sp<ExecutionCallback> fallbackCallback;
255     if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
256         cpuFallbackFull(executionBuilder, executionCallback);
257         return false;
258     }
259     fallbackCallback->wait();
260     if (fallbackCallback->getStatus() != ErrorStatus::NONE) {
261         cpuFallbackFull(executionBuilder, executionCallback);
262         return false;
263     }
264     return true;
265 }
266 
asyncStartComputePartitioned(const ExecutionBuilder * executionBuilder,const ExecutionPlan * plan,std::shared_ptr<ExecutionPlan::Controller> controller,bool allowFallback,const sp<ExecutionCallback> & executionCallback)267 static void asyncStartComputePartitioned(const ExecutionBuilder* executionBuilder,
268                                          const ExecutionPlan* plan,
269                                          std::shared_ptr<ExecutionPlan::Controller> controller,
270                                          bool allowFallback,
271                                          const sp<ExecutionCallback>& executionCallback) {
272     VLOG(EXECUTION) << "ExecutionBuilder::startCompute (from plan, iteratively)";
273     while (true) {
274         std::shared_ptr<StepExecutor> executor;
275         VLOG(EXECUTION) << "looking for next StepExecutor";
276         int n = plan->next(controller, &executor);
277         if (n != ANEURALNETWORKS_NO_ERROR) {
278             if (allowFallback) {
279                 cpuFallbackFull(executionBuilder, executionCallback);
280             } else {
281                 executionCallback->notify(convertResultCodeToErrorStatus(n));
282             }
283             return;
284         }
285         if (executor == nullptr) {
286             executionCallback->notify(ErrorStatus::NONE);
287             return;
288         }
289 
290         sp<ExecutionCallback> stepCallback;
291         n = executor->startCompute(&stepCallback);
292         if (n != ANEURALNETWORKS_NO_ERROR) {
293             if (allowFallback) {
294                 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
295                     // Successfully executed one step on CPU.
296                     continue;
297                 } else {
298                     // Either successfully executed entire plan on
299                     // CPU, or tried and failed to do so.
300                     return;
301                 }
302             } else {
303                 executionCallback->notify(convertResultCodeToErrorStatus(n));
304                 return;
305             }
306         }
307         stepCallback->wait();
308         ErrorStatus status = stepCallback->getStatus();
309         if (status != ErrorStatus::NONE) {
310             if (allowFallback) {
311                 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
312                     // Successfully executed one step on CPU.
313                     continue;
314                 } else {
315                     // Either successfully executed entire plan on
316                     // CPU, or tried and failed to do so.
317                     return;
318                 }
319             } else {
320                 executionCallback->notify(status);
321                 return;
322             }
323         }
324     }
325 }
326 
// Launches execution of this request.  On success, returns
// ANEURALNETWORKS_NO_ERROR and stores the "event" object used to wait
// for completion into *synchronizationCallback; on failure, leaves
// *synchronizationCallback as nullptr and returns an error code.
int ExecutionBuilder::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
    *synchronizationCallback = nullptr;

    // TODO validate that we have full types for all inputs and outputs,
    // that the graph is not cyclic,

    // Every input and output must have been bound (or explicitly given no
    // value) via setInput*/setOutput* before execution may begin.
    for (auto& p : mInputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }
    for (auto& p : mOutputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }

#ifndef DISABLE_PARTITIONED_EXECUTION
    {
        // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan
        // with the compilation and execution phases of the NN API?  Or retain that path
        // as a fallback in the case of partitioning failure?
        //
        // TODO: Entire plan-based-path should run in an asynchronous thread --
        // take the asynchronous thread logic out of startComputeOnCpu() and use
        // it to wrap the plan-based-path.
        if (mPartitioning > 0) {
            const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
            std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this);
            if (controller == nullptr) {
                // No controller could be made for the plan.  Unless CPU
                // fallback is allowed, this is fatal; with fallback allowed we
                // drop through to the CPU path at the bottom of this function.
                if (!allowFallback) {
                    return ANEURALNETWORKS_OP_FAILED;
                }
            } else {
                // TODO: use a thread pool

                // Prepare the callback for asynchronous execution.
                // sp<ExecutionCallback> object is returned when the
                // execution has been successfully launched, otherwise a
                // nullptr is returned.  The executionCallback is
                // abstracted in the NN API as an "event".
                sp<ExecutionCallback> executionCallback = new ExecutionCallback();
                std::thread thread(asyncStartComputePartitioned, this, mPlan, controller,
                                   allowFallback,
                                   executionCallback);
                // The callback takes ownership of the thread and joins it later.
                executionCallback->bind_thread(std::move(thread));
                *synchronizationCallback = executionCallback;
                return ANEURALNETWORKS_NO_ERROR;
            }
        }
    }
#else
    {
        // Find a driver that can handle all the operations.
        // TODO: Does not handle CPU fallback (which is tricky because
        //       StepExecutor::startCompute() is designed as
        //       asynchronous).
        // TODO: Does not actually behave asynchronously (because
        //       StepExecutor::startCompute() isn't actually asynchronous
        //       on a device as opposed to a CPU).
        Model hidlModel;
        mModel->setHidlModel(&hidlModel);
        const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers();
        for (const auto& device : devices) {
            hidl_vec<bool> supports;
            VLOG(EXECUTION) << "Checking " << device->getName();
            device->getSupportedOperations(hidlModel, &supports);
            // Use this device only if it supports every operation in the model.
            if (std::find(supports.begin(), supports.end(), false) == supports.end()) {
                VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on " << device->getName();
                StepExecutor executor(this, mModel, device->getInterface(),
                                      nullptr /* no IPreparedModel, so compile */);
                executor.mapInputsAndOutputsTrivially();
                return executor.startCompute(synchronizationCallback);
            }
        }
    }
#endif  // DISABLE_PARTITIONED_EXECUTION

    // Run on the CPU.
    VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on CPU";
    StepExecutor executor(this, mModel,
                          nullptr /* no VersionedIDevice, so CPU */,
                          nullptr /* no IPreparedModel */);
    executor.mapInputsAndOutputsTrivially();
    return executor.startCompute(synchronizationCallback);
}
415 
416 // Figures out how to place each of the input or outputs in a buffer. This just does the layout,
417 // it does not copy data.  Aligns each input a bit.
allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo> * args,Memory * memory)418 int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
419                                                  Memory* memory) {
420     uint32_t nextPoolIndex = mMemories.size();
421     int64_t total = 0;
422     for (auto& info : *args) {
423         if (info.state == ModelArgumentInfo::POINTER) {
424             DataLocation& loc = info.locationAndLength;
425             // TODO Good enough alignment?
426             total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
427             loc.poolIndex = nextPoolIndex;
428             loc.offset = static_cast<uint32_t>(total);
429             total += loc.length;
430         }
431     };
432     if (total > 0xFFFFFFFF) {
433         LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds "
434                       "2^32.";
435         return ANEURALNETWORKS_BAD_DATA;
436     }
437     hidl_memory hidlMemory;
438     if (total > 0) {
439         memory->create(total);  // TODO check error
440         mMemories.add(memory);
441     }
442     return ANEURALNETWORKS_NO_ERROR;
443 }
444 
setRequestArgumentArray(const std::vector<ModelArgumentInfo> & argumentInfos,hidl_vec<RequestArgument> * ioInfos)445 static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
446                                      hidl_vec<RequestArgument>* ioInfos) {
447     size_t count = argumentInfos.size();
448     ioInfos->resize(count);
449     for (size_t i = 0; i < count; i++) {
450         const auto& info = argumentInfos[i];
451         (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
452                           .location = info.locationAndLength,
453                           .dimensions = info.dimensions,
454                         };
455     }
456 }
457 
// A StepExecutor runs a single unit of work.  driver == nullptr selects
// the CPU execution path (see StepExecutor::startCompute);
// preparedModel == nullptr means the model still needs to be compiled on
// the driver before execution (see startComputeOnDevice).  The
// input/output tables are sized to match the given model.
StepExecutor::StepExecutor(const ExecutionBuilder* executionBuilder,
                           const ModelBuilder* model,
                           VersionedIDevice* driver, sp<IPreparedModel> preparedModel) :
    mExecutionBuilder(executionBuilder), mModel(model),
    mDriver(driver), mPreparedModel(preparedModel),
    mInputs(model->inputCount()), mOutputs(model->outputCount()) {}
464 
// Copies the argument bindings and memory pools wholesale from the
// ExecutionBuilder.  Used when this executor runs the entire model, so
// the builder's inputs/outputs correspond 1:1 to this step's.
void StepExecutor::mapInputsAndOutputsTrivially() {
    mInputs = mExecutionBuilder->mInputs;
    mOutputs = mExecutionBuilder->mOutputs;
    mMemories = mExecutionBuilder->mMemories;
}
470 
mapInputOrOutput(const ModelArgumentInfo & builderInputOrOutput,ModelArgumentInfo * executorInputOrOutput)471 void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
472                                     ModelArgumentInfo* executorInputOrOutput) {
473     *executorInputOrOutput = builderInputOrOutput;
474     switch (executorInputOrOutput->state) {
475         default:
476             nnAssert(!"unexpected ModelArgumentInfo::state");
477         case ModelArgumentInfo::POINTER:
478         case ModelArgumentInfo::UNSPECIFIED:
479             break;
480         case ModelArgumentInfo::MEMORY: {
481             const uint32_t builderPoolIndex =
482                     builderInputOrOutput.locationAndLength.poolIndex;
483             const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
484             const uint32_t executorPoolIndex = mMemories.add(memory);
485             executorInputOrOutput->locationAndLength.poolIndex =
486                     executorPoolIndex;
487             break;
488         }
489     }
490 }
491 
setInputOrOutputFromTemporaryMemory(const Operand & inputOrOutputOperand,const Memory * memory,uint32_t offset,ModelArgumentInfo * inputOrOutputInfo)492 int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
493                                                       const Memory* memory, uint32_t offset,
494                                                       ModelArgumentInfo* inputOrOutputInfo) {
495     // Should be similar to
496     //     ExecutionBuilder::setInputFromMemory()
497     //     ExecutionBuilder::setOutputFromMemory()
498 
499     uint32_t poolIndex = mMemories.add(memory);
500     return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset);
501 }
502 
logArguments(const char * kind,const std::vector<ModelArgumentInfo> & args)503 static void logArguments(const char* kind, const std::vector<ModelArgumentInfo> &args) {
504     for (unsigned i = 0; i < args.size(); i++) {
505         const auto& arg = args[i];
506         std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
507         switch (arg.state) {
508             case ModelArgumentInfo::POINTER:
509                 VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer) << ")";
510                 break;
511             case ModelArgumentInfo::MEMORY:
512                 VLOG(EXECUTION) << prefix << "MEMORY("
513                                 << "pool=" << arg.locationAndLength.poolIndex
514                                 << ", "
515                                 << "off=" << arg.locationAndLength.offset
516                                 << ")";
517                 break;
518             case ModelArgumentInfo::HAS_NO_VALUE:
519                 VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
520                 break;
521             case ModelArgumentInfo::UNSPECIFIED:
522                 VLOG(EXECUTION) << prefix << "UNSPECIFIED";
523                 break;
524             default:
525                 VLOG(EXECUTION) << prefix << "state(" << arg.state << ")";
526                 break;
527         }
528     }
529 }
530 
startCompute(sp<ExecutionCallback> * synchronizationCallback)531 int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
532     if (VLOG_IS_ON(EXECUTION)) {
533         logArguments("input", mInputs);
534         logArguments("output", mOutputs);
535     }
536     if (mDriver == nullptr) {
537         return startComputeOnCpu(synchronizationCallback);
538     } else {
539         return startComputeOnDevice(synchronizationCallback);
540     }
541 }
542 
// Runs this step on the driver (mDriver must be non-null), first compiling
// the model on the driver if no prepared model was supplied.  Despite the
// name, this currently synchronizes with the driver's execution callback
// before returning (see the TODOs below).
int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback) {
    nnAssert(mDriver != nullptr);

    *synchronizationCallback = nullptr;

    // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated
    // ExecutionPlan with the compilation and execution phases of the NN API
    if (mPreparedModel == nullptr) {
        Model model;
        mModel->setHidlModel(&model);

        // TODO Dangerous!  In async, the model will outlive it here. Safe for now
        sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback();
        // TODO(butlermichael): Propagate user preference to this point instead of
        // using default value of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, or
        // remove this entire block of code since it is a stale path that is only
        // encountered on an #if-removed code.
        ExecutionPreference preference =
            static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
        ErrorStatus prepareLaunchStatus = mDriver->prepareModel(model, preference,
                                                                preparedModelCallback);
        if (prepareLaunchStatus != ErrorStatus::NONE) {
            return convertErrorStatusToResultCode(prepareLaunchStatus);
        }

        // Immediately synchronize with callback object for now
        // TODO: change to asynchronous later
        preparedModelCallback->wait();
        ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus();
        mPreparedModel = preparedModelCallback->getPreparedModel();
        if (prepareReturnStatus != ErrorStatus::NONE) {
            return convertErrorStatusToResultCode(prepareReturnStatus);
        }
        if (mPreparedModel == nullptr) {
            return ANEURALNETWORKS_OP_FAILED;
        }
    }

    // We separate the input & output pools so that we reduce the copying done if we
    // do an eventual remoting (hidl_memory->update()).  We could also use it to set
    // protection on read only memory but that's not currently done.
    Memory inputPointerArguments;
    Memory outputPointerArguments;

    // Layout the input and output data
    int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }
    n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments);
    if (n != ANEURALNETWORKS_NO_ERROR) {
        return n;
    }

    // Copy the input data that was specified via a pointer.
    // inputPointerArguments.update();
    for (auto& info : mInputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndLength;
            uint8_t* data = nullptr;
            // NOTE(review): getPointer() looks loop-invariant and could be
            // hoisted out of this loop — confirm before changing.
            int n = inputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            memcpy(data + loc.offset, info.buffer, loc.length);
        }
    }
    // TODO: Add inputPointerArguments.commit() and .update() at all the right places

    // Build the HIDL Request: argument descriptors plus the list of memory
    // pools they reference.
    Request request;
    setRequestArgumentArray(mInputs, &request.inputs);
    setRequestArgumentArray(mOutputs, &request.outputs);
    uint32_t count = mMemories.size();
    request.pools.resize(count);
    for (uint32_t i = 0; i < count; i++) {
        request.pools[i] = mMemories[i]->getHidlMemory();
    }

    // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
    // object is returned when the execution has been successfully launched,
    // otherwise a nullptr is returned. The executionCallback is abstracted in
    // the NN API as an "event".
    //
    // The sp is used for ref-counting purposes. Without it, the HIDL service
    // could attempt to communicate with a dead callback object.
    //
    // TODO: Explain the "dead callback" problem further, either here or
    // in the design document.
    sp<ExecutionCallback> executionCallback = new ExecutionCallback();

    VLOG(EXECUTION) << "Before mPreparedModel->execute() " << SHOW_IF_DEBUG(toString(request));
    // Execute.
    // TODO: What happens to the Callback if the service dies abnormally
    // -- won't that keep the Callback live forever, because the service
    // never has the opportunity to bump the reference count down? Or
    // maybe the HIDL infrastructure handles this magically? At worst,
    // it seems like this is a small memory leak, if the Callback stays
    // alive forever.
    Return<ErrorStatus> executeStatus = mPreparedModel->execute(request, executionCallback);
    // Both a transport failure (!isOk) and a driver-reported error abort.
    if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) {
        VLOG(EXECUTION) << "**Execute failed**";
        return executeStatus.isOk()
                ? convertErrorStatusToResultCode(executeStatus)
                : ANEURALNETWORKS_OP_FAILED;
    }

    // TODO: Remove this synchronization point when the block of code below is
    // removed.
    executionCallback->wait();
    Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
    if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
        VLOG(EXECUTION) << "**Execute async failed**";
        return callbackStatus.isOk()
                ? convertErrorStatusToResultCode(callbackStatus)
                : ANEURALNETWORKS_OP_FAILED;
    }

    // Copy the output data from shared memory to the output buffers.
    // TODO: Move this block of code somewhere else. It should not be in the
    // startCompute function.
    // TODO: outputMemory->update(); outputMemory->commit()
    for (auto& info : mOutputs) {
        if (info.state == ModelArgumentInfo::POINTER) {
            DataLocation& loc = info.locationAndLength;
            uint8_t* data = nullptr;
            int n = outputPointerArguments.getPointer(&data);
            if (n != ANEURALNETWORKS_NO_ERROR) {
                return n;
            }
            memcpy(info.buffer, data + loc.offset, loc.length);
        }
    }
    VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed";

    *synchronizationCallback = executionCallback;
    return ANEURALNETWORKS_NO_ERROR;
}
680 
asyncStartComputeOnCpu(const Model & model,const Request & request,const std::vector<RunTimePoolInfo> & modelPoolInfos,const std::vector<RunTimePoolInfo> & requestPoolInfos,const sp<IExecutionCallback> & executionCallback)681 static void asyncStartComputeOnCpu(const Model& model, const Request& request,
682                                    const std::vector<RunTimePoolInfo>& modelPoolInfos,
683                                    const std::vector<RunTimePoolInfo>& requestPoolInfos,
684                                    const sp<IExecutionCallback>& executionCallback) {
685     CpuExecutor executor;
686     int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
687     executionCallback->notify(convertResultCodeToErrorStatus(err));
688 }
689 
startComputeOnCpu(sp<ExecutionCallback> * synchronizationCallback)690 int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) {
691     // TODO: use a thread pool
692 
693     Model model;
694     mModel->setHidlModel(&model);
695 
696     // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
697     // object is returned when the execution has been successfully launched,
698     // otherwise a nullptr is returned. The executionCallback is abstracted in
699     // the NN API as an "event".
700     sp<ExecutionCallback> executionCallback = new ExecutionCallback();
701     *synchronizationCallback = nullptr;
702 
703     std::vector<RunTimePoolInfo> modelPoolInfos;
704     if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
705         return ANEURALNETWORKS_UNMAPPABLE;
706     }
707 
708     std::vector<RunTimePoolInfo> requestPoolInfos;
709     requestPoolInfos.reserve(mMemories.size());
710     bool fail = false;
711     for (const Memory* mem : mMemories) {
712         requestPoolInfos.emplace_back(mem->getHidlMemory(), &fail);
713     }
714     if (fail) {
715         return ANEURALNETWORKS_UNMAPPABLE;
716     }
717     // Create as many pools as there are input / output.
718     auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
719         for (ModelArgumentInfo& argumentInfo : argumentInfos) {
720             if (argumentInfo.state == ModelArgumentInfo::POINTER) {
721                 argumentInfo.locationAndLength.poolIndex =
722                             static_cast<uint32_t>(requestPoolInfos.size());
723                 argumentInfo.locationAndLength.offset = 0;
724                 requestPoolInfos.emplace_back(static_cast<uint8_t*>(argumentInfo.buffer));
725             }
726         }
727     };
728     fixPointerArguments(mInputs);
729     fixPointerArguments(mOutputs);
730 
731     Request request;
732     setRequestArgumentArray(mInputs, &request.inputs);
733     setRequestArgumentArray(mOutputs, &request.outputs);
734 
735     // TODO: should model be moved with a std::cref?
736     std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
737                        std::move(modelPoolInfos), std::move(requestPoolInfos),
738                        executionCallback);
739     executionCallback->bind_thread(std::move(thread));
740 
741     *synchronizationCallback = executionCallback;
742     return ANEURALNETWORKS_NO_ERROR;
743 }
744 
745 }  // namespace nn
746 }  // namespace android
747