1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #define LOG_TAG "ExecutionBuilder"
18 
19 #include "ExecutionBuilder.h"
20 
21 #include "CompilationBuilder.h"
22 #include "CpuExecutor.h"
23 #include "ExecutionBurstController.h"
24 #include "HalInterfaces.h"
25 #include "Manager.h"
26 #include "ModelBuilder.h"
27 #include "Tracing.h"
28 #include "TypeManager.h"
29 #include "Utils.h"
30 
31 #include <mutex>
32 #include <optional>
33 #include <thread>
34 #include <vector>
35 
36 namespace android {
37 namespace nn {
38 
39 using HidlToken = hidl_array<uint8_t, ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN>;
40 
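// A Timing value with both fields set to UINT64_MAX indicates that no measurement is available.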
41 const Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
42 
43 static MeasureTiming measureTiming(const ExecutionBuilder* execution) {
44     return execution->measureTiming() ? MeasureTiming::YES : MeasureTiming::NO;
45 }
46 
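// Validates the ANeuralNetworksOperandType optionally supplied for an input or output against
// the corresponding model operand: the new type must be valid, its rank must match the operand's
// rank when the model specifies one, and it may only fill in dimensions that the model left
// unspecified (0). When no type is supplied, the operand itself must already be fully specified
// unless allowUnspecified is true.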
47 static bool checkDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType,
48                                const char* tag, bool allowUnspecified) {
49     if (newType != nullptr) {
50         const Extension::OperandTypeInformation* info = nullptr;
51         if (isExtensionOperandType(operand.type)) {
52             NN_RET_CHECK(TypeManager::get()->getExtensionOperandTypeInfo(operand.type, &info));
53         }
54         if (validateOperandType(*newType, info, tag, allowUnspecified) !=
55             ANEURALNETWORKS_NO_ERROR) {
56             LOG(ERROR) << tag << ": Invalid newType";
57             return false;
58         }
59         if (operand.dimensions.size() == 0) {
60             return true;
61         }
62         if (operand.dimensions.size() != newType->dimensionCount) {
63             LOG(ERROR) << tag << ": Setting with incompatible dimension count";
64             return false;
65         }
66         for (uint32_t i = 0; i < newType->dimensionCount; i++) {
67             if (operand.dimensions[i] != newType->dimensions[i] && operand.dimensions[i] != 0) {
68                 LOG(ERROR) << tag << ": Overriding a fully specified dimension is disallowed";
69                 return false;
70             }
71         }
72     } else {
73         if (!allowUnspecified && TypeManager::get()->isTensorType(operand.type) &&
74             tensorHasUnspecifiedDimensions(operand)) {
75             LOG(ERROR) << tag << ": Setting with operand type that is not fully specified";
76             return false;
77         }
78     }
79     return true;
80 }
81 
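// Records an input or output supplied as a raw pointer. A null pointer with zero length marks
// the argument as HAS_NO_VALUE; otherwise the dimensions are refined from "type" (if provided)
// and, for non-OEM operands, the length is checked against the size implied by the operand type.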
82 int ModelArgumentInfo::setFromPointer(const Operand& operand,
83                                       const ANeuralNetworksOperandType* type, void* data,
84                                       uint32_t length) {
85     if ((data == nullptr) != (length == 0)) {
86         const char* dataPtrMsg = data ? "NOT_NULLPTR" : "NULLPTR";
87         LOG(ERROR) << "Data pointer must be nullptr if and only if length is zero (data = "
88                    << dataPtrMsg << ", length = " << length << ")";
89         return ANEURALNETWORKS_BAD_DATA;
90     }
91     if (data == nullptr) {
92         state = ModelArgumentInfo::HAS_NO_VALUE;
93     } else {
94         NN_RETURN_IF_ERROR(updateDimensionInfo(operand, type));
95         if (operand.type != OperandType::OEM) {
96             uint32_t neededLength = TypeManager::get()->getSizeOfData(operand.type, dimensions);
97             if (neededLength != length && neededLength != 0) {
98                 LOG(ERROR) << "Setting argument with invalid length: " << length
99                            << ", expected length: " << neededLength;
100                 return ANEURALNETWORKS_BAD_DATA;
101             }
102         }
103         state = ModelArgumentInfo::POINTER;
104     }
105     buffer = data;
106     locationAndLength = {.poolIndex = 0, .offset = 0, .length = length};
107     return ANEURALNETWORKS_NO_ERROR;
108 }
109 
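// Records an input or output supplied as a region of a memory pool identified by poolIndex,
// applying the same dimension and length checks as setFromPointer().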
110 int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
111                                      uint32_t poolIndex, uint32_t offset, uint32_t length) {
112     NN_RETURN_IF_ERROR(updateDimensionInfo(operand, type));
113     if (operand.type != OperandType::OEM) {
114         uint32_t neededLength = TypeManager::get()->getSizeOfData(operand.type, dimensions);
115         if (neededLength != length && neededLength != 0) {
116             LOG(ERROR) << "Setting argument with invalid length: " << length
117                        << ", expected length: " << neededLength;
118             return ANEURALNETWORKS_BAD_DATA;
119         }
120     }
121 
122     state = ModelArgumentInfo::MEMORY;
123     locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length};
124     buffer = nullptr;
125     return ANEURALNETWORKS_NO_ERROR;
126 }
127 
128 int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex,
129                                               uint32_t offset, uint32_t length) {
130     NN_RETURN_IF_ERROR(updateDimensionInfo(operand, nullptr));
131     if (operand.type != OperandType::OEM) {
132         uint32_t neededLength = TypeManager::get()->getSizeOfData(operand.type, dimensions);
133         if (neededLength != length) {
134             LOG(ERROR) << "Setting argument with invalid length: " << length
135                        << ", expected length: " << neededLength;
136             return ANEURALNETWORKS_BAD_DATA;
137         }
138     }
139 
140     state = ModelArgumentInfo::MEMORY;
141     locationAndLength = {
142             .poolIndex = poolIndex,
143             .offset = offset,
144             .length = length,
145     };
146     buffer = nullptr;
147     return ANEURALNETWORKS_NO_ERROR;
148 }
149 
150 int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
151                                            const ANeuralNetworksOperandType* newType) {
152     if (newType == nullptr) {
153         dimensions = operand.dimensions;
154     } else {
155         const uint32_t count = newType->dimensionCount;
156         dimensions = hidl_vec<uint32_t>(count);
157         std::copy(&newType->dimensions[0], &newType->dimensions[count], dimensions.begin());
158     }
159     return ANEURALNETWORKS_NO_ERROR;
160 }
161 
162 ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation)
163     : mCompilation(compilation),
164       mModel(compilation->mModel),
165       mPlan(&compilation->mPlan),
166       mPartitioning(compilation->mPartitioning),
167       mInputs(mModel->inputCount()),
168       mOutputs(mModel->outputCount()) {
169     VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
170 }
171 
172 int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
173                                const void* buffer, size_t length) {
174     if (mStarted) {
175         LOG(ERROR) << "ANeuralNetworksExecution_setInput called after the "
176                       "execution has started.";
177         return ANEURALNETWORKS_BAD_STATE;
178     }
179     uint32_t count = static_cast<uint32_t>(mInputs.size());
180     if (index >= count) {
181         LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
182         return ANEURALNETWORKS_BAD_DATA;
183     }
184     if (!checkDimensionInfo(mModel->getInputOperand(index), type,
185                             "ANeuralNetworksExecution_setInput", buffer == nullptr)) {
186         return ANEURALNETWORKS_BAD_DATA;
187     }
188     if (length > 0xFFFFFFFF) {
189         LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
190         return ANEURALNETWORKS_BAD_DATA;
191     }
192     uint32_t l = static_cast<uint32_t>(length);
193     return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
194                                          const_cast<void*>(buffer), l);
195 }
196 
197 int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
198                                          const Memory* memory, size_t offset, size_t length) {
199     // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
200 
201     if (mStarted) {
202         LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory called after the "
203                       "execution has started.";
204         return ANEURALNETWORKS_BAD_STATE;
205     }
206     uint32_t count = static_cast<uint32_t>(mInputs.size());
207     if (index >= count) {
208         LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
209                    << count;
210         return ANEURALNETWORKS_BAD_DATA;
211     }
212     if (!checkDimensionInfo(mModel->getInputOperand(index), type,
213                             "ANeuralNetworksExecution_setInputFromMemory", false)) {
214         return ANEURALNETWORKS_BAD_DATA;
215     }
216     // Both offset & length must be zero for Non-BLOB format AHardwareBuffer.
217     if (memory->getHidlMemory().name() == "hardware_buffer" && (offset != 0 || length != 0)) {
218         LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory has non-zero offset or length"
219                    << " for Non-BLOB format AHardwareBuffer.";
220         return ANEURALNETWORKS_BAD_DATA;
221     } else if (!memory->validateSize(offset, length)) {
222         return ANEURALNETWORKS_BAD_DATA;
223     }
224     // TODO validate the rest
225     uint32_t poolIndex = mMemories.add(memory);
226     return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
227                                         length);
228 }
229 
230 int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type,
231                                 void* buffer, size_t length) {
232     if (mStarted) {
233         LOG(ERROR) << "ANeuralNetworksExecution_setOutput called after the "
234                       "execution has started.";
235         return ANEURALNETWORKS_BAD_STATE;
236     }
237     uint32_t count = static_cast<uint32_t>(mOutputs.size());
238     if (index >= count) {
239         LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
240         return ANEURALNETWORKS_BAD_DATA;
241     }
242     if (!checkDimensionInfo(mModel->getOutputOperand(index), type,
243                             "ANeuralNetworksExecution_setOutput", true)) {
244         return ANEURALNETWORKS_BAD_DATA;
245     }
246     if (length > 0xFFFFFFFF) {
247         LOG(ERROR) << "ANeuralNetworksExecution_setOutput output exceeds max length " << length;
248         return ANEURALNETWORKS_BAD_DATA;
249     }
250     uint32_t l = static_cast<uint32_t>(length);
251     return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l);
252 }
253 
254 int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
255                                           const Memory* memory, size_t offset, size_t length) {
256     // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
257 
258     if (mStarted) {
259         LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory called after the "
260                       "execution has started.";
261         return ANEURALNETWORKS_BAD_STATE;
262     }
263     uint32_t count = static_cast<uint32_t>(mOutputs.size());
264     if (index >= count) {
265         LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
266                    << count;
267         return ANEURALNETWORKS_BAD_DATA;
268     }
269     if (!checkDimensionInfo(mModel->getOutputOperand(index), type,
270                             "ANeuralNetworksExecution_setOutputFromMemory", true)) {
271         return ANEURALNETWORKS_BAD_DATA;
272     }
273     // Both offset & length must be zero for Non-BLOB format AHardwareBuffer.
274     if (memory->getHidlMemory().name() == "hardware_buffer" && (offset != 0 || length != 0)) {
275         LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory has non-zero offset or length"
276                    << " for Non-BLOB format AHardwareBuffer.";
277         return ANEURALNETWORKS_BAD_DATA;
278     } else if (!memory->validateSize(offset, length)) {
279         return ANEURALNETWORKS_BAD_DATA;
280     }
281     // TODO validate the rest
282     uint32_t poolIndex = mMemories.add(memory);
283     return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
284                                          length);
285 }
286 
287 int ExecutionBuilder::setMeasureTiming(bool measure) {
288     if (!mCompilation->mExplicitDeviceList || (mCompilation->mDevices.size() != 1)) {
289         LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called on "
290                    << "an ANeuralNetworksExecution created from an ANeuralNetworksCompilation "
291                    << "that was not created by ANeuralNetworksCompilation_createForDevices "
292                    << "with numDevices = 1";
293         return ANEURALNETWORKS_BAD_DATA;
294     }
295     if (mStarted) {
296         LOG(ERROR) << "ANeuralNetworksExecution_setMeasureTiming called after the "
297                       "execution has started.";
298         return ANEURALNETWORKS_BAD_STATE;
299     }
300     mMeasureTiming = measure;
301     return ANEURALNETWORKS_NO_ERROR;
302 }
303 
304 int ExecutionBuilder::getDuration(int32_t durationCode, uint64_t* duration) const {
305     if (!mFinished) {
306         LOG(ERROR) << "ANeuralNetworksExecution_getDuration called before the "
307                       "execution has finished.";
308         return ANEURALNETWORKS_BAD_STATE;
309     }
310 
311     // NOTE: At the HAL level, timing is in microseconds. At the NDK level, nanoseconds.
312     const uint64_t kNanoPerMicro = 1000;
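    // For example, a driver-reported duration of 1500 us is returned to the caller as 1500000 ns.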
313 
314     if (!mMeasureTiming) {
315         *duration = UINT64_MAX;
316         return ANEURALNETWORKS_BAD_STATE;
317     }
318 
319     uint64_t microDuration = UINT64_MAX;
320     switch (durationCode) {
321         case ANEURALNETWORKS_DURATION_ON_HARDWARE:
322             microDuration = mTiming.timeOnDevice;
323             break;
324         case ANEURALNETWORKS_DURATION_IN_DRIVER:
325             microDuration = mTiming.timeInDriver;
326             break;
327         default:
328             CHECK(!"unexpected");
329     }
330     *duration = (microDuration == UINT64_MAX) ? UINT64_MAX : kNanoPerMicro * microDuration;
331 
332     VLOG(EXECUTION) << "getDuration(" << durationCode << "): " << *duration;
333     return ANEURALNETWORKS_NO_ERROR;
334 }
335 
336 int ExecutionBuilder::getOutputOperandDimensions(uint32_t index, uint32_t* dimensions) {
337     if (!mFinished) {
338         LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions called before the "
339                       "execution has finished.";
340         return ANEURALNETWORKS_BAD_STATE;
341     }
342     uint32_t count = static_cast<uint32_t>(mOutputs.size());
343     if (index >= count) {
344         LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions bad index " << index
345                    << " " << count;
346         return ANEURALNETWORKS_BAD_DATA;
347     }
348     const auto& dims = mOutputs[index].dimensions;
349     if (dims.empty()) {
350         LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandDimensions can not query "
351                       "dimensions of a scalar";
352         return ANEURALNETWORKS_BAD_DATA;
353     }
354     std::copy(dims.begin(), dims.end(), dimensions);
355     return mOutputs[index].isSufficient ? ANEURALNETWORKS_NO_ERROR
356                                         : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
357 }
358 
359 int ExecutionBuilder::getOutputOperandRank(uint32_t index, uint32_t* rank) {
360     if (!mFinished) {
361         LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank called before the "
362                       "execution has finished.";
363         return ANEURALNETWORKS_BAD_STATE;
364     }
365     uint32_t count = static_cast<uint32_t>(mOutputs.size());
366     if (index >= count) {
367         LOG(ERROR) << "ANeuralNetworksExecution_getOutputOperandRank bad index " << index << " "
368                    << count;
369         return ANEURALNETWORKS_BAD_DATA;
370     }
371     *rank = static_cast<uint32_t>(mOutputs[index].dimensions.size());
372     return mOutputs[index].isSufficient ? ANEURALNETWORKS_NO_ERROR
373                                         : ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE;
374 }
375 
376 // Attempt synchronous execution of full model on CPU.
377 // Ensure that executionCallback->notify() is called.
378 // TODO: How should we handle timing in this case?
379 //       For Q this is irrelevant: We only support timing in conjunction
380 //         with an explicit device list; and we do not support CPU fallback
381 //         with an explicit device list.  See CompilationBuilder::mExplicitDeviceList.
382 static void cpuFallbackFull(ExecutionBuilder* executionBuilder,
383                             const sp<ExecutionCallback>& executionCallback) {
384     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackFull");
385     VLOG(EXECUTION) << "cpuFallbackFull";
386     StepExecutor executor(executionBuilder, executionBuilder->getModel(),
387                           DeviceManager::getCpuDevice(), /*preparedModel=*/nullptr);
388     executor.mapInputsAndOutputsTrivially();
389     sp<ExecutionCallback> fallbackCallback;
390     int n = executor.startCompute(&fallbackCallback);
391     if (n != ANEURALNETWORKS_NO_ERROR) {
392         executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
393         return;
394     }
395     fallbackCallback->wait();
396     executionCallback->notify(fallbackCallback->getStatus(), fallbackCallback->getOutputShapes(),
397                               fallbackCallback->getTiming());
398 }
399 
400 // Attempt synchronous execution on CPU.
401 // (1) First, attempt to execute this step on CPU.  If successful,
402 //     return true.  (Do not call executionCallback->notify().)
403 // (2) If unsuccessful, attempt to execute the full model on CPU,
404 //     ensure that executionCallback->notify() is called, and return
405 //     false.
406 // TODO: How should we handle timing in this case?
407 //       For Q this is irrelevant: We only support timing in conjunction
408 //         with an explicit device list; and we do not support CPU fallback
409 //         with an explicit device list.  See CompilationBuilder::mExplicitDeviceList.
410 static bool cpuFallbackPartial(ExecutionBuilder* executionBuilder, const ExecutionPlan* plan,
411                                std::shared_ptr<ExecutionPlan::Controller> controller,
412                                const sp<ExecutionCallback>& executionCallback,
413                                std::vector<OutputShape>* outputShapes) {
414     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "cpuFallbackPartial");
415     VLOG(EXECUTION) << "cpuFallbackPartial";
416     std::shared_ptr<StepExecutor> executor;
417     int n = plan->fallback(controller, &executor);
418     if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
419         cpuFallbackFull(executionBuilder, executionCallback);
420         return false;
421     }
422     sp<ExecutionCallback> fallbackCallback;
423     if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
424         cpuFallbackFull(executionBuilder, executionCallback);
425         return false;
426     }
427     fallbackCallback->wait();
428     ErrorStatus status = fallbackCallback->getStatus();
429     const auto& stepOutputShapes = fallbackCallback->getOutputShapes();
430     if (!executor->updateOutputShapes(stepOutputShapes, outputShapes)) {
431         status = ErrorStatus::GENERAL_FAILURE;
432     }
433     if (status != ErrorStatus::NONE) {
434         // OUTPUT_INSUFFICIENT_SIZE is not recoverable
435         if (status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
436             executionCallback->notify(status, *outputShapes, kNoTiming);
437         } else {
438             cpuFallbackFull(executionBuilder, executionCallback);
439         }
440         return false;
441     }
442     return true;
443 }
444 
445 static void asyncStartComputePartitioned(ExecutionBuilder* executionBuilder,
446                                          const ExecutionPlan* plan,
447                                          std::shared_ptr<ExecutionPlan::Controller> controller,
448                                          bool allowFallback,
449                                          const sp<ExecutionCallback>& executionCallback) {
450     VLOG(EXECUTION) << "ExecutionBuilder::compute (from plan, iteratively)";
451     std::vector<OutputShape> outputShapes;
452     Timing timing = kNoTiming;
453     executionBuilder->initializeOutputShapes(&outputShapes);
454     while (true) {
455         std::shared_ptr<StepExecutor> executor;
456         VLOG(EXECUTION) << "looking for next StepExecutor";
457         std::shared_ptr<ExecutionBurstController> burstController = nullptr;
458         int n = plan->next(controller, &executor, &burstController);
459         if (n != ANEURALNETWORKS_NO_ERROR) {
460             if (allowFallback) {
461                 cpuFallbackFull(executionBuilder, executionCallback);
462             } else {
463                 executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
464             }
465             return;
466         }
467         if (executor == nullptr) {
468             executionCallback->notify(ErrorStatus::NONE, outputShapes, timing);
469             return;
470         }
471 
472         sp<ExecutionCallback> stepCallback;
473         n = executor->startCompute(&stepCallback, burstController);
474         if (n != ANEURALNETWORKS_NO_ERROR) {
475             if (allowFallback) {
476                 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback,
477                                        &outputShapes)) {
478                     // Successfully executed one step on CPU.
479                     continue;
480                 } else {
481                     // Either successfully executed entire plan on
482                     // CPU, or tried and failed to do so.
483                     return;
484                 }
485             } else {
486                 executionCallback->notify(convertResultCodeToErrorStatus(n), {}, kNoTiming);
487                 return;
488             }
489         }
490         stepCallback->wait();
491         ErrorStatus status = stepCallback->getStatus();
492         const auto& stepOutputShapes = stepCallback->getOutputShapes();
493         if (!executor->updateOutputShapes(stepOutputShapes, &outputShapes)) {
494             status = ErrorStatus::GENERAL_FAILURE;
495         }
496         if (status == ErrorStatus::NONE) {
497             // We only support collection of timing information in the case of a
498             // single step, so it's safe to just keep track of the last step's
499             // timing information.
500             timing = stepCallback->getTiming();
501         } else {
502             // OUTPUT_INSUFFICIENT_SIZE is not recoverable
503             if (allowFallback && status != ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
504                 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback,
505                                        &outputShapes)) {
506                     // Successfully executed one step on CPU.
507                     continue;
508                 } else {
509                     // Either successfully executed entire plan on
510                     // CPU, or tried and failed to do so.
511                     return;
512                 }
513             } else if (status == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
514                 executionCallback->notify(status, outputShapes, kNoTiming);
515                 return;
516             } else {
517                 executionCallback->notify(status, {}, kNoTiming);
518                 return;
519             }
520         }
521     }
522 }
523 
524 int ExecutionBuilder::compute(sp<ExecutionCallback>* synchronizationCallback,
525                               BurstBuilder* burstBuilder) {
526     CHECK(synchronizationCallback == nullptr || burstBuilder == nullptr)
527             << "synchronizationCallback and burstBuilder cannot simultaneously be used";
528 
529     const bool synchronous = (synchronizationCallback == nullptr);
530 
531     if (!synchronous) {
532         *synchronizationCallback = nullptr;
533     }
534 
535     // TODO validate that we have full types for all inputs and outputs,
536     // that the graph is not cyclic,
537 
538     auto name = [synchronous, burstBuilder] {
539         return burstBuilder ? "burstCompute" : synchronous ? "compute" : "startCompute";
540     };
541     if (mStarted) {
542         LOG(ERROR) << "ANeuralNetworksExecution_" << name()
543                    << " called on an execution that has already started";
544         return ANEURALNETWORKS_BAD_STATE;
545     }
546     for (auto& p : mInputs) {
547         if (p.state == ModelArgumentInfo::UNSPECIFIED) {
548             LOG(ERROR) << "ANeuralNetworksExecution_" << name() << " not all inputs specified";
549             return ANEURALNETWORKS_BAD_DATA;
550         }
551     }
552     for (auto& p : mOutputs) {
553         if (p.state == ModelArgumentInfo::UNSPECIFIED) {
554             LOG(ERROR) << "ANeuralNetworksExecution_" << name() << " not all outputs specified";
555             return ANEURALNETWORKS_BAD_DATA;
556         }
557     }
558 
559     auto wrappedFinish = [this](ErrorStatus error, const std::vector<OutputShape>& outputShapes) {
560         return finish(error, outputShapes);
561     };
562 
563     // TODO: For asynchronous execution, entire plan-based-path should run in an
564     // asynchronous thread -- take the asynchronous thread logic out of
565     // startComputeOnCpu() and use it to wrap the plan-based-path.
566     mStarted = true;
567     const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
568     std::shared_ptr<ExecutionPlan::Controller> controller =
569             mPlan->makeController(this, burstBuilder);
570     if (synchronous) {
571         VLOG(EXECUTION) << "ExecutionBuilder::compute (synchronous API)";
572         sp<ExecutionCallback> localSynchronizationCallback = new ExecutionCallback();
573         localSynchronizationCallback->setOnFinish(wrappedFinish);
574         asyncStartComputePartitioned(this, mPlan, controller, allowFallback,
575                                      localSynchronizationCallback);
576         localSynchronizationCallback->wait();
577         if (mMeasureTiming) {
578             mTiming = localSynchronizationCallback->getTiming();
579         }
580         return convertErrorStatusToResultCode(localSynchronizationCallback->getStatus());
581     } else /* asynchronous */ {
582         // TODO: use a thread pool
583 
584         // Prepare the callback for asynchronous execution.
585         // sp<ExecutionCallback> object is returned when the
586         // execution has been successfully launched, otherwise a
587         // nullptr is returned.  The executionCallback is
588         // abstracted in the NN API as an "event".
589         sp<ExecutionCallback> executionCallback = new ExecutionCallback();
590         executionCallback->setOnFinish(wrappedFinish);
591         if (DeviceManager::get()->syncExecRuntime()) {
592             VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API, non-threaded)";
593             asyncStartComputePartitioned(this, mPlan, controller, allowFallback, executionCallback);
594         } else {
595             VLOG(EXECUTION) << "ExecutionBuilder::compute (asynchronous API)";
596             std::thread thread(asyncStartComputePartitioned, this, mPlan, controller, allowFallback,
597                                executionCallback);
598             executionCallback->bindThread(std::move(thread));
599         }
600         *synchronizationCallback = executionCallback;
601         return ANEURALNETWORKS_NO_ERROR;
602     }
603 }
604 
605 void ExecutionBuilder::initializeOutputShapes(std::vector<OutputShape>* outputShapes) const {
606     outputShapes->resize(mOutputs.size());
607     for (uint32_t i = 0; i < mOutputs.size(); i++) {
608         (*outputShapes)[i].dimensions = mOutputs[i].dimensions;
609         (*outputShapes)[i].isSufficient = true;
610     }
611 }
612 
613 // Checks whether the dimensions "to" can be updated to the dimensions "from", where "from"
614 // must be at least as fully specified as "to".
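// For example, to = {0, 3} is updatable by from = {2, 3}, and an empty "to" (unspecified rank)
// is updatable by anything; but to = {2, 3} is not updatable by from = {2, 4}.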
615 static bool isUpdatable(const std::vector<uint32_t>& to, const std::vector<uint32_t>& from) {
616     if (to.size() == 0) return true;
617     NN_RET_CHECK_EQ(to.size(), from.size());
618     for (uint32_t i = 0; i < to.size(); i++) {
619         NN_RET_CHECK(to[i] == from[i] || to[i] == 0);
620     }
621     return true;
622 }
623 
624 bool ExecutionBuilder::updateOutputShapes(const std::vector<OutputShape>& outputShapes) {
625     if (outputShapes.size() == 0) {
626         return true;
627     }
628     NN_RET_CHECK_EQ(outputShapes.size(), mOutputs.size());
629     for (uint32_t i = 0; i < outputShapes.size(); i++) {
630         // Check if only unspecified dimensions or rank are overwritten.
631         NN_RET_CHECK(isUpdatable(mOutputs[i].dimensions, outputShapes[i].dimensions));
632     }
633     for (uint32_t i = 0; i < outputShapes.size(); i++) {
634         mOutputs[i].dimensions = outputShapes[i].dimensions;
635         mOutputs[i].isSufficient = outputShapes[i].isSufficient;
636     }
637     return true;
638 }
639 
640 ErrorStatus ExecutionBuilder::finish(ErrorStatus, const std::vector<OutputShape>& outputShapes) {
641     CHECK(!mFinished) << "ExecutionBuilder::finish is called twice";
642     mFinished = true;
643     if (!updateOutputShapes(outputShapes)) {
644         return ErrorStatus::GENERAL_FAILURE;
645     }
646     return ErrorStatus::NONE;
647 }
648 
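// Merges the output shapes reported by one step ("from") into the output shapes of the overall
// execution ("to"). When this executor runs a partition step, the step's output indices are
// mapped back to the corresponding output indices of the original model.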
649 bool StepExecutor::updateOutputShapes(const std::vector<OutputShape>& from,
650                                       std::vector<OutputShape>* to) {
651     if (from.size() == 0) {
652         return true;
653     }
654     if (mExecutionStep != nullptr) {
655         const auto& indexMapping = mExecutionStep->getOutputIndexSubModelToFromModel();
656         NN_RET_CHECK_LE(indexMapping.size(), from.size());
657         for (uint32_t i = 0, e = indexMapping.size(); i < e; i++) {
658             uint32_t toIndex = indexMapping[i];
659             NN_RET_CHECK_GT(to->size(), toIndex);
660             NN_RET_CHECK(isUpdatable(to->at(toIndex).dimensions, from[i].dimensions));
661             (*to)[toIndex] = from[i];
662         }
663     } else {
664         NN_RET_CHECK_EQ(from.size(), to->size());
665         for (uint32_t i = 0, e = from.size(); i < e; i++) {
666             NN_RET_CHECK(isUpdatable(to->at(i).dimensions, from[i].dimensions));
667             (*to)[i] = from[i];
668         }
669     }
670     return true;
671 }
672 
673 // Figures out how to place each of the inputs or outputs in a buffer. This only does the
674 // layout; it does not copy any data.  Each argument is aligned as needed within the pool.
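// As a rough example (assuming alignBytesNeeded() adds no extra padding here), two POINTER
// arguments of 16 and 8 bytes would share one new pool, at offsets 0 and 16 respectively.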
675 int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
676                                                  Memory* memory) {
677     uint32_t nextPoolIndex = mMemories.size();
678     int64_t total = 0;
679     for (auto& info : *args) {
680         if (info.state == ModelArgumentInfo::POINTER) {
681             DataLocation& loc = info.locationAndLength;
682             // TODO Good enough alignment?
683             total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
684             loc.poolIndex = nextPoolIndex;
685             loc.offset = static_cast<uint32_t>(total);
686             total += loc.length;
687         }
688     }
689     if (total > 0xFFFFFFFF) {
690         LOG(ERROR) << "StepExecutor::allocatePointerArgumentsToPool: ANeuralNetworksExecution: "
691                       "Size of all inputs or outputs exceeds 2^32.";
692         return ANEURALNETWORKS_BAD_DATA;
693     }
694     hidl_memory hidlMemory;
695     if (total > 0) {
696         memory->create(total);  // TODO check error
697         mMemories.add(memory);
698     }
699     return ANEURALNETWORKS_NO_ERROR;
700 }
701 
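// Converts a list of ModelArgumentInfo entries into the RequestArgument array expected by the
// HAL Request structure, preserving each argument's location, dimensions, and "no value" flag.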
702 static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
703                                     hidl_vec<RequestArgument>* ioInfos) {
704     size_t count = argumentInfos.size();
705     ioInfos->resize(count);
706     for (size_t i = 0; i < count; i++) {
707         const auto& info = argumentInfos[i];
708         (*ioInfos)[i] = {
709                 .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
710                 .location = info.locationAndLength,
711                 .dimensions = info.dimensions,
712         };
713     }
714 }
715 
716 StepExecutor::StepExecutor(ExecutionBuilder* executionBuilder, const ModelBuilder* model,
717                            std::shared_ptr<Device> device,
718                            std::shared_ptr<VersionedIPreparedModel> preparedModel)
719     : mExecutionBuilder(executionBuilder),
720       mModel(model),
721       mDevice(device),
722       mPreparedModel(preparedModel),
723       mInputs(model->inputCount()),
724       mOutputs(model->outputCount()) {
725     CHECK(mDevice != nullptr);
726 }
727 
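// Copies the inputs, outputs, and memory pools straight from the ExecutionBuilder; used when a
// single step covers the entire model.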
728 void StepExecutor::mapInputsAndOutputsTrivially() {
729     mInputs = mExecutionBuilder->mInputs;
730     mOutputs = mExecutionBuilder->mOutputs;
731     mMemories = mExecutionBuilder->mMemories;
732 }
733 
734 void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
735                                     ModelArgumentInfo* executorInputOrOutput) {
736     *executorInputOrOutput = builderInputOrOutput;
737     switch (executorInputOrOutput->state) {
738         default:
739             nnAssert(!"unexpected ModelArgumentInfo::state");
740             break;
741         case ModelArgumentInfo::HAS_NO_VALUE:
742         case ModelArgumentInfo::POINTER:
743         case ModelArgumentInfo::UNSPECIFIED:
744             break;
745         case ModelArgumentInfo::MEMORY: {
746             const uint32_t builderPoolIndex = builderInputOrOutput.locationAndLength.poolIndex;
747             const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
748             const uint32_t executorPoolIndex = mMemories.add(memory);
749             executorInputOrOutput->locationAndLength.poolIndex = executorPoolIndex;
750             break;
751         }
752     }
753 }
754 
755 int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
756                                                       const Memory* memory, uint32_t offset,
757                                                       ModelArgumentInfo* inputOrOutputInfo) {
758     // Should be similar to
759     //     ExecutionBuilder::setInputFromMemory()
760     //     ExecutionBuilder::setOutputFromMemory()
761 
762     uint32_t poolIndex = mMemories.add(memory);
763     uint32_t length = TypeManager::get()->getSizeOfData(inputOrOutputOperand);
764     return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset,
765                                                      length);
766 }
767 
768 static void logArguments(const char* kind, const std::vector<ModelArgumentInfo>& args) {
769     for (unsigned i = 0; i < args.size(); i++) {
770         const auto& arg = args[i];
771         std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
772         switch (arg.state) {
773             case ModelArgumentInfo::POINTER:
774                 VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer) << ")";
775                 break;
776             case ModelArgumentInfo::MEMORY:
777                 VLOG(EXECUTION) << prefix << "MEMORY("
778                                 << "pool=" << arg.locationAndLength.poolIndex << ", "
779                                 << "off=" << arg.locationAndLength.offset << ")";
780                 break;
781             case ModelArgumentInfo::HAS_NO_VALUE:
782                 VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
783                 break;
784             case ModelArgumentInfo::UNSPECIFIED:
785                 VLOG(EXECUTION) << prefix << "UNSPECIFIED";
786                 break;
787             default:
788                 VLOG(EXECUTION) << prefix << "state(" << arg.state << ")";
789                 break;
790         }
791     }
792 }
793 
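// A StepExecutor is treated as targeting the CPU when its device exposes no HAL interface,
// which is the convention used for DeviceManager's built-in CPU device.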
794 bool StepExecutor::isCpu() const {
795     return mDevice->getInterface() == nullptr;
796 }
797 
798 int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback,
799                                const std::shared_ptr<ExecutionBurstController>& burstController) {
800     if (VLOG_IS_ON(EXECUTION)) {
801         logArguments("input", mInputs);
802         logArguments("output", mOutputs);
803     }
804     if (isCpu()) {
805         return startComputeOnCpu(synchronizationCallback);
806     } else {
807         return startComputeOnDevice(synchronizationCallback, burstController);
808     }
809 }
810 
811 int StepExecutor::startComputeOnDevice(
812         sp<ExecutionCallback>* synchronizationCallback,
813         const std::shared_ptr<ExecutionBurstController>& burstController) {
814     CHECK(!isCpu());
815 
816     // Initialize timing information in case we take an error path to exit.
817     mExecutionBuilder->reportTiming(kNoTiming);
818 
819     *synchronizationCallback = nullptr;
820 
821     // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated
822     // ExecutionPlan with the compilation and execution phases of the NN API
823     if (mPreparedModel == nullptr) {
824         Model model;
825         mModel->setHidlModel(&model);
826 
827         // TODO(butlermichael): Propagate user preference to this point instead of
828         // using default value of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, or
829         // remove this entire block of code since it is a stale path that is only
830         // encountered on an #if-removed code.
831         ExecutionPreference preference =
832                 static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
833 
834         ErrorStatus status = ErrorStatus::GENERAL_FAILURE;
835         std::tie(status, mPreparedModel) =
836                 mDevice->getInterface()->prepareModel(model, preference, {}, {}, {});
837         if (status != ErrorStatus::NONE) {
838             return convertErrorStatusToResultCode(status);
839         }
840         if (mPreparedModel == nullptr) {
841             return ANEURALNETWORKS_OP_FAILED;
842         }
843     }
844 
845     NNTRACE_RT(NNTRACE_PHASE_INPUTS_AND_OUTPUTS, "StepExecutor::startComputeOnDevice");
846     // We separate the input & output pools so that we reduce the copying done if we
847     // do an eventual remoting (hidl_memory->update()).  We could also use it to set
848     // protection on read only memory but that's not currently done.
849     Memory inputPointerArguments;
850     Memory outputPointerArguments;
851 
852     // Layout the input and output data
853     int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments);
854     if (n != ANEURALNETWORKS_NO_ERROR) {
855         return n;
856     }
857     n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments);
858     if (n != ANEURALNETWORKS_NO_ERROR) {
859         return n;
860     }
861 
862     // Copy the input data that was specified via a pointer.
863     // inputPointerArguments.update();
864     for (auto& info : mInputs) {
865         if (info.state == ModelArgumentInfo::POINTER) {
866             DataLocation& loc = info.locationAndLength;
867             uint8_t* data = nullptr;
868             int n = inputPointerArguments.getPointer(&data);
869             if (n != ANEURALNETWORKS_NO_ERROR) {
870                 return n;
871             }
872             memcpy(data + loc.offset, info.buffer, loc.length);
873         }
874     }
875     // TODO: Add inputPointerArguments.commit() and .update() at all the right places
876 
877     Request request;
878     setRequestArgumentArray(mInputs, &request.inputs);
879     setRequestArgumentArray(mOutputs, &request.outputs);
880     uint32_t count = mMemories.size();
881     request.pools.resize(count);
882     for (uint32_t i = 0; i < count; i++) {
883         request.pools[i] = mMemories[i]->getHidlMemory();
884     }
885 
886     NNTRACE_FULL_SWITCH(NNTRACE_LAYER_IPC, NNTRACE_PHASE_EXECUTION,
887                         "StepExecutor::startComputeOnDevice::execute");
888 
889     // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
890     // object is returned when the execution has been successfully launched,
891     // otherwise a nullptr is returned. The executionCallback is abstracted in
892     // the NN API as an "event".
893     //
894     // The sp is used for ref-counting purposes. Without it, the HIDL service
895     // could attempt to communicate with a dead callback object.
896     //
897     // TODO: Explain the "dead callback" problem further, either here or
898     // in the design document.
899     sp<ExecutionCallback> executionCallback = new ExecutionCallback();
900 
901     // compute using burst if present
902     const bool burstCompute = (burstController != nullptr);
903     bool burstFallback = false;
904     if (burstCompute) {
905         std::vector<intptr_t> memoryIds;
906         memoryIds.reserve(mMemories.size());
907         for (const Memory* memory : mMemories) {
908             memory->usedBy(burstController);
909             memoryIds.push_back(memory->getKey());
910         }
911 
912         VLOG(EXECUTION) << "Before ExecutionBurstController->tryCompute() "
913                         << SHOW_IF_DEBUG(toString(request));
914         auto [status, outputShapes, timing, fallback] =
915                 burstController->tryCompute(request, measureTiming(mExecutionBuilder), memoryIds);
916 
917         burstFallback = fallback;
918         if (!fallback) {
919             executionCallback->notify(status, outputShapes, timing);
920         }
921     }
922 
923     // compute from IPreparedModel if either:
924     // (1) burst was not supplied, or
925     // (2) the burst execution failed and requested a fallback execution
926     if (!burstCompute || burstFallback) {
927         if (DeviceManager::get()->syncExecHal()) {
928             VLOG(EXECUTION) << "Before mPreparedModel->executeSynchronously() "
929                             << SHOW_IF_DEBUG(toString(request));
930             auto syncExecuteResult =
931                     mPreparedModel->executeSynchronously(request, measureTiming(mExecutionBuilder));
932             executionCallback->notify(std::get<0>(syncExecuteResult),
933                                       std::get<1>(syncExecuteResult),
934                                       std::get<2>(syncExecuteResult));
935         } else {
936             VLOG(EXECUTION) << "Before mPreparedModel->execute() "
937                             << SHOW_IF_DEBUG(toString(request));
938             // Execute.
939             // TODO: What happens to the Callback if the service dies abnormally
940             // -- won't that keep the Callback live forever, because the service
941             // never has the opportunity to bump the reference count down? Or
942             // maybe the HIDL infrastructure handles this magically? At worst,
943             // it seems like this is a small memory leak, if the Callback stays
944             // alive forever.
945             Return<ErrorStatus> executeStatus = mPreparedModel->execute(
946                     request, measureTiming(mExecutionBuilder), executionCallback);
947             if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) {
948                 VLOG(EXECUTION) << "**Execute launch failed**";
949                 return executeStatus.isOk() ? convertErrorStatusToResultCode(executeStatus)
950                                             : ANEURALNETWORKS_OP_FAILED;
951             }
952         }
953     }
954 
955     // TODO: Remove this synchronization point when the block of code below is
956     // removed.
957     executionCallback->wait();
958     NNTRACE_FULL_SWITCH(NNTRACE_LAYER_RUNTIME, NNTRACE_PHASE_EXECUTION,
959                         "StepExecutor::startComputeOnDevice::waited");
960     Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
961     if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
962         VLOG(EXECUTION) << "**Execution failed**";
963         if (callbackStatus == ErrorStatus::OUTPUT_INSUFFICIENT_SIZE) {
964             *synchronizationCallback = executionCallback;
965             return ANEURALNETWORKS_NO_ERROR;
966         }
967         return callbackStatus.isOk() ? convertErrorStatusToResultCode(callbackStatus)
968                                      : ANEURALNETWORKS_OP_FAILED;
969     }
970 
971     mExecutionBuilder->reportTiming(executionCallback->getTiming());
972 
973     // Copy the output data from shared memory to the output buffers.
974     // TODO: Move this block of code somewhere else. It should not be in the
975     // startCompute function.
976     // TODO: outputMemory->update(); outputMemory->commit()
977     NNTRACE_RT_SWITCH(NNTRACE_PHASE_RESULTS, "StepExecutor::startComputeOnDevice");
978     for (auto& info : mOutputs) {
979         if (info.state == ModelArgumentInfo::POINTER) {
980             DataLocation& loc = info.locationAndLength;
981             uint8_t* data = nullptr;
982             int n = outputPointerArguments.getPointer(&data);
983             if (n != ANEURALNETWORKS_NO_ERROR) {
984                 return n;
985             }
986             memcpy(info.buffer, data + loc.offset, loc.length);
987         }
988     }
989     VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed";
990 
991     *synchronizationCallback = executionCallback;
992     return ANEURALNETWORKS_NO_ERROR;
993 }
994 
995 static void computeOnCpu(const Model& model, const Request& request,
996                          const std::vector<RunTimePoolInfo>& modelPoolInfos,
997                          const std::vector<RunTimePoolInfo>& requestPoolInfos,
998                          const sp<IExecutionCallback>& executionCallback) {
999     NNTRACE_RT(NNTRACE_PHASE_EXECUTION, "computeOnCpu");
1000     CpuExecutor executor;
1001     int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
1002     const auto& outputShapes = executor.getOutputShapes();
1003     executionCallback->notify_1_2(convertResultCodeToErrorStatus(err), outputShapes, kNoTiming);
1004 }
1005 
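// Runs this step on the CPU, either synchronously on the calling thread or on a newly spawned
// thread, after repackaging pointer-based arguments into RunTimePoolInfo entries that
// CpuExecutor can consume.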
1006 int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) {
1007     // TODO: use a thread pool
1008     // TODO(mikie): this could have NNTRACE so we could measure the overhead of
1009     //              spinning up a new thread.
1010 
1011     Model model;
1012     mModel->setHidlModel(&model);
1013 
1014     // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
1015     // object is returned when the execution has been successfully launched,
1016     // otherwise a nullptr is returned. The executionCallback is abstracted in
1017     // the NN API as an "event".
1018     sp<ExecutionCallback> executionCallback = new ExecutionCallback();
1019     *synchronizationCallback = nullptr;
1020 
1021     std::vector<RunTimePoolInfo> modelPoolInfos;
1022     if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
1023         return ANEURALNETWORKS_UNMAPPABLE;
1024     }
1025 
1026     std::vector<RunTimePoolInfo> requestPoolInfos;
1027     requestPoolInfos.reserve(mMemories.size());
1028     for (const Memory* mem : mMemories) {
1029         if (std::optional<RunTimePoolInfo> poolInfo =
1030                     RunTimePoolInfo::createFromHidlMemory(mem->getHidlMemory())) {
1031             requestPoolInfos.emplace_back(*poolInfo);
1032         } else {
1033             return ANEURALNETWORKS_UNMAPPABLE;
1034         }
1035     }
1036     // Create an additional pool for each input or output that is supplied via a pointer.
1037     auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
1038         for (ModelArgumentInfo& argumentInfo : argumentInfos) {
1039             if (argumentInfo.state == ModelArgumentInfo::POINTER) {
1040                 argumentInfo.locationAndLength.poolIndex =
1041                         static_cast<uint32_t>(requestPoolInfos.size());
1042                 argumentInfo.locationAndLength.offset = 0;
1043                 requestPoolInfos.emplace_back(RunTimePoolInfo::createFromExistingBuffer(
1044                         static_cast<uint8_t*>(argumentInfo.buffer)));
1045             }
1046         }
1047     };
1048     fixPointerArguments(mInputs);
1049     fixPointerArguments(mOutputs);
1050 
1051     Request request;
1052     setRequestArgumentArray(mInputs, &request.inputs);
1053     setRequestArgumentArray(mOutputs, &request.outputs);
1054 
1055     if (DeviceManager::get()->syncExecCpu()) {
1056         computeOnCpu(model, request, modelPoolInfos, requestPoolInfos, executionCallback);
1057     } else {
1058         // TODO: should model be moved with a std::cref?
1059         std::thread thread(computeOnCpu, model, std::move(request), std::move(modelPoolInfos),
1060                            std::move(requestPoolInfos), executionCallback);
1061         executionCallback->bindThread(std::move(thread));
1062     }
1063 
1064     *synchronizationCallback = executionCallback;
1065     return ANEURALNETWORKS_NO_ERROR;
1066 }
1067 
1068 }  // namespace nn
1069 }  // namespace android
1070