/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "Operations.h"

#include "Eigen/Core"
#include <new>  // for std::nothrow, used when allocating temporary buffers
#include <omp.h>
#include <sys/mman.h>

namespace android {
namespace nn {

// TODO: short term, make shared memory mapping and updating a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
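// A RunTimePoolInfo wraps one of the memory pools backing a model or request.
// Two hidl_memory kinds are handled below: "ashmem", mapped through the
// IMemory service, and "mmap_fd", mapped directly with mmap(). For "mmap_fd"
// the native handle is laid out as data[0] = fd, data[1] = mmap protection
// flags, and data[2]/data[3] = the low and high halves of the 64-bit offset.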
RunTimePoolInfo::RunTimePoolInfo(const hidl_memory& hidlMemory, bool* fail) {
    sp<IMemory> memory;
    uint8_t* buffer = nullptr;

    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory = mapMemory(hidlMemory);
        if (memory == nullptr) {
            LOG(ERROR) << "Can't map shared memory.";
            if (fail) *fail = true;
            return;
        }
        memory->update();
        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
        if (buffer == nullptr) {
            LOG(ERROR) << "Can't access shared memory.";
            if (fail) *fail = true;
            return;
        }
    } else if (memType == "mmap_fd") {
        size_t size = hidlMemory.size();
        int fd = hidlMemory.handle()->data[0];
        int prot = hidlMemory.handle()->data[1];
        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
                                        hidlMemory.handle()->data[3]);
        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
        if (buffer == MAP_FAILED) {
            LOG(ERROR) << "RunTimePoolInfo::RunTimePoolInfo(): Can't mmap the file descriptor.";
            if (fail) *fail = true;
            return;
        }
    } else {
        LOG(ERROR) << "RunTimePoolInfo::RunTimePoolInfo(): unsupported hidl_memory type";
        if (fail) *fail = true;
        return;
    }

    mHidlMemory = hidlMemory;
    mBuffer     = buffer;
    mMemory     = memory;
}

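// Wraps a buffer supplied directly by the caller (a POINTER argument).
// Nothing is mapped here, so release() has nothing to unmap for this case.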
RunTimePoolInfo::RunTimePoolInfo(uint8_t* buffer) {
    mBuffer = buffer;
}

RunTimePoolInfo::RunTimePoolInfo(RunTimePoolInfo&& other) {
    moveFrom(std::move(other));
    other.mBuffer = nullptr;
}

RunTimePoolInfo& RunTimePoolInfo::operator=(RunTimePoolInfo&& other) {
    if (this != &other) {
        release();
        moveFrom(std::move(other));
        other.mBuffer = nullptr;
    }
    return *this;
}

void RunTimePoolInfo::moveFrom(RunTimePoolInfo&& other) {
    mHidlMemory = std::move(other.mHidlMemory);
    mBuffer     = std::move(other.mBuffer);
    mMemory     = std::move(other.mMemory);
}

void RunTimePoolInfo::release() {
    if (mBuffer == nullptr) {
        return;
    }

    auto memType = mHidlMemory.name();
    if (memType == "ashmem") {
        // nothing to do
    } else if (memType == "mmap_fd") {
        size_t size = mHidlMemory.size();
        if (munmap(mBuffer, size)) {
            LOG(ERROR) << "RunTimePoolInfo::release(): Can't munmap";
        }
    } else if (memType == "") {
        // Represents a POINTER argument; nothing to do
    } else {
        LOG(ERROR) << "RunTimePoolInfo::release(): unsupported hidl_memory type";
    }

    mHidlMemory = hidl_memory();
    mMemory     = nullptr;
    mBuffer     = nullptr;
}

// Makes sure the output data is correctly written back to the underlying
// memory after execution.
bool RunTimePoolInfo::update() const {
    auto memType = mHidlMemory.name();
    if (memType == "ashmem") {
        mMemory->commit();
        return true;
    } else if (memType == "mmap_fd") {
        int prot = mHidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            size_t size = mHidlMemory.size();
            return msync(mBuffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

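// Maps every pool of a model or request up front. On any mapping failure the
// vector is cleared so no partially mapped set of pools is left behind.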
bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools) {
    poolInfos->clear();
    poolInfos->reserve(pools.size());
    bool fail = false;
    for (const auto& pool : pools) {
        poolInfos->emplace_back(pool, &fail);
    }
    if (fail) {
        LOG(ERROR) << "Could not map pools";
        poolInfos->clear();
        return false;
    }
    return true;
}

// Updates the RunTimeOperandInfo with the newly calculated shape,
// allocating the buffer if needed.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
    // For user-provided model output operands, the parameters must match the Shape
    // calculated from the preparation step.
    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
        if (info->type != shape.type ||
            info->dimensions != shape.dimensions) {
            LOG(ERROR) << "Invalid type or dimensions for model output";
            return false;
        }
        if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
            (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
            LOG(ERROR) << "Invalid scale or zeroPoint for model output";
            return false;
        }
    }
    info->type = shape.type;
    info->dimensions = shape.dimensions;
    info->scale = shape.scale;
    info->zeroPoint = shape.offset;
    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
        uint32_t length = sizeOfData(info->type, info->dimensions);
        // Use the non-throwing form of new so an allocation failure is
        // reported through the nullptr check below rather than an exception.
        info->buffer = new (std::nothrow) uint8_t[length];
        if (info->buffer == nullptr) {
            return false;
        }
    }
    return true;
}

// Ignores the .pools entries in the model and request. These will have been
// taken care of by the caller.
int CpuExecutor::run(const V1_0::Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    return run(convertToV1_1(model), request, modelPoolInfos, requestPoolInfos);
}

int CpuExecutor::run(const V1_1::Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::run() with request("
                 << SHOW_IF_DEBUG(toString(request)) << ")";

    ScopedOpenmpSettings openMpSettings;

    mModel = &model;
    mRequest = &request; // TODO check if mRequest is needed
    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
    // The model has serialized the operations in execution order.
    for (const auto& operation : model.operations) {
        int n = executeOperation(operation);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
    for (auto& runtimeInfo : modelPoolInfos) {
        runtimeInfo.update();
    }
    for (auto& runtimeInfo : requestPoolInfos) {
        runtimeInfo.update();
    }
    mModel = nullptr;
    mRequest = nullptr;
    VLOG(CPUEXE) << "Completed run normally";
    return ANEURALNETWORKS_NO_ERROR;
}

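// Builds the RunTimeOperandInfo table from the model's operands. Buffer
// pointers are resolved according to operand lifetime: CONSTANT_COPY operands
// point into the model's operandValues blob, CONSTANT_REFERENCE operands
// point into a model pool, TEMPORARY_VARIABLE buffers are allocated lazily
// during execution and reference-counted through numberOfUsesLeft, and model
// inputs/outputs are patched afterwards from the request arguments.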
bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
    const size_t count = mModel->operands.size();
    mOperands.resize(count);

    // Start by setting the runtime info to what's in the model.
    for (size_t i = 0; i < count; i++) {
        const Operand& from = mModel->operands[i];
        RunTimeOperandInfo& to = mOperands[i];
        to.type = from.type;
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = from.lifetime;
        switch (from.lifetime) {
            case OperandLifeTime::TEMPORARY_VARIABLE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case OperandLifeTime::MODEL_INPUT:
            case OperandLifeTime::MODEL_OUTPUT:
            case OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
            default:
                nnAssert(false);
                break;
        }
    }

    // Adjust the runtime info for the arguments passed to the model,
    // modifying the buffer location, and possibly the dimensions.
    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
                                  const hidl_vec<RequestArgument>& arguments) {
        nnAssert(indexes.size() == arguments.size());
        for (size_t i = 0; i < indexes.size(); i++) {
            const uint32_t operandIndex = indexes[i];
            const RequestArgument& from = arguments[i];
            RunTimeOperandInfo& to = mOperands[operandIndex];
            if (from.dimensions.size() > 0) {
                // It's the responsibility of the caller to validate that
                // from.dimensions only modifies the dimensions that were
                // unspecified in the model.  That's the case in SampleDriver.cpp
                // with the call to validateRequest().
                // TODO make sure that's the case for the default CPU path.
                to.dimensions = from.dimensions;
            }
            if (from.hasNoValue) {
                to.lifetime = OperandLifeTime::NO_VALUE;
                nnAssert(to.buffer == nullptr);
            } else {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < requestPoolInfos.size());
                auto& r = requestPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
            }
        }
    };
    updateForArguments(mModel->inputIndexes, mRequest->inputs);
    updateForArguments(mModel->outputIndexes, mRequest->outputs);

    return true;
}

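// Decrements the use count of each input operand and frees the backing buffer
// once its last consumer has run. Only TEMPORARY_VARIABLE operands ever have
// a nonzero numberOfUsesLeft, so constants and model inputs/outputs are
// never freed here.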
void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
    for (uint32_t i : inputs) {
        auto& info = mOperands[i];
        // Constants and model inputs/outputs have a use count of zero; skip them.
        if (info.numberOfUsesLeft == 0) {
            continue;
        }
        info.numberOfUsesLeft--;
        if (info.numberOfUsesLeft == 0) {
            nnAssert(info.buffer != nullptr);
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }
}

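// Dispatches a single operation to its CPU kernel. Each case follows the
// same pattern: validate the operand counts, read the scalar parameters, run
// the operation's prepare function to compute the output Shape, allocate the
// output through setInfoAndAllocateIfNeeded(), and then invoke the typed
// kernel (float32 or quantized uint8) on the raw buffers.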
int CpuExecutor::executeOperation(const Operation& operation) {
    // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
    const hidl_vec<uint32_t>& ins = operation.inputs;
    const hidl_vec<uint32_t>& outs = operation.outputs;
    bool success = false;

    // Function to verify that the number of input and output parameters
    // matches what is expected.  Also checks that all the parameters have
    // values. This function is to be used only for operations that do not
    // accept optional arguments.
    // TODO Have a version that works for optional arguments.
    auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
                                                                size_t requiredOuts) -> bool {
        auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
                          const char* type) -> bool {
            size_t actualCount = indexes.size();
            if (actualCount != requiredCount) {
                LOG(ERROR) << getOperationName(operation.type)
                           << ": Invalid number of " << type << " operands. Got " << actualCount
                           << ", expected " << requiredCount;
                return false;
            }
            for (size_t i = 0; i < actualCount; i++) {
                if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
                    LOG(ERROR) << getOperationName(operation.type) << " " << type
                               << " operand " << i << " is required but missing.";
                    return false;
                }
            }
            return true;
        };
        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
    };

    switch (operation.type) {
        case OperationType::OEM_OPERATION: {
            LOG(ERROR) << "OEM operation not supported for CPU execution";
            success = false;
        } break;
        case OperationType::ADD: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::MUL: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::FLOOR: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = floorPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          floorFloat32(reinterpret_cast<const float*>(input.buffer),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            }
        } break;
        case OperationType::DEQUANTIZE: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = dequantizePrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          dequantizeQuant8ToFloat32(
                                  reinterpret_cast<const uint8_t*>(input.buffer),
                                  reinterpret_cast<float*>(output.buffer),
                                  input.shape());
            }
        } break;
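        // The convolution and pooling operations below accept two parameter
        // layouts: a long form with explicit left/right/top/bottom padding
        // scalars, and a short form with a single implicit padding scalar
        // from which calculateExplicitPadding() derives the four explicit
        // values using the input, filter, and stride dimensions.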
        case OperationType::DEPTHWISE_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 11 && inCount != 8) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t depth_multiplier;
            int32_t activation;

            if (inCount == 11) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
                activation       = getScalarData<int32_t>(mOperands[ins[10]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
                activation       = getScalarData<int32_t>(mOperands[ins[7]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
                                               input.shape(),
                                               reinterpret_cast<const float*>(filter.buffer),
                                               filter.shape(),
                                               reinterpret_cast<const float*>(bias.buffer),
                                               bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               depth_multiplier, activation,
                                               reinterpret_cast<float*>(output.buffer),
                                               outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                              input.shape(),
                                              reinterpret_cast<const uint8_t*>(filter.buffer),
                                              filter.shape(),
                                              reinterpret_cast<const int32_t*>(bias.buffer),
                                              bias.shape(),
                                              padding_left, padding_right,
                                              padding_top, padding_bottom,
                                              stride_width, stride_height,
                                              depth_multiplier, activation,
                                              reinterpret_cast<uint8_t*>(output.buffer),
                                              outShape);
            }
        } break;
        case OperationType::CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                      reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                                      reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height, activation,
                                      reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<const uint8_t*>(filter.buffer),
                                     filter.shape(),
                                     reinterpret_cast<const int32_t*>(bias.buffer),
                                     bias.shape(),
                                     padding_left, padding_right,
                                     padding_top, padding_bottom,
                                     stride_width, stride_height, activation,
                                     reinterpret_cast<uint8_t*>(output.buffer),
                                     outShape);
            }
        } break;
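        // The pooling operations take no filter tensor; the filter width and
        // height arrive as two extra scalar inputs instead, so the explicit
        // form has 10 inputs and the implicit-padding form has 7.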
628         case OperationType::AVERAGE_POOL_2D: {
629             const size_t inCount = ins.size();
630             if ((inCount != 10 && inCount != 7) ||
631                     !allParametersPresent(inCount, 1)) {
632                 return ANEURALNETWORKS_BAD_DATA;
633             }
634             const RunTimeOperandInfo& input = mOperands[ins[0]];
635 
636             int32_t padding_left, padding_right;
637             int32_t padding_top, padding_bottom;
638             int32_t stride_width, stride_height;
639             int32_t filter_width, filter_height;
640             int32_t activation;
641 
642             if (inCount == 10) {
643                 padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
644                 padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
645                 padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
646                 padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
647                 stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
648                 stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
649                 filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
650                 filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
651                 activation       = getScalarData<int32_t>(mOperands[ins[9]]);
652             } else {
653                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
654                 stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
655                 stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
656                 filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
657                 filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
658                 activation       = getScalarData<int32_t>(mOperands[ins[6]]);
659 
660                 Shape inputShape = input.shape();
661                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
662                 int32_t input_height = getSizeOfDimension(inputShape, 1);
663                 calculateExplicitPadding(input_width, stride_width,
664                                          filter_width, padding_implicit,
665                                          &padding_left, &padding_right);
666                 calculateExplicitPadding(input_height, stride_height,
667                                          filter_height, padding_implicit,
668                                          &padding_top, &padding_bottom);
669             }
670 
671             RunTimeOperandInfo& output = mOperands[outs[0]];
672             Shape outShape = output.shape();
673 
674             if (input.type == OperandType::TENSOR_FLOAT32) {
675                 success = genericPoolingPrepare(input.shape(),
676                                                 padding_left, padding_right,
677                                                 padding_top, padding_bottom,
678                                                 stride_width, stride_height,
679                                                 filter_width, filter_height,
680                                                 &outShape) &&
681                           setInfoAndAllocateIfNeeded(&output, outShape) &&
682                           averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
683                                              input.shape(),
684                                              padding_left, padding_right,
685                                              padding_top, padding_bottom,
686                                              stride_width, stride_height,
687                                              filter_width, filter_height, activation,
688                                              reinterpret_cast<float*>(output.buffer),
689                                              outShape);
690             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
691                 success = genericPoolingPrepare(input.shape(),
692                                                 padding_left, padding_right,
693                                                 padding_top, padding_bottom,
694                                                 stride_width, stride_height,
695                                                 filter_width, filter_height,
696                                                 &outShape) &&
697                           setInfoAndAllocateIfNeeded(&output, outShape) &&
698                           averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
699                                             input.shape(),
700                                             padding_left, padding_right,
701                                             padding_top, padding_bottom,
702                                             stride_width, stride_height,
703                                             filter_width, filter_height, activation,
704                                             reinterpret_cast<uint8_t*>(output.buffer),
705                                             outShape);
706             }
707         } break;
708         case OperationType::L2_POOL_2D: {
709             const size_t inCount = ins.size();
710             if ((inCount != 10 && inCount != 7) ||
711                     !allParametersPresent(inCount, 1)) {
712                 return ANEURALNETWORKS_BAD_DATA;
713             }
714             const RunTimeOperandInfo& input = mOperands[ins[0]];
715 
716             int32_t padding_left, padding_right;
717             int32_t padding_top, padding_bottom;
718             int32_t stride_width, stride_height;
719             int32_t filter_width, filter_height;
720             int32_t activation;
721 
722             if (inCount == 10) {
723                 padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
724                 padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
725                 padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
726                 padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
727                 stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
728                 stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
729                 filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
730                 filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
731                 activation       = getScalarData<int32_t>(mOperands[ins[9]]);
732             } else {
733                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
734                 stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
735                 stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
736                 filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
737                 filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
738                 activation       = getScalarData<int32_t>(mOperands[ins[6]]);
739 
740                 Shape inputShape = input.shape();
741                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
742                 int32_t input_height = getSizeOfDimension(inputShape, 1);
743                 calculateExplicitPadding(input_width, stride_width,
744                                          filter_width, padding_implicit,
745                                          &padding_left, &padding_right);
746                 calculateExplicitPadding(input_height, stride_height,
747                                          filter_height, padding_implicit,
748                                          &padding_top, &padding_bottom);
749             }
750 
751             RunTimeOperandInfo& output = mOperands[outs[0]];
752             Shape outShape = output.shape();
753 
754             if (input.type == OperandType::TENSOR_FLOAT32) {
755                 success = genericPoolingPrepare(input.shape(),
756                                                 padding_left, padding_right,
757                                                 padding_top, padding_bottom,
758                                                 stride_width, stride_height,
759                                                 filter_width, filter_height,
760                                                 &outShape) &&
761                           setInfoAndAllocateIfNeeded(&output, outShape) &&
762                           l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
763                                         input.shape(),
764                                         padding_left, padding_right,
765                                         padding_top, padding_bottom,
766                                         stride_width, stride_height,
767                                         filter_width, filter_height, activation,
768                                         reinterpret_cast<float*>(output.buffer),
769                                         outShape);
770             }
771         } break;
772         case OperationType::MAX_POOL_2D: {
773             const size_t inCount = ins.size();
774             if ((inCount != 10 && inCount != 7) ||
775                     !allParametersPresent(inCount, 1)) {
776                 return ANEURALNETWORKS_BAD_DATA;
777             }
778             const RunTimeOperandInfo& input = mOperands[ins[0]];
779 
780             int32_t padding_left, padding_right;
781             int32_t padding_top, padding_bottom;
782             int32_t stride_width, stride_height;
783             int32_t filter_width, filter_height;
784             int32_t activation;
785 
786             if (inCount == 10) {
787                 padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
788                 padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
789                 padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
790                 padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
791                 stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
792                 stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
793                 filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
794                 filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
795                 activation       = getScalarData<int32_t>(mOperands[ins[9]]);
796             } else {
797                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
798                 stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
799                 stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
800                 filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
801                 filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
802                 activation       = getScalarData<int32_t>(mOperands[ins[6]]);
803 
804                 Shape inputShape = input.shape();
805                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
806                 int32_t input_height = getSizeOfDimension(inputShape, 1);
807                 calculateExplicitPadding(input_width, stride_width,
808                                          filter_width, padding_implicit,
809                                          &padding_left, &padding_right);
810                 calculateExplicitPadding(input_height, stride_height,
811                                          filter_height, padding_implicit,
812                                          &padding_top, &padding_bottom);
813             }
814 
815             RunTimeOperandInfo& output = mOperands[outs[0]];
816             Shape outShape = output.shape();
817 
818             if (input.type == OperandType::TENSOR_FLOAT32) {
819                 success = genericPoolingPrepare(input.shape(),
820                                                 padding_left, padding_right,
821                                                 padding_top, padding_bottom,
822                                                 stride_width, stride_height,
823                                                 filter_width, filter_height,
824                                                 &outShape) &&
825                           setInfoAndAllocateIfNeeded(&output, outShape) &&
826                           maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
827                                          input.shape(),
828                                          padding_left, padding_right,
829                                          padding_top, padding_bottom,
830                                          stride_width, stride_height,
831                                          filter_width, filter_height, activation,
832                                          reinterpret_cast<float*>(output.buffer),
833                                          outShape);
834             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
835                 success = genericPoolingPrepare(input.shape(),
836                                                 padding_left, padding_right,
837                                                 padding_top, padding_bottom,
838                                                 stride_width, stride_height,
839                                                 filter_width, filter_height,
840                                                 &outShape) &&
841                           setInfoAndAllocateIfNeeded(&output, outShape) &&
842                           maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
843                                         input.shape(),
844                                         padding_left, padding_right,
845                                         padding_top, padding_bottom,
846                                         stride_width, stride_height,
847                                         filter_width, filter_height, activation,
848                                         reinterpret_cast<uint8_t*>(output.buffer),
849                                         outShape);
850             }
851 
852         } break;
853         case OperationType::RELU: {
854             if (!allParametersPresent(1, 1)) {
855                 return ANEURALNETWORKS_BAD_DATA;
856             }
857             const RunTimeOperandInfo& input = mOperands[ins[0]];
858             RunTimeOperandInfo& output = mOperands[outs[0]];
859             Shape outShape = output.shape();
860 
861             if (input.type == OperandType::TENSOR_FLOAT32) {
862                 success = genericActivationPrepare(input.shape(), &outShape) &&
863                           setInfoAndAllocateIfNeeded(&output, outShape) &&
864                           reluFloat32(reinterpret_cast<const float*>(input.buffer),
865                                       input.shape(),
866                                       reinterpret_cast<float*>(output.buffer),
867                                       outShape);
868             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
869                 success = genericActivationPrepare(input.shape(), &outShape) &&
870                           setInfoAndAllocateIfNeeded(&output, outShape) &&
871                           reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
872                                      input.shape(),
873                                      reinterpret_cast<uint8_t*>(output.buffer),
874                                      outShape);
875             }
876         } break;
877         case OperationType::RELU1: {
878             if (!allParametersPresent(1, 1)) {
879                 return ANEURALNETWORKS_BAD_DATA;
880             }
881             const RunTimeOperandInfo& input = mOperands[ins[0]];
882             RunTimeOperandInfo& output = mOperands[outs[0]];
883             Shape outShape = output.shape();
884 
885             if (input.type == OperandType::TENSOR_FLOAT32) {
886                 success = genericActivationPrepare(input.shape(), &outShape) &&
887                           setInfoAndAllocateIfNeeded(&output, outShape) &&
888                           relu1Float32(reinterpret_cast<const float*>(input.buffer),
889                                        input.shape(),
890                                        reinterpret_cast<float*>(output.buffer),
891                                        outShape);
892             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
893                 success = genericActivationPrepare(input.shape(), &outShape) &&
894                           setInfoAndAllocateIfNeeded(&output, outShape) &&
895                           relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
896                                       input.shape(),
897                                       reinterpret_cast<uint8_t*>(output.buffer),
898                                       outShape);
899             }
900         } break;
901         case OperationType::RELU6: {
902             if (!allParametersPresent(1, 1)) {
903                 return ANEURALNETWORKS_BAD_DATA;
904             }
905             const RunTimeOperandInfo& input = mOperands[ins[0]];
906             RunTimeOperandInfo& output = mOperands[outs[0]];
907             Shape outShape = output.shape();
908 
909             if (input.type == OperandType::TENSOR_FLOAT32) {
910                 success = genericActivationPrepare(input.shape(), &outShape) &&
911                           setInfoAndAllocateIfNeeded(&output, outShape) &&
912                           relu6Float32(reinterpret_cast<const float*>(input.buffer),
913                                        input.shape(),
914                                        reinterpret_cast<float*>(output.buffer),
915                                        outShape);
916             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
917                 success = genericActivationPrepare(input.shape(), &outShape) &&
918                           setInfoAndAllocateIfNeeded(&output, outShape) &&
919                           relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
920                                       input.shape(),
921                                       reinterpret_cast<uint8_t*>(output.buffer),
922                                       outShape);
923             }
924         } break;
925         case OperationType::TANH: {
926             if (!allParametersPresent(1, 1)) {
927                 return ANEURALNETWORKS_BAD_DATA;
928             }
929             const RunTimeOperandInfo& input = mOperands[ins[0]];
930             RunTimeOperandInfo& output = mOperands[outs[0]];
931             Shape outShape = output.shape();
932 
933             if (input.type == OperandType::TENSOR_FLOAT32) {
934                 success = genericActivationPrepare(input.shape(), &outShape) &&
935                           setInfoAndAllocateIfNeeded(&output, outShape) &&
936                           tanhFloat32(reinterpret_cast<const float*>(input.buffer),
937                                       input.shape(),
938                                       reinterpret_cast<float*>(output.buffer),
939                                       outShape);
940             }
941         } break;
942         case OperationType::LOGISTIC: {
943             if (!allParametersPresent(1, 1)) {
944                 return ANEURALNETWORKS_BAD_DATA;
945             }
946             const RunTimeOperandInfo& input = mOperands[ins[0]];
947             RunTimeOperandInfo& output = mOperands[outs[0]];
948             Shape outShape = output.shape();
949 
950             if (input.type == OperandType::TENSOR_FLOAT32) {
951                 success = genericActivationPrepare(input.shape(), &outShape) &&
952                           setInfoAndAllocateIfNeeded(&output, outShape) &&
953                           logisticFloat32(reinterpret_cast<const float*>(input.buffer),
954                                           input.shape(),
955                                           reinterpret_cast<float*>(output.buffer),
956                                           outShape);
957             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
958                 success = genericActivationPrepare(input.shape(), &outShape) &&
959                           setInfoAndAllocateIfNeeded(&output, outShape) &&
960                           logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
961                                          input.shape(),
962                                          reinterpret_cast<uint8_t*>(output.buffer),
963                                          outShape);
964             }
965         } break;
966         case OperationType::SOFTMAX: {
967             if (!allParametersPresent(2, 1)) {
968                 return ANEURALNETWORKS_BAD_DATA;
969             }
970             RunTimeOperandInfo& input = mOperands[ins[0]];
971             float beta = getScalarData<float>(mOperands[ins[1]]);
972             if (beta <= 0.0f) {
973                 LOG(ERROR) << "beta must be positive for softmax";
974                 return ANEURALNETWORKS_BAD_DATA;
975             }
976 
977             RunTimeOperandInfo& output = mOperands[outs[0]];
978             Shape outShape = output.shape();
979 
980             if (input.type == OperandType::TENSOR_FLOAT32) {
981                 success = genericActivationPrepare(input.shape(), &outShape) &&
982                           setInfoAndAllocateIfNeeded(&output, outShape) &&
983                           softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
984                                          input.shape(),
985                                          beta,
986                                          reinterpret_cast<float*>(output.buffer),
987                                          output.shape());
988             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
989                 success = genericActivationPrepare(input.shape(), &outShape) &&
990                           setInfoAndAllocateIfNeeded(&output, outShape) &&
991                           softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
992                                         input.shape(),
993                                         beta,
994                                         reinterpret_cast<uint8_t*>(output.buffer),
995                                         output.shape());
996             }
997         } break;
        case OperationType::FULLY_CONNECTED: {
            if (!allParametersPresent(4, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input   = mOperands[ins[0]];
            RunTimeOperandInfo& weights = mOperands[ins[1]];
            RunTimeOperandInfo& bias    = mOperands[ins[2]];

            int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer),
                                                input.shape(),
                                                reinterpret_cast<const float*>(weights.buffer),
                                                weights.shape(),
                                                reinterpret_cast<const float*>(bias.buffer),
                                                bias.shape(),
                                                activation,
                                                reinterpret_cast<float*>(output.buffer),
                                                outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                               input.shape(),
                                               reinterpret_cast<const uint8_t*>(weights.buffer),
                                               weights.shape(),
                                               reinterpret_cast<const int32_t*>(bias.buffer),
                                               bias.shape(),
                                               activation,
                                               reinterpret_cast<uint8_t*>(output.buffer),
                                               outShape);
            }
        } break;
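        // CONCATENATION takes a variable number of input tensors followed by a single
        // int32 axis scalar, so the shapes and data pointers are first gathered into
        // vectors before being handed to the type-specific kernel.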
        case OperationType::CONCATENATION: {
            if (outs.size() != 1 || ins.size() < 2) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            int numInputTensors = ins.size() - 1;
            int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
            if (firstInput.type == OperandType::TENSOR_FLOAT32) {
                std::vector<Shape> inputShapes(numInputTensors);
                std::vector<const float*> inputDataPtrs(numInputTensors);

                for (int i = 0; i < numInputTensors; i++) {
                    RunTimeOperandInfo& input = mOperands[ins[i]];
                    inputShapes[i] = input.shape();
                    inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
                }
                success = concatenationPrepare(inputShapes, axis, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          concatenationFloat32(inputDataPtrs, inputShapes, axis,
                                               reinterpret_cast<float*>(output.buffer), outShape);
            } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
                std::vector<Shape> inputShapes(numInputTensors);
                std::vector<const uint8_t*> inputDataPtrs(numInputTensors);

                for (int i = 0; i < numInputTensors; i++) {
                    RunTimeOperandInfo& input = mOperands[ins[i]];
                    inputShapes[i] = input.shape();
                    inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
                }
                success = concatenationPrepare(inputShapes, axis, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          concatenationQuant8(inputDataPtrs, inputShapes, axis,
                                              reinterpret_cast<uint8_t*>(output.buffer),
                                              outShape);
            }
        } break;
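        // L2_NORMALIZATION rescales each vector along the last dimension to unit
        // L2 norm: out = in / sqrt(sum(in^2)).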
        case OperationType::L2_NORMALIZATION: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normFloat32(reinterpret_cast<const float*>(input.buffer),
                                        input.shape(),
                                        reinterpret_cast<float*>(output.buffer),
                                        outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<uint8_t*>(output.buffer),
                                       outShape);
            }
        } break;
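        // LOCAL_RESPONSE_NORMALIZATION normalizes across channels:
        //   out[c] = in[c] / (bias + alpha * sum_{k in [c-radius, c+radius]} in[k]^2) ^ beta.
        // Only the float32 path is implemented here; other input types leave
        // success false and the operation fails below.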
        case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
            if (!allParametersPresent(5, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
            float bias = getScalarData<float>(mOperands[ins[2]]);
            float alpha = getScalarData<float>(mOperands[ins[3]]);
            float beta = getScalarData<float>(mOperands[ins[4]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
                                                   input.shape(),
                                                   radius, bias, alpha, beta,
                                                   reinterpret_cast<float*>(output.buffer),
                                                   outShape);
            }
        } break;
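        // RESHAPE copies the input data unchanged into a tensor with the dimensions
        // given by the int32 targetShape tensor; at most one component may be -1,
        // in which case it is inferred from the remaining element count.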
        case OperationType::RESHAPE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& targetShape = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = reshapePrepare(input.shape(),
                                     reinterpret_cast<const int32_t*>(targetShape.buffer),
                                     getNumberOfElements(targetShape.shape()),
                                     &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      reshapeGeneric(reinterpret_cast<const void*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<void*>(output.buffer),
                                     outShape);
        } break;
        case OperationType::RESIZE_BILINEAR: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
            int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = resizeBilinearPrepare(input.shape(),
                                                width, height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
                                                input.shape(),
                                                reinterpret_cast<float*>(output.buffer),
                                                outShape);
            }
        } break;
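        // DEPTH_TO_SPACE rearranges each group of blockSize * blockSize depth values
        // into a blockSize x blockSize spatial block (the input depth must be
        // divisible by blockSize^2); SPACE_TO_DEPTH below is its inverse.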
        case OperationType::DEPTH_TO_SPACE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = depthToSpacePrepare(input.shape(),
                                          blockSize,
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      depthToSpaceGeneric(input.buffer,
                                          input.shape(),
                                          blockSize,
                                          output.buffer,
                                          outShape);
        } break;
        case OperationType::SPACE_TO_DEPTH: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = spaceToDepthPrepare(input.shape(),
                                          blockSize,
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      spaceToDepthGeneric(input.buffer,
                                          input.shape(),
                                          blockSize,
                                          output.buffer,
                                          outShape);
        } break;
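        // EMBEDDING_LOOKUP selects rows of the value tensor by index:
        // output[i, :] = values[lookups[i], :].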
        case OperationType::EMBEDDING_LOOKUP: {
            const RunTimeOperandInfo &values =
                mOperands[ins[EmbeddingLookup::kValueTensor]];
            const RunTimeOperandInfo &lookups =
                mOperands[ins[EmbeddingLookup::kLookupTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[EmbeddingLookup::kOutputTensor]];

            Shape outputShape;
            EmbeddingLookup lookup(operation, mOperands);

            success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lookup.Eval();
        } break;
        case OperationType::HASHTABLE_LOOKUP: {
            const RunTimeOperandInfo &lookups =
                mOperands[ins[HashtableLookup::kLookupTensor]];
            const RunTimeOperandInfo &keys =
                mOperands[ins[HashtableLookup::kKeyTensor]];
            const RunTimeOperandInfo &values =
                mOperands[ins[HashtableLookup::kValueTensor]];

            RunTimeOperandInfo &output =
                mOperands[outs[HashtableLookup::kOutputTensor]];
            RunTimeOperandInfo &hits =
                mOperands[outs[HashtableLookup::kHitsTensor]];

            Shape outputShape, hitShape;
            HashtableLookup lookup(operation, mOperands);

            success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
                                             &outputShape, &hitShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                setInfoAndAllocateIfNeeded(&hits, hitShape) &&
                lookup.Eval();
        } break;
        case OperationType::LSH_PROJECTION: {
            RunTimeOperandInfo &output =
                mOperands[outs[LSHProjection::kOutputTensor]];

            Shape outputShape;
            LSHProjection lsh(operation, mOperands);

            success = LSHProjection::Prepare(operation, mOperands,
                                             &outputShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lsh.Eval();
        } break;
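        // LSTM produces four outputs -- a scratch buffer plus the output state,
        // cell state, and output tensors -- each of which must be shaped and
        // allocated before the cell is evaluated.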
        case OperationType::LSTM: {
            RunTimeOperandInfo &scratch =
                mOperands[outs[LSTMCell::kScratchBufferTensor]];
            RunTimeOperandInfo &outputStateOut =
                mOperands[outs[LSTMCell::kOutputStateOutTensor]];
            RunTimeOperandInfo &cellStateOut =
                mOperands[outs[LSTMCell::kCellStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[LSTMCell::kOutputTensor]];

            Shape scratchShape, outputStateShape, cellStateShape, outputShape;
            LSTMCell lstm_cell(operation, mOperands);

            success = LSTMCell::Prepare(operation, mOperands,
                                        &scratchShape, &outputStateShape,
                                        &cellStateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
                setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
                setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                lstm_cell.Eval();
        } break;
        case OperationType::RNN: {
            RunTimeOperandInfo &hiddenStateOut =
                mOperands[outs[RNN::kHiddenStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[RNN::kOutputTensor]];

            Shape hiddenStateShape, outputShape;
            RNN rnn_cell(operation, mOperands);

            success = RNN::Prepare(operation, mOperands,
                                   &hiddenStateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                rnn_cell.Eval();
        } break;
        case OperationType::SVDF: {
            RunTimeOperandInfo &stateOut =
                mOperands[outs[SVDF::kStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[SVDF::kOutputTensor]];

            Shape stateShape, outputShape;
            SVDF svdf(operation, mOperands);

            success = SVDF::Prepare(operation, mOperands,
                                    &stateShape, &outputShape) &&
                setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
                setInfoAndAllocateIfNeeded(&output, outputShape) &&
                svdf.Eval();
        } break;
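        // BATCH_TO_SPACE_ND moves data from the batch dimension into spatial blocks;
        // SPACE_TO_BATCH_ND below is the inverse, zero-padding the spatial dimensions
        // first according to the paddings tensor.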
        case OperationType::BATCH_TO_SPACE_ND: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& blockSize = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = batchToSpacePrepare(input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          blockSize.shape(),
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      batchToSpaceGeneric(input.buffer,
                                          input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          output.buffer,
                                          outShape);
        } break;
        case OperationType::SPACE_TO_BATCH_ND: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
            const RunTimeOperandInfo& paddings = mOperands[ins[2]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = spaceToBatchPrepare(input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          blockSize.shape(),
                                          reinterpret_cast<const int32_t*>(paddings.buffer),
                                          paddings.shape(),
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      spaceToBatchGeneric(input.buffer,
                                          input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          reinterpret_cast<const int32_t*>(paddings.buffer),
                                          paddings.shape(),
                                          output.buffer,
                                          outShape);
        } break;
        case OperationType::PAD: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& paddings = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = padPrepare(input.shape(),
                                 reinterpret_cast<const int32_t*>(paddings.buffer),
                                 paddings.shape(),
                                 &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      padGeneric(input.buffer,
                                 input.shape(),
                                 reinterpret_cast<const int32_t*>(paddings.buffer),
                                 output.buffer,
                                 outShape);
        } break;
        case OperationType::SQUEEZE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& squeezeDims = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = squeezePrepare(input.shape(),
                                     reinterpret_cast<const int32_t*>(squeezeDims.buffer),
                                     squeezeDims.shape(),
                                     &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      squeezeGeneric(input.buffer,
                                     input.shape(),
                                     output.buffer,
                                     outShape);
        } break;
        case OperationType::TRANSPOSE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& perms = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = transposePrepare(input.shape(),
                                       reinterpret_cast<const int32_t*>(perms.buffer),
                                       perms.shape(),
                                       &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      transposeGeneric(input.buffer,
                                       input.shape(),
                                       reinterpret_cast<const int32_t*>(perms.buffer),
                                       perms.shape(),
                                       output.buffer,
                                       outShape);
        } break;
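        // STRIDED_SLICE: if bit i of beginMask (endMask) is set, begins[i] (ends[i])
        // is ignored and the fullest possible range of dimension i is used instead;
        // a set bit in shrinkAxisMask collapses the corresponding dimension to size 1
        // and removes it from the output shape.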
        case OperationType::STRIDED_SLICE: {
            if (!allParametersPresent(7, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& begins = mOperands[ins[1]];
            const RunTimeOperandInfo& ends = mOperands[ins[2]];
            const RunTimeOperandInfo& strides = mOperands[ins[3]];
            int32_t beginMask = getScalarData<int32_t>(mOperands[ins[4]]);
            int32_t endMask = getScalarData<int32_t>(mOperands[ins[5]]);
            int32_t shrinkAxisMask = getScalarData<int32_t>(mOperands[ins[6]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = stridedSlicePrepare(input.shape(),
                                          reinterpret_cast<const int32_t*>(begins.buffer),
                                          begins.shape(),
                                          reinterpret_cast<const int32_t*>(ends.buffer),
                                          ends.shape(),
                                          reinterpret_cast<const int32_t*>(strides.buffer),
                                          strides.shape(),
                                          beginMask, endMask, shrinkAxisMask,
                                          &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      stridedSliceGeneric(input.buffer,
                                          input.shape(),
                                          reinterpret_cast<const int32_t*>(begins.buffer),
                                          reinterpret_cast<const int32_t*>(ends.buffer),
                                          reinterpret_cast<const int32_t*>(strides.buffer),
                                          beginMask, endMask, shrinkAxisMask,
                                          output.buffer,
                                          outShape);
        } break;
        case OperationType::DIV: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          divFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            }
        } break;
        case OperationType::SUB: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          subFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            }
        } break;
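        // MEAN reduces the input along the dimensions listed in the int32 axis
        // tensor; when keepDims > 0 the reduced dimensions are retained with size 1.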
        case OperationType::MEAN: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& axis = mOperands[ins[1]];
            int32_t keepDims = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = meanPrepare(input.shape(),
                                  reinterpret_cast<const int32_t*>(axis.buffer),
                                  axis.shape(),
                                  keepDims > 0,
                                  &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      meanGeneric(input.buffer,
                                  input.shape(),
                                  reinterpret_cast<const int32_t*>(axis.buffer),
                                  axis.shape(),
                                  keepDims > 0,
                                  output.buffer,
                                  outShape);
        } break;
        default:
            nnAssert(false);
            break;
    }
    if (!success) {
        LOG(ERROR) << getOperationName(operation.type) << " failed.";
        return ANEURALNETWORKS_OP_FAILED;
    }

    freeNoLongerUsedOperands(ins);
    return ANEURALNETWORKS_NO_ERROR;
}

ScopedOpenmpSettings::ScopedOpenmpSettings() {
    mBlocktimeInitial = kmp_get_blocktime();
    kmp_set_blocktime(20);  // ms, see b/109645291

#if NNAPI_LIMIT_CPU_THREADS
    // Code not yet enabled. The thread count chosen here is based on
    // benchmarking; see the longer comment by the class declaration.
    mMaxThreadsInitial = Eigen::nbThreads();
    const int nProcs = omp_get_num_procs();
    int threads = nProcs;
    if (nProcs >= 8) {
        threads = nProcs - 4;
    } else if (nProcs >= 4) {
        threads = nProcs - 2;
    }
    Eigen::setNbThreads(threads);
#endif
}

ScopedOpenmpSettings::~ScopedOpenmpSettings() {
    kmp_set_blocktime(mBlocktimeInitial);
#if NNAPI_LIMIT_CPU_THREADS
    Eigen::setNbThreads(mMaxThreadsInitial);
#endif
}
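
// A minimal usage sketch (an illustration, assuming a caller such as the
// executor's run path): the guard is stack-allocated so the OpenMP/Eigen
// settings apply only while a model is executing and are restored on scope
// exit.
//
//     {
//         ScopedOpenmpSettings openMpSettings;
//         // ... execute the model's operations ...
//     }   // blocktime (and thread count, if limited) restored here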


} // namespace nn
} // namespace android