1 /*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #define LOG_TAG "CpuExecutor"
18
19 #include "CpuExecutor.h"
20
21 #include "NeuralNetworks.h"
22 #include "Operations.h"
23
24 #include "Eigen/Core"
25 #include <omp.h>
26 #include <sys/mman.h>
27
28 namespace android {
29 namespace nn {
30
// TODO: short term, make shared memory mapping and updating a utility function.
32 // TODO: long term, implement mmap_fd as a hidl IMemory service.
RunTimePoolInfo::RunTimePoolInfo(const hidl_memory& hidlMemory, bool* fail) {
34 sp<IMemory> memory;
35 uint8_t* buffer = nullptr;
36
37 auto memType = hidlMemory.name();
38 if (memType == "ashmem") {
39 memory = mapMemory(hidlMemory);
40 if (memory == nullptr) {
41 LOG(ERROR) << "Can't map shared memory.";
42 if (fail) *fail = true;
43 return;
44 }
45 memory->update();
46 buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
47 if (buffer == nullptr) {
48 LOG(ERROR) << "Can't access shared memory.";
49 if (fail) *fail = true;
50 return;
51 }
52 } else if (memType == "mmap_fd") {
53 size_t size = hidlMemory.size();
54 int fd = hidlMemory.handle()->data[0];
55 int prot = hidlMemory.handle()->data[1];
56 size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
57 hidlMemory.handle()->data[3]);
58 buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
59 if (buffer == MAP_FAILED) {
60 LOG(ERROR) << "RunTimePoolInfo::set(): Can't mmap the file descriptor.";
61 if (fail) *fail = true;
62 return;
63 }
64 } else {
65 LOG(ERROR) << "RunTimePoolInfo::set(): unsupported hidl_memory type";
66 if (fail) *fail = true;
67 return;
68 }
69
70 mHidlMemory = hidlMemory;
71 mBuffer = buffer;
72 mMemory = memory;
73 }
74
RunTimePoolInfo::RunTimePoolInfo(uint8_t* buffer) {
76 mBuffer = buffer;
77 }
78
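// Move-only bookkeeping: the moved-from object's buffer pointer is cleared so a
// subsequent release() on it is a harmless no-op.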
RunTimePoolInfo::RunTimePoolInfo(RunTimePoolInfo&& other) {
80 moveFrom(std::move(other));
81 other.mBuffer = nullptr;
82 }
83
RunTimePoolInfo& RunTimePoolInfo::operator=(RunTimePoolInfo&& other) {
85 if (this != &other) {
86 release();
87 moveFrom(std::move(other));
88 other.mBuffer = nullptr;
89 }
90 return *this;
91 }
92
void RunTimePoolInfo::moveFrom(RunTimePoolInfo &&other) {
94 mHidlMemory = std::move(other.mHidlMemory);
95 mBuffer = std::move(other.mBuffer);
96 mMemory = std::move(other.mMemory);
97 }
98
void RunTimePoolInfo::release() {
100 if (mBuffer == nullptr) {
101 return;
102 }
103
104 auto memType = mHidlMemory.name();
105 if (memType == "ashmem") {
106 // nothing to do
107 } else if (memType == "mmap_fd") {
108 size_t size = mHidlMemory.size();
109 if (munmap(mBuffer, size)) {
110 LOG(ERROR) << "RunTimePoolInfo::release(): Can't munmap";
111 }
112 } else if (memType == "") {
113 // Represents a POINTER argument; nothing to do
114 } else {
115 LOG(ERROR) << "RunTimePoolInfo::release(): unsupported hidl_memory type";
116 }
117
118 mHidlMemory = hidl_memory();
119 mMemory = nullptr;
120 mBuffer = nullptr;
121 }
122
123 // Making sure the output data are correctly updated after execution.
bool RunTimePoolInfo::update() const {
125 auto memType = mHidlMemory.name();
126 if (memType == "ashmem") {
127 mMemory->commit();
128 return true;
129 } else if (memType == "mmap_fd") {
130 int prot = mHidlMemory.handle()->data[1];
131 if (prot & PROT_WRITE) {
132 size_t size = mHidlMemory.size();
133 return msync(mBuffer, size, MS_SYNC) == 0;
134 }
135 }
136 // No-op for other types of memory.
137 return true;
138 }
139
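// Maps every hidl_memory in `pools` into a RunTimePoolInfo. On any mapping
// failure the partially built vector is cleared and false is returned.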
bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
141 const hidl_vec<hidl_memory>& pools) {
142 poolInfos->clear();
143 poolInfos->reserve(pools.size());
144 bool fail = false;
145 for (const auto& pool : pools) {
146 poolInfos->emplace_back(pool, &fail);
147 }
148 if (fail) {
149 LOG(ERROR) << "Could not map pools";
150 poolInfos->clear();
151 return false;
152 }
153 return true;
154 }
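// A minimal usage sketch (illustrative only; `model`, `request`, and the error
// handling shown below are hypothetical caller-side code, not part of this file):
//
//   std::vector<RunTimePoolInfo> modelPools, requestPools;
//   if (!setRunTimePoolInfosFromHidlMemories(&modelPools, model.pools) ||
//       !setRunTimePoolInfosFromHidlMemories(&requestPools, request.pools)) {
//       return ANEURALNETWORKS_BAD_DATA;
//   }
//   CpuExecutor executor;
//   int n = executor.run(model, request, modelPools, requestPools);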
155
// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if needed.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
159 // For user-provided model output operands, the parameters must match the Shape
160 // calculated from the preparation step.
161 if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
162 if (info->type != shape.type ||
163 info->dimensions != shape.dimensions) {
164 LOG(ERROR) << "Invalid type or dimensions for model output";
165 return false;
166 }
167 if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
168 (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
169 LOG(ERROR) << "Invalid scale or zeroPoint for model output";
170 return false;
171 }
172 }
173 info->type = shape.type;
174 info->dimensions = shape.dimensions;
175 info->scale = shape.scale;
176 info->zeroPoint = shape.offset;
177 if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
178 uint32_t length = sizeOfData(info->type, info->dimensions);
179 info->buffer = new uint8_t[length];
180 if (info->buffer == nullptr) {
181 return false;
182 }
183 }
184 return true;
185 }
186
187 // Ignore the .pools entry in model and request. This will have been taken care of
188 // by the caller.
int CpuExecutor::run(const V1_0::Model& model, const Request& request,
190 const std::vector<RunTimePoolInfo>& modelPoolInfos,
191 const std::vector<RunTimePoolInfo>& requestPoolInfos) {
192 return run(convertToV1_1(model), request, modelPoolInfos, requestPoolInfos);
193 }
194
int CpuExecutor::run(const V1_1::Model& model, const Request& request,
196 const std::vector<RunTimePoolInfo>& modelPoolInfos,
197 const std::vector<RunTimePoolInfo>& requestPoolInfos) {
198 VLOG(CPUEXE) << "CpuExecutor::run() with request("
199 << SHOW_IF_DEBUG(toString(request)) << ")";
200
201 ScopedOpenmpSettings openMpSettings;
202
203 mModel = &model;
204 mRequest = &request; // TODO check if mRequest is needed
205 initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
// The model's operations are serialized in execution order.
207 for (const auto& operation : model.operations) {
208 int n = executeOperation(operation);
209 if (n != ANEURALNETWORKS_NO_ERROR) {
210 return n;
211 }
212 }
213 for (auto& runtimeInfo : modelPoolInfos) {
214 runtimeInfo.update();
215 }
216 for (auto& runtimeInfo : requestPoolInfos) {
217 runtimeInfo.update();
218 }
219 mModel = nullptr;
220 mRequest = nullptr;
221 VLOG(CPUEXE) << "Completed run normally";
222 return ANEURALNETWORKS_NO_ERROR;
223 }
224
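// Builds mOperands from the model's operand list (constants point into the model
// pools, temporaries start unallocated), then overlays the request's input and
// output buffer locations and any runtime-specified dimensions.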
bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
226 const std::vector<RunTimePoolInfo>& requestPoolInfos) {
227 VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
228 const size_t count = mModel->operands.size();
229 mOperands.resize(count);
230
231 // Start by setting the runtime info to what's in the model.
232 for (size_t i = 0; i < count; i++) {
233 const Operand& from = mModel->operands[i];
234 RunTimeOperandInfo& to = mOperands[i];
235 to.type = from.type;
236 to.dimensions = from.dimensions;
237 to.scale = from.scale;
238 to.zeroPoint = from.zeroPoint;
239 to.length = from.location.length;
240 to.lifetime = from.lifetime;
241 switch (from.lifetime) {
242 case OperandLifeTime::TEMPORARY_VARIABLE:
243 to.buffer = nullptr;
244 to.numberOfUsesLeft = from.numberOfConsumers;
245 break;
246 case OperandLifeTime::CONSTANT_COPY:
247 to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
248 to.numberOfUsesLeft = 0;
249 break;
250 case OperandLifeTime::CONSTANT_REFERENCE: {
251 auto poolIndex = from.location.poolIndex;
252 nnAssert(poolIndex < modelPoolInfos.size());
253 auto& r = modelPoolInfos[poolIndex];
254 to.buffer = r.getBuffer() + from.location.offset;
255 to.numberOfUsesLeft = 0;
256 break;
257 }
258 case OperandLifeTime::MODEL_INPUT:
259 case OperandLifeTime::MODEL_OUTPUT:
260 case OperandLifeTime::NO_VALUE:
261 to.buffer = nullptr;
262 to.numberOfUsesLeft = 0;
263 break;
264 default:
265 nnAssert(false);
266 break;
267 }
268 }
269
270 // Adjust the runtime info for the arguments passed to the model,
271 // modifying the buffer location, and possibly the dimensions.
272 auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
273 const hidl_vec<RequestArgument>& arguments) {
274 nnAssert(indexes.size() == arguments.size());
275 for (size_t i = 0; i < indexes.size(); i++) {
276 const uint32_t operandIndex = indexes[i];
277 const RequestArgument& from = arguments[i];
278 RunTimeOperandInfo& to = mOperands[operandIndex];
279 if (from.dimensions.size() > 0) {
280 // It's the responsibility of the caller to validate that
281 // from.dimensions only modifies the dimensions that were
282 // unspecified in the model. That's the case in SampleDriver.cpp
283 // with the call to validateRequest().
284 // TODO make sure that's the case for the default CPU path.
285 to.dimensions = from.dimensions;
286 }
287 if (from.hasNoValue) {
288 to.lifetime = OperandLifeTime::NO_VALUE;
289 nnAssert(to.buffer == nullptr);
290 } else {
291 auto poolIndex = from.location.poolIndex;
292 nnAssert(poolIndex < requestPoolInfos.size());
293 auto& r = requestPoolInfos[poolIndex];
294 to.buffer = r.getBuffer() + from.location.offset;
295 }
296 }
297 };
298 updateForArguments(mModel->inputIndexes, mRequest->inputs);
299 updateForArguments(mModel->outputIndexes, mRequest->outputs);
300
301 return true;
302 }
303
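// Decrements the remaining-use count of each temporary input operand and frees
// its buffer once no later operation will read it.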
void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
305 for (uint32_t i : inputs) {
306 auto& info = mOperands[i];
// Skip operands that are constants or model inputs/outputs; their use count is always zero.
308 if (info.numberOfUsesLeft == 0) {
309 continue;
310 }
311 info.numberOfUsesLeft--;
312 if (info.numberOfUsesLeft == 0) {
313 nnAssert(info.buffer != nullptr);
314 delete[] info.buffer;
315 info.buffer = nullptr;
316 }
317 }
318 }
319
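// Executes a single operation: checks that the expected operands are present,
// computes and sets the output shape (allocating temporary output buffers if
// needed), and dispatches to the reference kernel for the operand's data type.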
int CpuExecutor::executeOperation(const Operation& operation) {
321 // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
322 const hidl_vec<uint32_t>& ins = operation.inputs;
323 const hidl_vec<uint32_t>& outs = operation.outputs;
324 bool success = false;
325
326 // Function to verify that the number of input and output parameters
327 // matches what is expected. Also checks that all the parameters have
328 // values. This function is to be used only for operations that do not
329 // accept optional arguments.
330 // TODO Have a version that works for optional arguments.
331 auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
332 size_t requiredOuts) -> bool {
333 auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
334 const char* type) -> bool {
335 size_t actualCount = indexes.size();
336 if (actualCount != requiredCount) {
337 LOG(ERROR) << getOperationName(operation.type)
338 << ": Invalid number of " << type << " operands. Got " << actualCount
339 << " of " << requiredCount;
340 return false;
341 }
342 for (size_t i = 0; i < actualCount; i++) {
343 if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
344 LOG(ERROR) << getOperationName(operation.type) << " " << type
345 << " operand " << i << " is required but missing.";
346 return false;
347 }
348 }
349 return true;
350 };
351 return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
352 };
353
354 switch (operation.type) {
355 case OperationType::OEM_OPERATION: {
356 LOG(ERROR) << "OEM operation not supported for CPU execution";
357 success = false;
358 } break;
359 case OperationType::ADD: {
360 if (!allParametersPresent(3, 1)) {
361 return ANEURALNETWORKS_BAD_DATA;
362 }
363 const RunTimeOperandInfo& in1 = mOperands[ins[0]];
364 const RunTimeOperandInfo& in2 = mOperands[ins[1]];
365 int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
366
367 RunTimeOperandInfo& out = mOperands[outs[0]];
368 Shape outShape = out.shape();
369
370 if (in1.type == OperandType::TENSOR_FLOAT32) {
371 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
372 setInfoAndAllocateIfNeeded(&out, outShape) &&
373 addFloat32(reinterpret_cast<const float*>(in1.buffer),
374 in1.shape(),
375 reinterpret_cast<const float*>(in2.buffer),
376 in2.shape(),
377 activation,
378 reinterpret_cast<float*>(out.buffer),
379 outShape);
380 } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
381 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
382 setInfoAndAllocateIfNeeded(&out, outShape) &&
383 addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
384 in1.shape(),
385 reinterpret_cast<const uint8_t*>(in2.buffer),
386 in2.shape(),
387 activation,
388 reinterpret_cast<uint8_t*>(out.buffer),
389 outShape);
390 }
391 } break;
392 case OperationType::MUL: {
393 if (!allParametersPresent(3, 1)) {
394 return ANEURALNETWORKS_BAD_DATA;
395 }
396 const RunTimeOperandInfo& in1 = mOperands[ins[0]];
397 const RunTimeOperandInfo& in2 = mOperands[ins[1]];
398 int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
399
400 RunTimeOperandInfo& out = mOperands[outs[0]];
401 Shape outShape = out.shape();
402
403 if (in1.type == OperandType::TENSOR_FLOAT32) {
404 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
405 setInfoAndAllocateIfNeeded(&out, outShape) &&
406 mulFloat32(reinterpret_cast<const float*>(in1.buffer),
407 in1.shape(),
408 reinterpret_cast<const float*>(in2.buffer),
409 in2.shape(),
410 activation,
411 reinterpret_cast<float*>(out.buffer),
412 outShape);
413 } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
414 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
415 setInfoAndAllocateIfNeeded(&out, outShape) &&
416 mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
417 in1.shape(),
418 reinterpret_cast<const uint8_t*>(in2.buffer),
419 in2.shape(),
420 activation,
421 reinterpret_cast<uint8_t*>(out.buffer),
422 outShape);
423 }
424 } break;
425 case OperationType::FLOOR: {
426 if (!allParametersPresent(1, 1)) {
427 return ANEURALNETWORKS_BAD_DATA;
428 }
429 const RunTimeOperandInfo& input = mOperands[ins[0]];
430 RunTimeOperandInfo& output = mOperands[outs[0]];
431 Shape outShape = output.shape();
432
433 if (input.type == OperandType::TENSOR_FLOAT32) {
434 success = floorPrepare(input.shape(), &outShape) &&
435 setInfoAndAllocateIfNeeded(&output, outShape) &&
436 floorFloat32(reinterpret_cast<const float*>(input.buffer),
437 reinterpret_cast<float*>(output.buffer),
438 outShape);
439 }
440 } break;
441 case OperationType::DEQUANTIZE: {
442 if (!allParametersPresent(1, 1)) {
443 return ANEURALNETWORKS_BAD_DATA;
444 }
445 const RunTimeOperandInfo& input = mOperands[ins[0]];
446 RunTimeOperandInfo& output = mOperands[outs[0]];
447 Shape outShape = output.shape();
448
449 if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
450 success = dequantizePrepare(input.shape(), &outShape) &&
451 setInfoAndAllocateIfNeeded(&output, outShape) &&
452 dequantizeQuant8ToFloat32(
453 reinterpret_cast<const uint8_t*>(input.buffer),
454 reinterpret_cast<float*>(output.buffer),
455 input.shape());
456 }
457 } break;
458 case OperationType::DEPTHWISE_CONV_2D: {
459 const size_t inCount = ins.size();
460 if ((inCount != 11 && inCount != 8) ||
461 !allParametersPresent(inCount, 1)) {
462 return ANEURALNETWORKS_BAD_DATA;
463 }
464 const RunTimeOperandInfo& input = mOperands[ins[0]];
465 const RunTimeOperandInfo& filter = mOperands[ins[1]];
466 const RunTimeOperandInfo& bias = mOperands[ins[2]];
467
468 int32_t padding_left, padding_right;
469 int32_t padding_top, padding_bottom;
470 int32_t stride_width, stride_height;
471 int32_t depth_multiplier;
472 int32_t activation;
473
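// With 11 inputs the padding is given explicitly; with 8 inputs an implicit
// padding scheme is given and converted to explicit values here. The same
// pattern is used by the other convolution and pooling cases below.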
474 if (inCount == 11) {
475 padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
476 padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
477 padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
478 padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
479 stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
480 stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
481 depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
482 activation = getScalarData<int32_t>(mOperands[ins[10]]);
483 } else {
484 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
485 stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
486 stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
487 depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
488 activation = getScalarData<int32_t>(mOperands[ins[7]]);
489
490 Shape inputShape = input.shape();
491 Shape filterShape = filter.shape();
492 int32_t input_width = getSizeOfDimension(inputShape, 2);
493 int32_t input_height = getSizeOfDimension(inputShape, 1);
494 int32_t filter_width = getSizeOfDimension(filterShape, 2);
495 int32_t filter_height = getSizeOfDimension(filterShape, 1);
496 calculateExplicitPadding(input_width, stride_width,
497 filter_width, padding_implicit,
498 &padding_left, &padding_right);
499 calculateExplicitPadding(input_height, stride_height,
500 filter_height, padding_implicit,
501 &padding_top, &padding_bottom);
502 }
503
504 RunTimeOperandInfo& output = mOperands[outs[0]];
505 Shape outShape = output.shape();
506
507 if (input.type == OperandType::TENSOR_FLOAT32) {
508 success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
509 padding_left, padding_right,
510 padding_top, padding_bottom,
511 stride_width, stride_height,
512 &outShape) &&
513 setInfoAndAllocateIfNeeded(&output, outShape) &&
514 depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
515 input.shape(),
516 reinterpret_cast<const float*>(filter.buffer),
517 filter.shape(),
518 reinterpret_cast<const float*>(bias.buffer),
519 bias.shape(),
520 padding_left, padding_right,
521 padding_top, padding_bottom,
522 stride_width, stride_height,
523 depth_multiplier, activation,
524 reinterpret_cast<float*>(output.buffer),
525 outShape);
526 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
527 success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
528 padding_left, padding_right,
529 padding_top, padding_bottom,
530 stride_width, stride_height,
531 &outShape) &&
532 setInfoAndAllocateIfNeeded(&output, outShape) &&
533 depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
534 input.shape(),
535 reinterpret_cast<const uint8_t*>(filter.buffer),
536 filter.shape(),
537 reinterpret_cast<const int32_t*>(bias.buffer),
538 bias.shape(),
539 padding_left, padding_right,
540 padding_top, padding_bottom,
541 stride_width, stride_height,
542 depth_multiplier, activation,
543 reinterpret_cast<uint8_t*>(output.buffer),
544 outShape);
545 }
546
547 } break;
548 case OperationType::CONV_2D: {
549 const size_t inCount = ins.size();
550 if ((inCount != 10 && inCount != 7) ||
551 !allParametersPresent(inCount, 1)) {
552 return ANEURALNETWORKS_BAD_DATA;
553 }
554 const RunTimeOperandInfo& input = mOperands[ins[0]];
555 const RunTimeOperandInfo& filter = mOperands[ins[1]];
556 const RunTimeOperandInfo& bias = mOperands[ins[2]];
557
558 int32_t padding_left, padding_right;
559 int32_t padding_top, padding_bottom;
560 int32_t stride_width, stride_height;
561 int32_t activation;
562
563 if (inCount == 10) {
564 padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
565 padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
566 padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
567 padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
568 stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
569 stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
570 activation = getScalarData<int32_t>(mOperands[ins[9]]);
571 } else {
572 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
573 stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
574 stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
575 activation = getScalarData<int32_t>(mOperands[ins[6]]);
576
577 Shape inputShape = input.shape();
578 Shape filterShape = filter.shape();
579 int32_t input_width = getSizeOfDimension(inputShape, 2);
580 int32_t input_height = getSizeOfDimension(inputShape, 1);
581 int32_t filter_width = getSizeOfDimension(filterShape, 2);
582 int32_t filter_height = getSizeOfDimension(filterShape, 1);
583 calculateExplicitPadding(input_width, stride_width,
584 filter_width, padding_implicit,
585 &padding_left, &padding_right);
586 calculateExplicitPadding(input_height, stride_height,
587 filter_height, padding_implicit,
588 &padding_top, &padding_bottom);
589 }
590
591 RunTimeOperandInfo& output = mOperands[outs[0]];
592 Shape outShape = output.shape();
593
594 if (input.type == OperandType::TENSOR_FLOAT32) {
595 success = convPrepare(input.shape(), filter.shape(), bias.shape(),
596 padding_left, padding_right,
597 padding_top, padding_bottom,
598 stride_width, stride_height,
599 &outShape) &&
600 setInfoAndAllocateIfNeeded(&output, outShape) &&
601 convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
602 reinterpret_cast<const float*>(filter.buffer), filter.shape(),
603 reinterpret_cast<const float*>(bias.buffer), bias.shape(),
604 padding_left, padding_right,
605 padding_top, padding_bottom,
606 stride_width, stride_height, activation,
607 reinterpret_cast<float*>(output.buffer), outShape);
608 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
609 success = convPrepare(input.shape(), filter.shape(), bias.shape(),
610 padding_left, padding_right,
611 padding_top, padding_bottom,
612 stride_width, stride_height,
613 &outShape) &&
614 setInfoAndAllocateIfNeeded(&output, outShape) &&
615 convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
616 input.shape(),
617 reinterpret_cast<const uint8_t*>(filter.buffer),
618 filter.shape(),
619 reinterpret_cast<const int32_t*>(bias.buffer),
620 bias.shape(),
621 padding_left, padding_right,
622 padding_top, padding_bottom,
623 stride_width, stride_height, activation,
624 reinterpret_cast<uint8_t*>(output.buffer),
625 outShape);
626 }
627 } break;
628 case OperationType::AVERAGE_POOL_2D: {
629 const size_t inCount = ins.size();
630 if ((inCount != 10 && inCount != 7) ||
631 !allParametersPresent(inCount, 1)) {
632 return ANEURALNETWORKS_BAD_DATA;
633 }
634 const RunTimeOperandInfo& input = mOperands[ins[0]];
635
636 int32_t padding_left, padding_right;
637 int32_t padding_top, padding_bottom;
638 int32_t stride_width, stride_height;
639 int32_t filter_width, filter_height;
640 int32_t activation;
641
642 if (inCount == 10) {
643 padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
644 padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
645 padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
646 padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
647 stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
648 stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
649 filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
650 filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
651 activation = getScalarData<int32_t>(mOperands[ins[9]]);
652 } else {
653 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
654 stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
655 stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
656 filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
657 filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
658 activation = getScalarData<int32_t>(mOperands[ins[6]]);
659
660 Shape inputShape = input.shape();
661 int32_t input_width = getSizeOfDimension(inputShape, 2);
662 int32_t input_height = getSizeOfDimension(inputShape, 1);
663 calculateExplicitPadding(input_width, stride_width,
664 filter_width, padding_implicit,
665 &padding_left, &padding_right);
666 calculateExplicitPadding(input_height, stride_height,
667 filter_height, padding_implicit,
668 &padding_top, &padding_bottom);
669 }
670
671 RunTimeOperandInfo& output = mOperands[outs[0]];
672 Shape outShape = output.shape();
673
674 if (input.type == OperandType::TENSOR_FLOAT32) {
675 success = genericPoolingPrepare(input.shape(),
676 padding_left, padding_right,
677 padding_top, padding_bottom,
678 stride_width, stride_height,
679 filter_width, filter_height,
680 &outShape) &&
681 setInfoAndAllocateIfNeeded(&output, outShape) &&
682 averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
683 input.shape(),
684 padding_left, padding_right,
685 padding_top, padding_bottom,
686 stride_width, stride_height,
687 filter_width, filter_height, activation,
688 reinterpret_cast<float*>(output.buffer),
689 outShape);
690 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
691 success = genericPoolingPrepare(input.shape(),
692 padding_left, padding_right,
693 padding_top, padding_bottom,
694 stride_width, stride_height,
695 filter_width, filter_height,
696 &outShape) &&
697 setInfoAndAllocateIfNeeded(&output, outShape) &&
698 averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
699 input.shape(),
700 padding_left, padding_right,
701 padding_top, padding_bottom,
702 stride_width, stride_height,
703 filter_width, filter_height, activation,
704 reinterpret_cast<uint8_t*>(output.buffer),
705 outShape);
706 }
707 } break;
708 case OperationType::L2_POOL_2D: {
709 const size_t inCount = ins.size();
710 if ((inCount != 10 && inCount != 7) ||
711 !allParametersPresent(inCount, 1)) {
712 return ANEURALNETWORKS_BAD_DATA;
713 }
714 const RunTimeOperandInfo& input = mOperands[ins[0]];
715
716 int32_t padding_left, padding_right;
717 int32_t padding_top, padding_bottom;
718 int32_t stride_width, stride_height;
719 int32_t filter_width, filter_height;
720 int32_t activation;
721
722 if (inCount == 10) {
723 padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
724 padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
725 padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
726 padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
727 stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
728 stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
729 filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
730 filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
731 activation = getScalarData<int32_t>(mOperands[ins[9]]);
732 } else {
733 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
734 stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
735 stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
736 filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
737 filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
738 activation = getScalarData<int32_t>(mOperands[ins[6]]);
739
740 Shape inputShape = input.shape();
741 int32_t input_width = getSizeOfDimension(inputShape, 2);
742 int32_t input_height = getSizeOfDimension(inputShape, 1);
743 calculateExplicitPadding(input_width, stride_width,
744 filter_width, padding_implicit,
745 &padding_left, &padding_right);
746 calculateExplicitPadding(input_height, stride_height,
747 filter_height, padding_implicit,
748 &padding_top, &padding_bottom);
749 }
750
751 RunTimeOperandInfo& output = mOperands[outs[0]];
752 Shape outShape = output.shape();
753
754 if (input.type == OperandType::TENSOR_FLOAT32) {
755 success = genericPoolingPrepare(input.shape(),
756 padding_left, padding_right,
757 padding_top, padding_bottom,
758 stride_width, stride_height,
759 filter_width, filter_height,
760 &outShape) &&
761 setInfoAndAllocateIfNeeded(&output, outShape) &&
762 l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
763 input.shape(),
764 padding_left, padding_right,
765 padding_top, padding_bottom,
766 stride_width, stride_height,
767 filter_width, filter_height, activation,
768 reinterpret_cast<float*>(output.buffer),
769 outShape);
770 }
771 } break;
772 case OperationType::MAX_POOL_2D: {
773 const size_t inCount = ins.size();
774 if ((inCount != 10 && inCount != 7) ||
775 !allParametersPresent(inCount, 1)) {
776 return ANEURALNETWORKS_BAD_DATA;
777 }
778 const RunTimeOperandInfo& input = mOperands[ins[0]];
779
780 int32_t padding_left, padding_right;
781 int32_t padding_top, padding_bottom;
782 int32_t stride_width, stride_height;
783 int32_t filter_width, filter_height;
784 int32_t activation;
785
786 if (inCount == 10) {
787 padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
788 padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
789 padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
790 padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
791 stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
792 stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
793 filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
794 filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
795 activation = getScalarData<int32_t>(mOperands[ins[9]]);
796 } else {
797 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
798 stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
799 stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
800 filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
801 filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
802 activation = getScalarData<int32_t>(mOperands[ins[6]]);
803
804 Shape inputShape = input.shape();
805 int32_t input_width = getSizeOfDimension(inputShape, 2);
806 int32_t input_height = getSizeOfDimension(inputShape, 1);
807 calculateExplicitPadding(input_width, stride_width,
808 filter_width, padding_implicit,
809 &padding_left, &padding_right);
810 calculateExplicitPadding(input_height, stride_height,
811 filter_height, padding_implicit,
812 &padding_top, &padding_bottom);
813 }
814
815 RunTimeOperandInfo& output = mOperands[outs[0]];
816 Shape outShape = output.shape();
817
818 if (input.type == OperandType::TENSOR_FLOAT32) {
819 success = genericPoolingPrepare(input.shape(),
820 padding_left, padding_right,
821 padding_top, padding_bottom,
822 stride_width, stride_height,
823 filter_width, filter_height,
824 &outShape) &&
825 setInfoAndAllocateIfNeeded(&output, outShape) &&
826 maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
827 input.shape(),
828 padding_left, padding_right,
829 padding_top, padding_bottom,
830 stride_width, stride_height,
831 filter_width, filter_height, activation,
832 reinterpret_cast<float*>(output.buffer),
833 outShape);
834 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
835 success = genericPoolingPrepare(input.shape(),
836 padding_left, padding_right,
837 padding_top, padding_bottom,
838 stride_width, stride_height,
839 filter_width, filter_height,
840 &outShape) &&
841 setInfoAndAllocateIfNeeded(&output, outShape) &&
842 maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
843 input.shape(),
844 padding_left, padding_right,
845 padding_top, padding_bottom,
846 stride_width, stride_height,
847 filter_width, filter_height, activation,
848 reinterpret_cast<uint8_t*>(output.buffer),
849 outShape);
850 }
851
852 } break;
853 case OperationType::RELU: {
854 if (!allParametersPresent(1, 1)) {
855 return ANEURALNETWORKS_BAD_DATA;
856 }
857 const RunTimeOperandInfo& input = mOperands[ins[0]];
858 RunTimeOperandInfo& output = mOperands[outs[0]];
859 Shape outShape = output.shape();
860
861 if (input.type == OperandType::TENSOR_FLOAT32) {
862 success = genericActivationPrepare(input.shape(), &outShape) &&
863 setInfoAndAllocateIfNeeded(&output, outShape) &&
864 reluFloat32(reinterpret_cast<const float*>(input.buffer),
865 input.shape(),
866 reinterpret_cast<float*>(output.buffer),
867 outShape);
868 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
869 success = genericActivationPrepare(input.shape(), &outShape) &&
870 setInfoAndAllocateIfNeeded(&output, outShape) &&
871 reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
872 input.shape(),
873 reinterpret_cast<uint8_t*>(output.buffer),
874 outShape);
875 }
876 } break;
877 case OperationType::RELU1: {
878 if (!allParametersPresent(1, 1)) {
879 return ANEURALNETWORKS_BAD_DATA;
880 }
881 const RunTimeOperandInfo& input = mOperands[ins[0]];
882 RunTimeOperandInfo& output = mOperands[outs[0]];
883 Shape outShape = output.shape();
884
885 if (input.type == OperandType::TENSOR_FLOAT32) {
886 success = genericActivationPrepare(input.shape(), &outShape) &&
887 setInfoAndAllocateIfNeeded(&output, outShape) &&
888 relu1Float32(reinterpret_cast<const float*>(input.buffer),
889 input.shape(),
890 reinterpret_cast<float*>(output.buffer),
891 outShape);
892 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
893 success = genericActivationPrepare(input.shape(), &outShape) &&
894 setInfoAndAllocateIfNeeded(&output, outShape) &&
895 relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
896 input.shape(),
897 reinterpret_cast<uint8_t*>(output.buffer),
898 outShape);
899 }
900 } break;
901 case OperationType::RELU6: {
902 if (!allParametersPresent(1, 1)) {
903 return ANEURALNETWORKS_BAD_DATA;
904 }
905 const RunTimeOperandInfo& input = mOperands[ins[0]];
906 RunTimeOperandInfo& output = mOperands[outs[0]];
907 Shape outShape = output.shape();
908
909 if (input.type == OperandType::TENSOR_FLOAT32) {
910 success = genericActivationPrepare(input.shape(), &outShape) &&
911 setInfoAndAllocateIfNeeded(&output, outShape) &&
912 relu6Float32(reinterpret_cast<const float*>(input.buffer),
913 input.shape(),
914 reinterpret_cast<float*>(output.buffer),
915 outShape);
916 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
917 success = genericActivationPrepare(input.shape(), &outShape) &&
918 setInfoAndAllocateIfNeeded(&output, outShape) &&
919 relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
920 input.shape(),
921 reinterpret_cast<uint8_t*>(output.buffer),
922 outShape);
923 }
924 } break;
925 case OperationType::TANH: {
926 if (!allParametersPresent(1, 1)) {
927 return ANEURALNETWORKS_BAD_DATA;
928 }
929 const RunTimeOperandInfo& input = mOperands[ins[0]];
930 RunTimeOperandInfo& output = mOperands[outs[0]];
931 Shape outShape = output.shape();
932
933 if (input.type == OperandType::TENSOR_FLOAT32) {
934 success = genericActivationPrepare(input.shape(), &outShape) &&
935 setInfoAndAllocateIfNeeded(&output, outShape) &&
936 tanhFloat32(reinterpret_cast<const float*>(input.buffer),
937 input.shape(),
938 reinterpret_cast<float*>(output.buffer),
939 outShape);
940 }
941 } break;
942 case OperationType::LOGISTIC: {
943 if (!allParametersPresent(1, 1)) {
944 return ANEURALNETWORKS_BAD_DATA;
945 }
946 const RunTimeOperandInfo& input = mOperands[ins[0]];
947 RunTimeOperandInfo& output = mOperands[outs[0]];
948 Shape outShape = output.shape();
949
950 if (input.type == OperandType::TENSOR_FLOAT32) {
951 success = genericActivationPrepare(input.shape(), &outShape) &&
952 setInfoAndAllocateIfNeeded(&output, outShape) &&
953 logisticFloat32(reinterpret_cast<const float*>(input.buffer),
954 input.shape(),
955 reinterpret_cast<float*>(output.buffer),
956 outShape);
957 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
958 success = genericActivationPrepare(input.shape(), &outShape) &&
959 setInfoAndAllocateIfNeeded(&output, outShape) &&
960 logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
961 input.shape(),
962 reinterpret_cast<uint8_t*>(output.buffer),
963 outShape);
964 }
965 } break;
966 case OperationType::SOFTMAX: {
967 if (!allParametersPresent(2, 1)) {
968 return ANEURALNETWORKS_BAD_DATA;
969 }
970 RunTimeOperandInfo& input = mOperands[ins[0]];
971 float beta = getScalarData<float>(mOperands[ins[1]]);
972 if (beta <= 0.0f) {
973 LOG(ERROR) << "beta must be positive for softmax";
974 return ANEURALNETWORKS_BAD_DATA;
975 }
976
977 RunTimeOperandInfo& output = mOperands[outs[0]];
978 Shape outShape = output.shape();
979
980 if (input.type == OperandType::TENSOR_FLOAT32) {
981 success = genericActivationPrepare(input.shape(), &outShape) &&
982 setInfoAndAllocateIfNeeded(&output, outShape) &&
983 softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
984 input.shape(),
985 beta,
986 reinterpret_cast<float*>(output.buffer),
987 output.shape());
988 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
989 success = genericActivationPrepare(input.shape(), &outShape) &&
990 setInfoAndAllocateIfNeeded(&output, outShape) &&
991 softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
992 input.shape(),
993 beta,
994 reinterpret_cast<uint8_t*>(output.buffer),
995 output.shape());
996 }
997 } break;
998 case OperationType::FULLY_CONNECTED: {
999 if (!allParametersPresent(4, 1)) {
1000 return ANEURALNETWORKS_BAD_DATA;
1001 }
1002 RunTimeOperandInfo& input = mOperands[ins[0]];
1003 RunTimeOperandInfo& weights = mOperands[ins[1]];
1004 RunTimeOperandInfo& bias = mOperands[ins[2]];
1005
1006 int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
1007
1008 RunTimeOperandInfo& output = mOperands[outs[0]];
1009 Shape outShape = output.shape();
1010
1011 if (input.type == OperandType::TENSOR_FLOAT32) {
1012 success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
1013 &outShape) &&
1014 setInfoAndAllocateIfNeeded(&output, outShape) &&
1015 fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer),
1016 input.shape(),
1017 reinterpret_cast<const float*>(weights.buffer),
1018 weights.shape(),
1019 reinterpret_cast<const float*>(bias.buffer),
1020 bias.shape(),
1021 activation,
1022 reinterpret_cast<float*>(output.buffer),
1023 outShape);
1024 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
1025 success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
1026 &outShape) &&
1027 setInfoAndAllocateIfNeeded(&output, outShape) &&
1028 fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
1029 input.shape(),
1030 reinterpret_cast<const uint8_t*>(weights.buffer),
1031 weights.shape(),
1032 reinterpret_cast<const int32_t*>(bias.buffer),
1033 bias.shape(),
1034 activation,
1035 reinterpret_cast<uint8_t*>(output.buffer),
1036 outShape);
1037 }
1038 } break;
1039 case OperationType::CONCATENATION: {
1040 if (outs.size() != 1 || ins.size() < 2) {
1041 return ANEURALNETWORKS_BAD_DATA;
1042 }
1043 int numInputTensors = ins.size() - 1;
1044 int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
1045
1046 RunTimeOperandInfo& output = mOperands[outs[0]];
1047 Shape outShape = output.shape();
1048
1049 const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
1050 if (firstInput.type == OperandType::TENSOR_FLOAT32) {
1051 std::vector<Shape> inputShapes(numInputTensors);
1052 std::vector<const float*> inputDataPtrs(numInputTensors);
1053
1054 for (int i=0; i<numInputTensors; i++) {
1055 RunTimeOperandInfo& input = mOperands[ins[i]];
1056 inputShapes[i] = input.shape();
1057 inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
1058 }
1059 success = concatenationPrepare(inputShapes, axis, &outShape) &&
1060 setInfoAndAllocateIfNeeded(&output, outShape) &&
1061 concatenationFloat32(inputDataPtrs, inputShapes, axis,
1062 reinterpret_cast<float*>(output.buffer), outShape);
1063 } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
1064 std::vector<Shape> inputShapes(numInputTensors);
1065 std::vector<const uint8_t*> inputDataPtrs(numInputTensors);
1066
1067 for (int i=0; i<numInputTensors; i++) {
1068 RunTimeOperandInfo& input = mOperands[ins[i]];
1069 inputShapes[i] = input.shape();
1070 inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
1071 }
1072 success = concatenationPrepare(inputShapes, axis, &outShape) &&
1073 setInfoAndAllocateIfNeeded(&output, outShape) &&
1074 concatenationQuant8(inputDataPtrs, inputShapes, axis,
1075 reinterpret_cast<uint8_t*>(output.buffer),
1076 outShape);
1077 }
1078 } break;
1079 case OperationType::L2_NORMALIZATION: {
1080 if (!allParametersPresent(1, 1)) {
1081 return ANEURALNETWORKS_BAD_DATA;
1082 }
1083 const RunTimeOperandInfo& input = mOperands[ins[0]];
1084 RunTimeOperandInfo& output = mOperands[outs[0]];
1085 Shape outShape = output.shape();
1086
1087 if (input.type == OperandType::TENSOR_FLOAT32) {
1088 success = genericNormalizationPrepare(input.shape(), &outShape) &&
1089 setInfoAndAllocateIfNeeded(&output, outShape) &&
1090 l2normFloat32(reinterpret_cast<const float*>(input.buffer),
1091 input.shape(),
1092 reinterpret_cast<float*>(output.buffer),
1093 outShape);
1094 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
1095 success = genericNormalizationPrepare(input.shape(), &outShape) &&
1096 setInfoAndAllocateIfNeeded(&output, outShape) &&
1097 l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
1098 input.shape(),
1099 reinterpret_cast<uint8_t*>(output.buffer),
1100 outShape);
1101 }
1102 } break;
1103 case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
1104 if (!allParametersPresent(5, 1)) {
1105 return ANEURALNETWORKS_BAD_DATA;
1106 }
1107 const RunTimeOperandInfo& input = mOperands[ins[0]];
1108 int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
1109 float bias = getScalarData<float>(mOperands[ins[2]]);
1110 float alpha = getScalarData<float>(mOperands[ins[3]]);
1111 float beta = getScalarData<float>(mOperands[ins[4]]);
1112
1113 RunTimeOperandInfo& output = mOperands[outs[0]];
1114 Shape outShape = output.shape();
1115
1116 if (input.type == OperandType::TENSOR_FLOAT32) {
1117 success = genericNormalizationPrepare(input.shape(), &outShape) &&
1118 setInfoAndAllocateIfNeeded(&output, outShape) &&
1119 localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
1120 input.shape(),
1121 radius, bias, alpha, beta,
1122 reinterpret_cast<float*>(output.buffer),
1123 outShape);
1124 }
1125 } break;
1126 case OperationType::RESHAPE: {
1127 if (!allParametersPresent(2, 1)) {
1128 return ANEURALNETWORKS_BAD_DATA;
1129 }
1130 const RunTimeOperandInfo& input = mOperands[ins[0]];
1131 const RunTimeOperandInfo& targetShape = mOperands[ins[1]];
1132
1133 RunTimeOperandInfo& output = mOperands[outs[0]];
1134 Shape outShape = output.shape();
1135
1136 success = reshapePrepare(input.shape(),
1137 reinterpret_cast<const int32_t*>(targetShape.buffer),
1138 getNumberOfElements(targetShape.shape()),
1139 &outShape) &&
1140 setInfoAndAllocateIfNeeded(&output, outShape) &&
1141 reshapeGeneric(reinterpret_cast<const void*>(input.buffer),
1142 input.shape(),
1143 reinterpret_cast<void*>(output.buffer),
1144 outShape);
1145 } break;
1146 case OperationType::RESIZE_BILINEAR: {
1147 if (!allParametersPresent(3, 1)) {
1148 return ANEURALNETWORKS_BAD_DATA;
1149 }
1150 const RunTimeOperandInfo& input = mOperands[ins[0]];
1151 int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
1152 int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);
1153
1154 RunTimeOperandInfo& output = mOperands[outs[0]];
1155 Shape outShape = output.shape();
1156
1157 if (input.type == OperandType::TENSOR_FLOAT32) {
1158 success = resizeBilinearPrepare(input.shape(),
1159 width, height,
1160 &outShape) &&
1161 setInfoAndAllocateIfNeeded(&output, outShape) &&
1162 resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
1163 input.shape(),
1164 reinterpret_cast<float*>(output.buffer),
1165 outShape);
1166 }
1167 } break;
1168 case OperationType::DEPTH_TO_SPACE: {
1169 if (!allParametersPresent(2, 1)) {
1170 return ANEURALNETWORKS_BAD_DATA;
1171 }
1172 const RunTimeOperandInfo& input = mOperands[ins[0]];
1173 int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
1174
1175 RunTimeOperandInfo& output = mOperands[outs[0]];
1176 Shape outShape = output.shape();
1177
1178 success = depthToSpacePrepare(input.shape(),
1179 blockSize,
1180 &outShape) &&
1181 setInfoAndAllocateIfNeeded(&output, outShape) &&
1182 depthToSpaceGeneric(input.buffer,
1183 input.shape(),
1184 blockSize,
1185 output.buffer,
1186 outShape);
1187 } break;
1188 case OperationType::SPACE_TO_DEPTH: {
1189 if (!allParametersPresent(2, 1)) {
1190 return ANEURALNETWORKS_BAD_DATA;
1191 }
1192 const RunTimeOperandInfo& input = mOperands[ins[0]];
1193 int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
1194
1195 RunTimeOperandInfo& output = mOperands[outs[0]];
1196 Shape outShape = output.shape();
1197
1198 success = spaceToDepthPrepare(input.shape(),
1199 blockSize,
1200 &outShape) &&
1201 setInfoAndAllocateIfNeeded(&output, outShape) &&
1202 spaceToDepthGeneric(input.buffer,
1203 input.shape(),
1204 blockSize,
1205 output.buffer,
1206 outShape);
1207 } break;
1208 case OperationType::EMBEDDING_LOOKUP: {
1209 const RunTimeOperandInfo &values =
1210 mOperands[ins[EmbeddingLookup::kValueTensor]];
1211 const RunTimeOperandInfo &lookups =
1212 mOperands[ins[EmbeddingLookup::kLookupTensor]];
1213 RunTimeOperandInfo &output =
1214 mOperands[outs[EmbeddingLookup::kOutputTensor]];
1215
1216 Shape outputShape;
1217 EmbeddingLookup lookup(operation, mOperands);
1218
1219 success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
1220 setInfoAndAllocateIfNeeded(&output, outputShape) &&
1221 lookup.Eval();
1222 } break;
1223 case OperationType::HASHTABLE_LOOKUP: {
1224 const RunTimeOperandInfo &lookups =
1225 mOperands[ins[HashtableLookup::kLookupTensor]];
1226 const RunTimeOperandInfo &keys =
1227 mOperands[ins[HashtableLookup::kKeyTensor]];
1228 const RunTimeOperandInfo &values =
1229 mOperands[ins[HashtableLookup::kValueTensor]];
1230
1231 RunTimeOperandInfo &output =
1232 mOperands[outs[HashtableLookup::kOutputTensor]];
1233 RunTimeOperandInfo &hits =
1234 mOperands[outs[HashtableLookup::kHitsTensor]];
1235
1236 Shape outputShape, hitShape;
1237 HashtableLookup lookup(operation, mOperands);
1238
1239 success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
1240 &outputShape, &hitShape) &&
1241 setInfoAndAllocateIfNeeded(&output, outputShape) &&
1242 setInfoAndAllocateIfNeeded(&hits, hitShape) &&
1243 lookup.Eval();
1244 } break;
1245 case OperationType::LSH_PROJECTION: {
1246 RunTimeOperandInfo &output =
1247 mOperands[outs[LSHProjection::kOutputTensor]];
1248
1249 Shape outputShape;
1250 LSHProjection lsh(operation, mOperands);
1251
1252 success = LSHProjection::Prepare(operation, mOperands,
1253 &outputShape) &&
1254 setInfoAndAllocateIfNeeded(&output, outputShape) &&
1255 lsh.Eval();
1256 } break;
1257 case OperationType::LSTM: {
1258 RunTimeOperandInfo &scratch =
1259 mOperands[outs[LSTMCell::kScratchBufferTensor]];
1260 RunTimeOperandInfo &outputStateOut =
1261 mOperands[outs[LSTMCell::kOutputStateOutTensor]];
1262 RunTimeOperandInfo &cellStateOut =
1263 mOperands[outs[LSTMCell::kCellStateOutTensor]];
1264 RunTimeOperandInfo &output =
1265 mOperands[outs[LSTMCell::kOutputTensor]];
1266
1267 Shape scratchShape, outputStateShape, cellStateShape, outputShape;
1268 LSTMCell lstm_cell(operation, mOperands);
1269
1270 success = LSTMCell::Prepare(operation, mOperands,
1271 &scratchShape, &outputStateShape,
1272 &cellStateShape, &outputShape) &&
1273 setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
1274 setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
1275 setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
1276 setInfoAndAllocateIfNeeded(&output, outputShape) &&
1277 lstm_cell.Eval();
1278 } break;
1279 case OperationType::RNN: {
1280 RunTimeOperandInfo &hiddenStateOut =
1281 mOperands[outs[RNN::kHiddenStateOutTensor]];
1282 RunTimeOperandInfo &output =
1283 mOperands[outs[RNN::kOutputTensor]];
1284
1285 Shape hiddenStateShape, outputShape;
1286 RNN rnn_cell(operation, mOperands);
1287
1288 success = RNN::Prepare(operation, mOperands,
1289 &hiddenStateShape, &outputShape) &&
1290 setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
1291 setInfoAndAllocateIfNeeded(&output, outputShape) &&
1292 rnn_cell.Eval();
1293 } break;
1294 case OperationType::SVDF: {
1295 RunTimeOperandInfo &stateOut =
1296 mOperands[outs[SVDF::kStateOutTensor]];
1297 RunTimeOperandInfo &output =
1298 mOperands[outs[SVDF::kOutputTensor]];
1299
1300 Shape stateShape, outputShape;
1301 SVDF svdf(operation, mOperands);
1302
1303 success = SVDF::Prepare(operation, mOperands,
1304 &stateShape, &outputShape) &&
1305 setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
1306 setInfoAndAllocateIfNeeded(&output, outputShape) &&
1307 svdf.Eval();
1308 } break;
1309 case OperationType::BATCH_TO_SPACE_ND: {
1310 if (!allParametersPresent(2, 1)) {
1311 return ANEURALNETWORKS_BAD_DATA;
1312 }
1313 const RunTimeOperandInfo& input = mOperands[ins[0]];
1314 const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
1315
1316 RunTimeOperandInfo& output = mOperands[outs[0]];
1317 Shape outShape = output.shape();
1318
1319 success = batchToSpacePrepare(input.shape(),
1320 reinterpret_cast<const int32_t*>(blockSize.buffer),
1321 blockSize.shape(),
1322 &outShape) &&
1323 setInfoAndAllocateIfNeeded(&output, outShape) &&
1324 batchToSpaceGeneric(input.buffer,
1325 input.shape(),
1326 reinterpret_cast<const int32_t*>(blockSize.buffer),
1327 output.buffer,
1328 outShape);
1329 } break;
1330 case OperationType::SPACE_TO_BATCH_ND: {
1331 if (!allParametersPresent(3, 1)) {
1332 return ANEURALNETWORKS_BAD_DATA;
1333 }
1334 const RunTimeOperandInfo& input = mOperands[ins[0]];
1335 const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
1336 const RunTimeOperandInfo& paddings = mOperands[ins[2]];
1337
1338 RunTimeOperandInfo& output = mOperands[outs[0]];
1339 Shape outShape = output.shape();
1340
1341 success = spaceToBatchPrepare(input.shape(),
1342 reinterpret_cast<const int32_t*>(blockSize.buffer),
1343 blockSize.shape(),
1344 reinterpret_cast<const int32_t*>(paddings.buffer),
1345 paddings.shape(),
1346 &outShape) &&
1347 setInfoAndAllocateIfNeeded(&output, outShape) &&
1348 spaceToBatchGeneric(input.buffer,
1349 input.shape(),
1350 reinterpret_cast<const int32_t*>(blockSize.buffer),
1351 reinterpret_cast<const int32_t*>(paddings.buffer),
1352 paddings.shape(),
1353 output.buffer,
1354 outShape);
1355 } break;
1356 case OperationType::PAD: {
1357 if (!allParametersPresent(2, 1)) {
1358 return ANEURALNETWORKS_BAD_DATA;
1359 }
1360 const RunTimeOperandInfo& input = mOperands[ins[0]];
1361 const RunTimeOperandInfo& paddings = mOperands[ins[1]];
1362
1363 RunTimeOperandInfo& output = mOperands[outs[0]];
1364 Shape outShape = output.shape();
1365
1366 success = padPrepare(input.shape(),
1367 reinterpret_cast<const int32_t*>(paddings.buffer),
1368 paddings.shape(),
1369 &outShape) &&
1370 setInfoAndAllocateIfNeeded(&output, outShape) &&
1371 padGeneric(input.buffer,
1372 input.shape(),
1373 reinterpret_cast<const int32_t*>(paddings.buffer),
1374 output.buffer,
1375 outShape);
1376 } break;
1377 case OperationType::SQUEEZE: {
1378 if (!allParametersPresent(2, 1)) {
1379 return ANEURALNETWORKS_BAD_DATA;
1380 }
1381 const RunTimeOperandInfo& input = mOperands[ins[0]];
1382 const RunTimeOperandInfo& squeezeDims = mOperands[ins[1]];
1383
1384 RunTimeOperandInfo& output = mOperands[outs[0]];
1385 Shape outShape = output.shape();
1386
1387 success = squeezePrepare(input.shape(),
1388 reinterpret_cast<const int32_t*>(squeezeDims.buffer),
1389 squeezeDims.shape(),
1390 &outShape) &&
1391 setInfoAndAllocateIfNeeded(&output, outShape) &&
1392 squeezeGeneric(input.buffer,
1393 input.shape(),
1394 output.buffer,
1395 outShape);
1396 } break;
1397 case OperationType::TRANSPOSE: {
1398 if (!allParametersPresent(2, 1)) {
1399 return ANEURALNETWORKS_BAD_DATA;
1400 }
1401 const RunTimeOperandInfo& input = mOperands[ins[0]];
1402 const RunTimeOperandInfo& perms = mOperands[ins[1]];
1403
1404 RunTimeOperandInfo& output = mOperands[outs[0]];
1405 Shape outShape = output.shape();
1406
1407 success = transposePrepare(input.shape(),
1408 reinterpret_cast<const int32_t*>(perms.buffer),
1409 perms.shape(),
1410 &outShape) &&
1411 setInfoAndAllocateIfNeeded(&output, outShape) &&
1412 transposeGeneric(input.buffer,
1413 input.shape(),
1414 reinterpret_cast<const int32_t*>(perms.buffer),
1415 perms.shape(),
1416 output.buffer,
1417 outShape);
1418 } break;
1419 case OperationType::STRIDED_SLICE: {
1420 if (!allParametersPresent(7, 1)) {
1421 return ANEURALNETWORKS_BAD_DATA;
1422 }
1423 const RunTimeOperandInfo& input = mOperands[ins[0]];
1424 const RunTimeOperandInfo& begins = mOperands[ins[1]];
1425 const RunTimeOperandInfo& ends = mOperands[ins[2]];
1426 const RunTimeOperandInfo& strides = mOperands[ins[3]];
1427 int32_t beginMask = getScalarData<int32_t>(mOperands[ins[4]]);
1428 int32_t endMask = getScalarData<int32_t>(mOperands[ins[5]]);
1429 int32_t shrinkAxisMask = getScalarData<int32_t>(mOperands[ins[6]]);
1430
1431 RunTimeOperandInfo& output = mOperands[outs[0]];
1432 Shape outShape = output.shape();
1433
1434 success = stridedSlicePrepare(input.shape(),
1435 reinterpret_cast<const int32_t*>(begins.buffer),
1436 begins.shape(),
1437 reinterpret_cast<const int32_t*>(ends.buffer),
1438 ends.shape(),
1439 reinterpret_cast<const int32_t*>(strides.buffer),
1440 strides.shape(),
1441 beginMask, endMask, shrinkAxisMask,
1442 &outShape) &&
1443 setInfoAndAllocateIfNeeded(&output, outShape) &&
1444 stridedSliceGeneric(input.buffer,
1445 input.shape(),
1446 reinterpret_cast<const int32_t*>(begins.buffer),
1447 reinterpret_cast<const int32_t*>(ends.buffer),
1448 reinterpret_cast<const int32_t*>(strides.buffer),
1449 beginMask, endMask, shrinkAxisMask,
1450 output.buffer,
1451 outShape);
1452 } break;
1453 case OperationType::DIV: {
1454 if (!allParametersPresent(3, 1)) {
1455 return ANEURALNETWORKS_BAD_DATA;
1456 }
1457 const RunTimeOperandInfo& in1 = mOperands[ins[0]];
1458 const RunTimeOperandInfo& in2 = mOperands[ins[1]];
1459 int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
1460
1461 RunTimeOperandInfo& out = mOperands[outs[0]];
1462 Shape outShape = out.shape();
1463
1464 if (in1.type == OperandType::TENSOR_FLOAT32) {
1465 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
1466 setInfoAndAllocateIfNeeded(&out, outShape) &&
1467 divFloat32(reinterpret_cast<const float*>(in1.buffer),
1468 in1.shape(),
1469 reinterpret_cast<const float*>(in2.buffer),
1470 in2.shape(),
1471 activation,
1472 reinterpret_cast<float*>(out.buffer),
1473 outShape);
1474 }
1475 } break;
1476 case OperationType::SUB: {
1477 if (!allParametersPresent(3, 1)) {
1478 return ANEURALNETWORKS_BAD_DATA;
1479 }
1480 const RunTimeOperandInfo& in1 = mOperands[ins[0]];
1481 const RunTimeOperandInfo& in2 = mOperands[ins[1]];
1482 int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
1483
1484 RunTimeOperandInfo& out = mOperands[outs[0]];
1485 Shape outShape = out.shape();
1486
1487 if (in1.type == OperandType::TENSOR_FLOAT32) {
1488 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
1489 setInfoAndAllocateIfNeeded(&out, outShape) &&
1490 subFloat32(reinterpret_cast<const float*>(in1.buffer),
1491 in1.shape(),
1492 reinterpret_cast<const float*>(in2.buffer),
1493 in2.shape(),
1494 activation,
1495 reinterpret_cast<float*>(out.buffer),
1496 outShape);
1497 }
1498 } break;
1499 case OperationType::MEAN: {
1500 if (!allParametersPresent(3, 1)) {
1501 return ANEURALNETWORKS_BAD_DATA;
1502 }
1503 const RunTimeOperandInfo& input = mOperands[ins[0]];
1504 const RunTimeOperandInfo& axis = mOperands[ins[1]];
1505 int32_t keepDims = getScalarData<int32_t>(mOperands[ins[2]]);
1506
1507 RunTimeOperandInfo& output = mOperands[outs[0]];
1508 Shape outShape = output.shape();
1509
1510 success = meanPrepare(input.shape(),
1511 reinterpret_cast<const int32_t*>(axis.buffer),
1512 axis.shape(),
1513 keepDims > 0,
1514 &outShape) &&
1515 setInfoAndAllocateIfNeeded(&output, outShape) &&
1516 meanGeneric(input.buffer,
1517 input.shape(),
1518 reinterpret_cast<const int32_t*>(axis.buffer),
1519 axis.shape(),
1520 keepDims > 0,
1521 output.buffer,
1522 outShape);
1523 } break;
1524 default:
1525 nnAssert(false);
1526 break;
1527 }
1528 if (!success) {
1529 LOG(ERROR) << getOperationName(operation.type) << " failed.";
1530 return ANEURALNETWORKS_OP_FAILED;
1531 }
1532
1533 freeNoLongerUsedOperands(ins);
1534 return ANEURALNETWORKS_NO_ERROR;
1535 }
1536
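// Scoped override of the OpenMP block time: worker threads spin for at most
// 20 ms before sleeping, and the previous setting is restored on destruction.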
ScopedOpenmpSettings::ScopedOpenmpSettings() {
1538 mBlocktimeInitial = kmp_get_blocktime();
1539 kmp_set_blocktime(20); // ms, see b/109645291
1540
1541 #if NNAPI_LIMIT_CPU_THREADS
// Code not yet enabled. The number of threads should be chosen based on
// benchmarking. See the longer comment by the class declaration.
1544 mMaxThreadsInitial = Eigen::nbThreads();
1545 const int nProcs = omp_get_num_procs();
1546 int threads = nProcs;
1547 if (nProcs >= 8) {
1548 threads = nProcs - 4;
1549 } else if (nProcs >= 4) {
1550 threads = nProcs - 2;
1551 }
1552 Eigen::setNbThreads(threads);
1553 #endif
1554 }
1555
ScopedOpenmpSettings::~ScopedOpenmpSettings() {
1557 kmp_set_blocktime(mBlocktimeInitial);
1558 #if NNAPI_LIMIT_CPU_THREADS
1559 Eigen::setNbThreads(mMaxThreadsInitial);
1560 #endif
1561 }
1562
1563
1564 } // namespace nn
1565 } // namespace android
1566