/* * Copyright (C) 2017 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #define LOG_TAG "OperationsUtils" #include "OperationsExecutionUtils.h" #include #include #include #include #include #include #include "ActivationFunctor.h" #include "nnapi/Validation.h" namespace android { namespace nn { namespace { void CalculateActivationRangeImpl(int32_t activation, const Shape& outputShape, int32_t qmin, int32_t qmax, int32_t* act_min, int32_t* act_max) { const auto scale = outputShape.scale; const auto zero_point = outputShape.offset; auto quantize = [scale, zero_point](float f) { return zero_point + static_cast(std::round(f / scale)); }; if (activation == kActivationRelu) { *act_min = std::max(qmin, quantize(0.0)); *act_max = qmax; } else if (activation == kActivationRelu6) { *act_min = std::max(qmin, quantize(0.0)); *act_max = std::min(qmax, quantize(6.0)); } else if (activation == kActivationRelu1) { *act_min = std::max(qmin, quantize(-1.0)); *act_max = std::min(qmax, quantize(1.0)); } else if (activation == kActivationNone) { *act_min = qmin; *act_max = qmax; } else { LOG(ERROR) << "Unsupported fused activation function."; } } } // namespace bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis) { NN_CHECK(-numberOfDimensions <= *axis && *axis < numberOfDimensions); if (*axis < 0) { *axis += numberOfDimensions; } return true; } bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, int32_t* shift) { if (double_multiplier == 0.) { *quantized_multiplier = 0; *shift = 0; return true; } const double q = std::frexp(double_multiplier, shift); auto q_fixed = static_cast(std::round(q * (1LL << 31))); NN_RET_CHECK(q_fixed <= (1LL << 31)); if (q_fixed == (1LL << 31)) { q_fixed /= 2; ++*shift; } NN_RET_CHECK_LE(q_fixed, std::numeric_limits::max()); // A shift amount smaller than -31 would cause all bits to be shifted out // and thus all results would be zero. We implement that instead with // q_fixed==0, so as to avoid hitting issues with right-shift // operations with shift amounts greater than 31. Note that this happens // roughly when abs(double_multiplier) < 2^-31 and the present handling means // that we're effectively flushing tiny double_multiplier's to zero. // We could conceivably handle values in the range (roughly) [32, 63] // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view // the present handling is just doing 'flush denormals to zero'. We could // reconsider and actually generate nonzero denormals if a need arises. if (*shift < -31) { *shift = 0; q_fixed = 0; } *quantized_multiplier = static_cast(q_fixed); return true; } bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t* quantized_multiplier, int32_t* left_shift) { NN_RET_CHECK(double_multiplier > 0.); NN_RET_CHECK(double_multiplier < 1.); NN_RET_CHECK(QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift)); NN_RET_CHECK(*left_shift <= 0); return true; } bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t* quantized_multiplier, int32_t* right_shift) { NN_OPS_CHECK(double_multiplier >= 0.); NN_OPS_CHECK(double_multiplier < 1.); if (double_multiplier == 0.) { *quantized_multiplier = 0; *right_shift = 0; return true; } NN_OPS_CHECK(double_multiplier > 0.); const double q = std::frexp(double_multiplier, right_shift); *right_shift *= -1; int64_t q_fixed = static_cast(std::round(q * (1LL << 31))); NN_OPS_CHECK(q_fixed <= (1LL << 31)); if (q_fixed == (1LL << 31)) { q_fixed /= 2; --*right_shift; } NN_OPS_CHECK(*right_shift >= 0); NN_OPS_CHECK(q_fixed <= std::numeric_limits::max()); *quantized_multiplier = static_cast(q_fixed); return true; } bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t* quantized_multiplier, int* left_shift) { NN_OPS_CHECK(double_multiplier > 1.); const double q = std::frexp(double_multiplier, left_shift); int64_t q_fixed = static_cast(std::round(q * (1LL << 31))); NN_OPS_CHECK(q_fixed <= (1LL << 31)); if (q_fixed == (1LL << 31)) { q_fixed /= 2; ++*left_shift; } NN_OPS_CHECK(*left_shift >= 0); NN_OPS_CHECK(q_fixed <= std::numeric_limits::max()); *quantized_multiplier = static_cast(q_fixed); return true; } bool GetQuantizedConvolutionMultiplier(const Shape& inputShape, const Shape& filterShape, const Shape& biasShape, const Shape& outputShape, double* multiplier) { // Upcast bias and input_product to double const double input_product_scale = inputShape.scale * filterShape.scale; const double bias_scale = biasShape.scale; // The following conditions must be guaranteed by the training pipeline. NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <= 1e-6 * std::min(input_product_scale, bias_scale)); NN_OPS_CHECK(input_product_scale >= 0); *multiplier = input_product_scale / outputShape.scale; return true; } bool GetQuantizedConvolutionMultiplier(const Shape& inputShape, const Shape& filterShape, const Shape& outputShape, double* multiplier) { // Upcast input_product to double const double input_product_scale = inputShape.scale * filterShape.scale; // The following conditions must be guaranteed by the training pipeline. NN_OPS_CHECK(input_product_scale >= 0); *multiplier = input_product_scale / outputShape.scale; return true; } void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min, int32_t* act_max) { const int32_t qmin = std::numeric_limits::min(); const int32_t qmax = std::numeric_limits::max(); CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max); } void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min, int32_t* act_max) { const int32_t qmin = std::numeric_limits::min(); const int32_t qmax = std::numeric_limits::max(); CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max); } void CalculateActivationRangeFloat(int32_t activation, float* activation_min, float* activation_max) { if (activation == kActivationRelu) { *activation_min = 0.f; *activation_max = std::numeric_limits::max(); } else if (activation == kActivationRelu6) { *activation_min = 0.f; *activation_max = 6.f; } else if (activation == kActivationRelu1) { *activation_min = -1.f; *activation_max = 1.f; } else if (activation == kActivationNone) { *activation_min = std::numeric_limits::lowest(); *activation_max = std::numeric_limits::max(); } else { LOG(ERROR) << "Unsupported fused activation function."; } } int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) { const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) * (1LL << (31 - input_integer_bits)) / (1LL << input_left_shift); // Tighten bound using floor. Suppose that we could use the exact value. // After scaling the difference, the result would be at the maximum. Thus we // must ensure that our value has lower magnitude. return static_cast(std::floor(max_input_rescaled)); } void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor, int32_t filter_size, int32_t padding_implicit, bool isTransposeConv, int32_t* padding_head, int32_t* padding_tail) { *padding_head = 0; *padding_tail = 0; int32_t effective_filter_size = (filter_size - 1) * dilation_factor + 1; if (padding_implicit == kPaddingSame) { int32_t out_size = (in_size + stride - 1) / stride; int32_t tmp = (out_size - 1) * stride + effective_filter_size; if (tmp > in_size) { *padding_head = (tmp - in_size) / 2; *padding_tail = (tmp - in_size) - *padding_head; } // For transpose conv, make padding tail fit tightly to the end of the last stride. if (isTransposeConv) { *padding_tail = (tmp - in_size) - *padding_head; } } } bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out) { NN_RET_CHECK(in1.type == in2.type); uint32_t numberOfDims1 = getNumberOfDimensions(in1); uint32_t numberOfDims2 = getNumberOfDimensions(in2); uint32_t maxDims = std::max(numberOfDims1, numberOfDims2); out->dimensions = std::vector(maxDims); for (uint32_t i = 1; i <= maxDims; i++) { uint32_t dim1 = 1; if (i <= numberOfDims1) { dim1 = getSizeOfDimension(in1, numberOfDims1 - i); } uint32_t dim2 = 1; if (i <= numberOfDims2) { dim2 = getSizeOfDimension(in2, numberOfDims2 - i); } if (dim1 != dim2 && dim1 != 1 && dim2 != 1) { LOG(ERROR) << "Dimensions mismatch for broadcast:\n" << "First tensor: dimension " << numberOfDims1 - i << " of size " << dim1 << "\nSecond tensor: dimension " << numberOfDims2 - i << " of size " << dim2; return false; } out->dimensions[maxDims - i] = (dim1 == 1) ? dim2 : dim1; } return true; } template <> uint8_t requantize(uint8_t value, const Shape& oldShape, const Shape& newShape) { double doubleValue = (value - oldShape.offset) * oldShape.scale; double doubleRet = doubleValue / newShape.scale + newShape.offset; if (doubleRet < 0) return 0; if (doubleRet > 255) return 255; return static_cast(std::round(doubleRet)); } template <> int8_t requantize(int8_t value, const Shape& oldShape, const Shape& newShape) { double doubleValue = (value - oldShape.offset) * oldShape.scale; double doubleRet = doubleValue / newShape.scale + newShape.offset; if (doubleRet < -128) return -128; if (doubleRet > 127) return 127; return static_cast(std::round(doubleRet)); } bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize, Shape* output) { // Reshape allows one of the targetDims components to have the // special -1 value, meaning it will be calculated automatically based on the // input. Here we calculate what that dimension should be so that the number // of output elements in the same as the number of input elements. int32_t numInputElements = (int32_t)getNumberOfElements(input); std::vector outDims(targetDimsSize); int32_t numOutputElements = 1; int32_t strechDim = -1; for (int32_t i = 0; i < targetDimsSize; ++i) { int32_t value = targetDims[i]; if (value == -1) { NN_OPS_CHECK(strechDim == -1); strechDim = i; } else { numOutputElements *= value; outDims[i] = (uint32_t)value; } } if (strechDim != -1) { int32_t strechValue = numInputElements / numOutputElements; outDims[strechDim] = (uint32_t)strechValue; numOutputElements *= strechValue; } NN_OPS_CHECK(numInputElements == numOutputElements); output->type = input.type; output->dimensions = outDims; output->offset = input.offset; output->scale = input.scale; return true; } bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output) { NN_OPS_CHECK(getNumberOfDimensions(input) == 4); NN_OPS_CHECK(blockSize > 0); uint32_t batches = getSizeOfDimension(input, 0); uint32_t height = getSizeOfDimension(input, 1); uint32_t width = getSizeOfDimension(input, 2); uint32_t channels = getSizeOfDimension(input, 3); NN_OPS_CHECK(channels % (blockSize * blockSize) == 0); output->type = input.type; output->dimensions = {batches, height * blockSize, width * blockSize, channels / (blockSize * blockSize)}; output->offset = input.offset; output->scale = input.scale; return true; } bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output) { NN_OPS_CHECK(getNumberOfDimensions(input) == 4); NN_OPS_CHECK(blockSize > 0); uint32_t batches = getSizeOfDimension(input, 0); uint32_t height = getSizeOfDimension(input, 1); uint32_t width = getSizeOfDimension(input, 2); uint32_t channels = getSizeOfDimension(input, 3); NN_OPS_CHECK(height % blockSize == 0); NN_OPS_CHECK(width % blockSize == 0); output->type = input.type; output->dimensions = {batches, height / blockSize, width / blockSize, channels * (blockSize * blockSize)}; output->offset = input.offset; output->scale = input.scale; return true; } bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape) { NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2); NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1); const uint32_t columns = getSizeOfDimension(valueShape, 1); const uint32_t lookups = getSizeOfDimension(lookupShape, 0); outputShape->type = valueShape.type; outputShape->dimensions = {lookups, columns}; for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) { outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i)); } outputShape->offset = valueShape.offset; outputShape->scale = valueShape.scale; return true; } bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape, const Shape& valueShape, Shape* outputShape, Shape* hitShape) { NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1); NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1); NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1); const uint32_t lookups = getSizeOfDimension(lookupShape, 0); outputShape->type = valueShape.type; outputShape->dimensions = {lookups}; for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) { outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i)); } outputShape->offset = valueShape.offset; outputShape->scale = valueShape.scale; hitShape->type = OperandType::TENSOR_QUANT8_ASYMM; hitShape->dimensions = {lookups}; hitShape->offset = 0; hitShape->scale = 1.f; return true; } bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape, Shape* output) { uint32_t numInputDims = getNumberOfDimensions(input); // paddings need to be provided as a 2-D int32 tensor. NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32); NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2); NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims); NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2); std::vector outDims(numInputDims); for (uint32_t i = 0; i < numInputDims; ++i) { int32_t beforePadding = *paddingsData++; int32_t afterPadding = *paddingsData++; // Pad value has to be greater than equal to 0. NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0); outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding; } output->type = input.type; output->dimensions = outDims; output->offset = input.offset; output->scale = input.scale; return true; } bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData, const Shape& blockSizeShape, Shape* output) { // Only 4D NHWC tensors are supported. NN_OPS_CHECK(getNumberOfDimensions(input) == 4); // blockSize need to be provided as a 1-D int32 tensor. NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32); NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1); // Only applies to spatial dimensions. NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2); uint32_t batches = getSizeOfDimension(input, 0); uint32_t height = getSizeOfDimension(input, 1); uint32_t width = getSizeOfDimension(input, 2); uint32_t channels = getSizeOfDimension(input, 3); NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0); output->type = input.type; output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]), height * blockSizeData[0], width * blockSizeData[1], channels}; output->offset = input.offset; output->scale = input.scale; return true; } bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData, const Shape& blockSizeShape, const int32_t* paddingsData, const Shape& paddingsShape, Shape* output) { // Only 4D NHWC tensors are supported. NN_OPS_CHECK(getNumberOfDimensions(input) == 4); // blockSize need to be provided as a 1-D int32 tensor. NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32); NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1); // Only applies to spatial dimensions. NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2); // paddings need to be provided as a 2-D int32 tensor. NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32); NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2); NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2); NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2); uint32_t batches = getSizeOfDimension(input, 0); uint32_t height = getSizeOfDimension(input, 1); uint32_t width = getSizeOfDimension(input, 2); uint32_t channels = getSizeOfDimension(input, 3); uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1]; uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3]; NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0); NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0); output->type = input.type; output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]), paddedHeight / blockSizeData[0], paddedWidth / blockSizeData[1], channels}; output->offset = input.offset; output->scale = input.scale; return true; } bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims, Shape* output) { // perm need to be provided as a 1-D int32 tensor. NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32); NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1); int32_t numInputDims = static_cast(getNumberOfDimensions(input)); int32_t axisSize = static_cast(getSizeOfDimension(axisShape, 0)); // Determines size of output tensor. if (keepDims) { std::vector outDims(numInputDims); for (int32_t idx = 0; idx < numInputDims; ++idx) { bool isAxis = false; for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) { if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) { isAxis = true; break; } } if (isAxis) { outDims[idx] = 1; } else { outDims[idx] = getSizeOfDimension(input, idx); } } output->dimensions = outDims; } else { // Calculates size of reducing axis. int32_t numReduceAxis = axisSize; for (int32_t i = 0; i < axisSize; ++i) { int32_t current = axisData[i]; if (current < 0) { current += numInputDims; } NN_OPS_CHECK(current >= 0 && current < numInputDims); for (int32_t j = 0; j < i; ++j) { int32_t previous = axisData[j]; if (previous < 0) { previous += numInputDims; } if (current == previous) { --numReduceAxis; break; } } } // Determines output dimensions. std::vector outDims(numInputDims - numReduceAxis); int32_t numSkipAxis = 0; for (int32_t idx = 0; idx < numInputDims; ++idx) { bool isAxis = false; for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) { if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) { ++numSkipAxis; isAxis = true; break; } } if (!isAxis) { outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx); } } // Handle the case when all dimensions are removed if (outDims.empty()) { outDims.push_back(1); } output->dimensions = outDims; } output->type = input.type; output->offset = input.offset; output->scale = input.scale; return true; } bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output) { NN_CHECK(handleNegativeAxis(input, &axis)); output->type = OperandType::TENSOR_INT32; // Copy the input dimensions, omitting the axis dimension. output->dimensions.clear(); if (getNumberOfDimensions(input) > 1) { output->dimensions.reserve(getNumberOfDimensions(input) - 1); output->dimensions.insert(output->dimensions.end(), input.dimensions.begin(), input.dimensions.begin() + axis); output->dimensions.insert(output->dimensions.end(), input.dimensions.begin() + axis + 1, input.dimensions.end()); } else { output->dimensions.push_back(1); } return true; } bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs, std::vector* output) { NN_CHECK(handleNegativeAxis(input, &axis)); const int32_t sizeOfAxisToSplit = input.dimensions[axis]; NN_OPS_CHECK(sizeOfAxisToSplit % numOutputs == 0); const int32_t sliceSize = sizeOfAxisToSplit / numOutputs; for (int i = 0; i < numOutputs; ++i) { output->at(i).type = input.type; output->at(i).dimensions = input.dimensions; output->at(i).dimensions[axis] = sliceSize; output->at(i).offset = input.offset; output->at(i).scale = input.scale; } return true; } bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias, int32_t padding_left, int32_t padding_right, int32_t padding_top, int32_t padding_bottom, int32_t stride_width, int32_t stride_height, int32_t numGroups, Shape* output) { if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) { NN_OPS_CHECK(input.type == OperandType::TENSOR_QUANT8_ASYMM || input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED); } else { NN_OPS_CHECK(input.type == filter.type); } if (input.type == OperandType::TENSOR_QUANT8_ASYMM || input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) { NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32); } else { NN_OPS_CHECK(input.type == bias.type); } NN_OPS_CHECK(getNumberOfDimensions(input) == 4); NN_OPS_CHECK(getNumberOfDimensions(filter) == 4); NN_OPS_CHECK(getNumberOfDimensions(bias) == 1); NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0)); NN_OPS_CHECK(getSizeOfDimension(filter, 3) * numGroups == getSizeOfDimension(input, 3)); NN_OPS_CHECK(getSizeOfDimension(filter, 0) % numGroups == 0); uint32_t channels_out = getSizeOfDimension(filter, 0); uint32_t width = getSizeOfDimension(input, 2); uint32_t height = getSizeOfDimension(input, 1); uint32_t filterWidth = getSizeOfDimension(filter, 2); uint32_t filterHeight = getSizeOfDimension(filter, 1); uint32_t batches = getSizeOfDimension(input, 0); NN_RET_CHECK_GT(static_cast(filterWidth), padding_left); NN_RET_CHECK_GT(static_cast(filterWidth), padding_right); NN_RET_CHECK_GT(static_cast(filterHeight), padding_top); NN_RET_CHECK_GT(static_cast(filterHeight), padding_bottom); uint32_t outWidth = computeOutSize(width, filterWidth, stride_width, padding_left, padding_right); uint32_t outHeight = computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom); output->type = input.type; output->dimensions = {batches, outHeight, outWidth, channels_out}; return true; } } // namespace nn } // namespace android