/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "OperationsUtils"

#include "OperationsUtils.h"

#include <algorithm>
#include <cmath>
#include <functional>
#include <limits>
#include <sstream>
#include <vector>

#include "Operations.h"
#include "Utils.h"

namespace android {
namespace nn {

namespace {

using namespace hal;

bool validateOperandTypes(const std::vector<OperandType>& expectedTypes, const char* tag,
                          uint32_t operandCount,
                          std::function<OperandType(uint32_t)> getOperandType) {
    NN_RET_CHECK_EQ(operandCount, expectedTypes.size());
    for (uint32_t i = 0; i < operandCount; ++i) {
        OperandType type = getOperandType(i);
        NN_RET_CHECK(type == expectedTypes[i])
                << "Invalid " << tag << " tensor type " << toString(type) << " for " << tag << " "
                << i << ", expected " << toString(expectedTypes[i]);
    }
    return true;
}

void CalculateActivationRangeImpl(int32_t activation, const Shape& outputShape, int32_t qmin,
                                  int32_t qmax, int32_t* act_min, int32_t* act_max) {
    const auto scale = outputShape.scale;
    const auto zero_point = outputShape.offset;

    auto quantize = [scale, zero_point](float f) {
        return zero_point + static_cast<int32_t>(std::round(f / scale));
    };

    if (activation == kActivationRelu) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = qmax;
    } else if (activation == kActivationRelu6) {
        *act_min = std::max(qmin, quantize(0.0));
        *act_max = std::min(qmax, quantize(6.0));
    } else if (activation == kActivationRelu1) {
        *act_min = std::max(qmin, quantize(-1.0));
        *act_max = std::min(qmax, quantize(1.0));
    } else if (activation == kActivationNone) {
        *act_min = qmin;
        *act_max = qmax;
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

}  // namespace

bool validateInputTypes(const IOperationValidationContext* context,
                        const std::vector<OperandType>& expectedTypes) {
    return validateOperandTypes(expectedTypes, "input", context->getNumInputs(),
                                [context](uint32_t index) { return context->getInputType(index); });
}

bool validateOutputTypes(const IOperationValidationContext* context,
                         const std::vector<OperandType>& expectedTypes) {
    return validateOperandTypes(
            expectedTypes, "output", context->getNumOutputs(),
            [context](uint32_t index) { return context->getOutputType(index); });
}

bool validateHalVersion(const IOperationValidationContext* context,
                        HalVersion minSupportedHalVersion) {
    if (context->getHalVersion() < minSupportedHalVersion) {
        std::ostringstream message;
        message << "Operation " << context->getOperationName() << " with inputs {";
        for (uint32_t i = 0, n = context->getNumInputs(); i < n; ++i) {
            if (i != 0) {
                message << ", ";
            }
            message << toString(context->getInputType(i));
        }
        message << "} and outputs {";
        for (uint32_t i = 0, n = context->getNumOutputs(); i < n; ++i) {
            if (i != 0) {
                message << ", ";
            }
            message << toString(context->getOutputType(i));
        }
        message << "} is only supported since " << toString(minSupportedHalVersion)
                << " (validating using " << toString(context->getHalVersion()) << ")";
        NN_RET_CHECK_FAIL() << message.str();
    }
    return true;
}

bool SameShape(const Shape& in1, const Shape& in2) {
    if (in1.type != in2.type || in1.dimensions.size() != in2.dimensions.size()) {
        return false;
    }
    for (size_t i = 0; i < in1.dimensions.size(); i++) {
        if (in1.dimensions[i] != in2.dimensions[i]) {
            return false;
        }
    }
    return true;
}

bool SetShape(const Shape& in, Shape* out) {
    if (in.type != out->type) {
        return false;
    }
    out->dimensions = in.dimensions;
    return true;
}

uint32_t getNumberOfElements(const Shape& shape) {
    uint32_t count = 1;
    for (size_t i = 0; i < shape.dimensions.size(); i++) {
        count *= shape.dimensions[i];
    }
    return count;
}

uint32_t getNumberOfElements(const Shape& shape, size_t firstAxisInclusive,
                             size_t lastAxisExclusive) {
    nnAssert(0 <= firstAxisInclusive);
    nnAssert(firstAxisInclusive <= lastAxisExclusive);
    nnAssert(lastAxisExclusive <= shape.dimensions.size());
    uint32_t count = 1;
    for (size_t i = firstAxisInclusive; i < lastAxisExclusive; i++) {
        count *= shape.dimensions[i];
    }
    return count;
}

uint32_t getNumberOfDimensions(const Shape& shape) {
    return shape.dimensions.size();
}

uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx) {
    nnAssert(0 <= dimensionIdx && dimensionIdx < shape.dimensions.size());
    return shape.dimensions[dimensionIdx];
}

uint32_t hasKnownRank(const Shape& shape) {
    return !shape.dimensions.empty();
}

bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis) {
    NN_CHECK(-numberOfDimensions <= *axis && *axis < numberOfDimensions);
    if (*axis < 0) {
        *axis += numberOfDimensions;
    }
    return true;
}

bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, int32_t* shift) {
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *shift = 0;
        return true;
    }
    const double q = std::frexp(double_multiplier, shift);
    auto q_fixed = static_cast<int64_t>(std::round(q * (1ll << 31)));
    NN_RET_CHECK(q_fixed <= (1ll << 31));
    if (q_fixed == (1ll << 31)) {
        q_fixed /= 2;
        ++*shift;
    }
    NN_RET_CHECK_LE(q_fixed, std::numeric_limits<int32_t>::max());
    // A shift amount smaller than -31 would cause all bits to be shifted out
    // and thus all results would be zero. We implement that instead with
    // q_fixed==0, so as to avoid hitting issues with right-shift
    // operations with shift amounts greater than 31. Note that this happens
    // roughly when abs(double_multiplier) < 2^-31 and the present handling means
    // that we're effectively flushing tiny double_multiplier's to zero.
    // We could conceivably handle values in the range (roughly) [32, 63]
    // as 'denormals' i.e. (shift==0, q_fixed < 2^30). In that point of view
    // the present handling is just doing 'flush denormals to zero'. We could
    // reconsider and actually generate nonzero denormals if a need arises.
    if (*shift < -31) {
        *shift = 0;
        q_fixed = 0;
    }
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}
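
// Example use of QuantizeMultiplier with illustrative values (not part of the
// original source): for double_multiplier = 0.5, std::frexp returns a mantissa
// of 0.5 with an exponent of 0, so q_fixed = round(0.5 * 2^31) = 1 << 30 and
// the call leaves *quantized_multiplier == 1073741824 and *shift == 0.
//   int32_t multiplier = 0, shift = 0;
//   QuantizeMultiplier(0.5, &multiplier, &shift);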

bool QuantizeMultiplierSmallerThanOneExp(double double_multiplier, int32_t* quantized_multiplier,
                                         int32_t* left_shift) {
    NN_RET_CHECK(double_multiplier > 0.);
    NN_RET_CHECK(double_multiplier < 1.);
    NN_RET_CHECK(QuantizeMultiplier(double_multiplier, quantized_multiplier, left_shift));
    NN_RET_CHECK(*left_shift <= 0);
    return true;
}

bool QuantizeMultiplierSmallerThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int32_t* right_shift) {
    NN_OPS_CHECK(double_multiplier >= 0.);
    NN_OPS_CHECK(double_multiplier < 1.);
    if (double_multiplier == 0.) {
        *quantized_multiplier = 0;
        *right_shift = 0;
        return true;
    }
    NN_OPS_CHECK(double_multiplier > 0.);
    const double q = std::frexp(double_multiplier, right_shift);
    *right_shift *= -1;
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        --*right_shift;
    }
    NN_OPS_CHECK(*right_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}

bool QuantizeMultiplierGreaterThanOne(double double_multiplier, int32_t* quantized_multiplier,
                                      int* left_shift) {
    NN_OPS_CHECK(double_multiplier > 1.);
    const double q = std::frexp(double_multiplier, left_shift);
    int64_t q_fixed = static_cast<int64_t>(std::round(q * (1LL << 31)));
    NN_OPS_CHECK(q_fixed <= (1LL << 31));
    if (q_fixed == (1LL << 31)) {
        q_fixed /= 2;
        ++*left_shift;
    }
    NN_OPS_CHECK(*left_shift >= 0);
    NN_OPS_CHECK(q_fixed <= std::numeric_limits<int32_t>::max());
    *quantized_multiplier = static_cast<int32_t>(q_fixed);
    return true;
}
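
// Illustrative example (not from the original source): for
// double_multiplier = 1.5, std::frexp yields a mantissa of 0.75 with an
// exponent of 1, so QuantizeMultiplierGreaterThanOne sets
// *quantized_multiplier to round(0.75 * 2^31) = 1610612736 and *left_shift to 1.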

bool GetQuantizedConvolutionMultipler(const Shape& inputShape, const Shape& filterShape,
                                      const Shape& biasShape, const Shape& outputShape,
                                      double* multiplier) {
    // Upcast bias and input_product to double
    const double input_product_scale = inputShape.scale * filterShape.scale;
    const double bias_scale = biasShape.scale;

    // The following conditions must be guaranteed by the training pipeline.
    NN_OPS_CHECK(std::abs(input_product_scale - bias_scale) <=
                 1e-6 * std::min(input_product_scale, bias_scale));
    NN_OPS_CHECK(input_product_scale >= 0);
    *multiplier = input_product_scale / outputShape.scale;
    return true;
}

void CalculateActivationRangeUint8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                   int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<uint8_t>::min();
    const int32_t qmax = std::numeric_limits<uint8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeInt8(int32_t activation, const Shape& outputShape, int32_t* act_min,
                                  int32_t* act_max) {
    const int32_t qmin = std::numeric_limits<int8_t>::min();
    const int32_t qmax = std::numeric_limits<int8_t>::max();

    CalculateActivationRangeImpl(activation, outputShape, qmin, qmax, act_min, act_max);
}

void CalculateActivationRangeFloat(int32_t activation, float* activation_min,
                                   float* activation_max) {
    if (activation == kActivationRelu) {
        *activation_min = 0.f;
        *activation_max = std::numeric_limits<float>::max();
    } else if (activation == kActivationRelu6) {
        *activation_min = 0.f;
        *activation_max = 6.f;
    } else if (activation == kActivationRelu1) {
        *activation_min = -1.f;
        *activation_max = 1.f;
    } else if (activation == kActivationNone) {
        *activation_min = std::numeric_limits<float>::lowest();
        *activation_max = std::numeric_limits<float>::max();
    } else {
        LOG(ERROR) << "Unsupported fused activation function.";
    }
}

int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift) {
    const double max_input_rescaled = 1.0 * ((1 << input_integer_bits) - 1) *
                                      (1LL << (31 - input_integer_bits)) /
                                      (1LL << input_left_shift);
    // Tighten bound using floor.  Suppose that we could use the exact value.
    // After scaling the difference, the result would be at the maximum.  Thus we
    // must ensure that our value has lower magnitude.
    return static_cast<int32_t>(std::floor(max_input_rescaled));
}
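
// Worked example with illustrative values (not from the original source):
// CalculateInputRadius(4, 0) computes 15 * 2^27 / 2^0 = 2013265920, so the
// returned input radius is 2013265920.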

void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
                                  int32_t filter_size, int32_t padding_implicit,
                                  bool isTransposeConv, int32_t* padding_head,
                                  int32_t* padding_tail) {
    *padding_head = 0;
    *padding_tail = 0;

    int32_t effective_filter_size = (filter_size - 1) * dilation_factor + 1;

    if (padding_implicit == kPaddingSame) {
        int32_t out_size = (in_size + stride - 1) / stride;
        int32_t tmp = (out_size - 1) * stride + effective_filter_size;
        if (tmp > in_size) {
            *padding_head = (tmp - in_size) / 2;
            *padding_tail = (tmp - in_size) - *padding_head;
        }
        // For transpose conv, make padding tail fit tightly to the end of the last stride.
        if (isTransposeConv) {
            *padding_tail = (tmp - in_size) - *padding_head;
        }
    }
}
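
// Worked SAME-padding example with illustrative values (not from the original
// source): in_size = 5, stride = 2, dilation_factor = 1, filter_size = 3 gives
// out_size = 3 and tmp = (3 - 1) * 2 + 3 = 7, so *padding_head = 1 and
// *padding_tail = 1.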

bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out) {
    NN_RET_CHECK(in1.type == in2.type);
    uint32_t numberOfDims1 = getNumberOfDimensions(in1);
    uint32_t numberOfDims2 = getNumberOfDimensions(in2);
    uint32_t maxDims = std::max(numberOfDims1, numberOfDims2);
    out->dimensions = std::vector<uint32_t>(maxDims);
    for (uint32_t i = 1; i <= maxDims; i++) {
        uint32_t dim1 = 1;
        if (i <= numberOfDims1) {
            dim1 = getSizeOfDimension(in1, numberOfDims1 - i);
        }
        uint32_t dim2 = 1;
        if (i <= numberOfDims2) {
            dim2 = getSizeOfDimension(in2, numberOfDims2 - i);
        }
        if (dim1 != dim2 && dim1 != 1 && dim2 != 1) {
            LOG(ERROR) << "Dimensions mismatch for broadcast:\n"
                       << "First tensor: dimension " << numberOfDims1 - i << " of size " << dim1
                       << "\nSecond tensor: dimension " << numberOfDims2 - i << " of size " << dim2;
            return false;
        }
        out->dimensions[maxDims - i] = (dim1 == 1) ? dim2 : dim1;
    }
    return true;
}
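
// Illustrative broadcast example (not from the original source): for in1 with
// dimensions {2, 1, 3} and in2 with dimensions {4, 3}, the dimensions are
// aligned from the trailing axis and out->dimensions becomes {2, 4, 3}.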

template <>
uint8_t requantize<uint8_t>(uint8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < 0) return 0;
    if (doubleRet > 255) return 255;
    return static_cast<uint8_t>(std::round(doubleRet));
}

template <>
int8_t requantize<int8_t>(int8_t value, const Shape& oldShape, const Shape& newShape) {
    double doubleValue = (value - oldShape.offset) * oldShape.scale;
    double doubleRet = doubleValue / newShape.scale + newShape.offset;
    if (doubleRet < -128) return -128;
    if (doubleRet > 127) return 127;
    return static_cast<int8_t>(std::round(doubleRet));
}
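
// Illustrative requantize<uint8_t> example (hypothetical scales and offsets,
// not from the original source): value = 200 with oldShape {scale = 0.5,
// offset = 100} represents the real value 50.0; with newShape {scale = 1.0,
// offset = 10} the returned quantized value is 60.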

bool reshapePrepare(const Shape& input, const int32_t* targetDims, const int32_t targetDimsSize,
                    Shape* output) {
    // Reshape allows one of the targetDims components to have the
    // special -1 value, meaning it will be calculated automatically based on the
    // input. Here we calculate what that dimension should be so that the number
    // of output elements is the same as the number of input elements.
    int32_t numInputElements = (int32_t)getNumberOfElements(input);

    std::vector<uint32_t> outDims(targetDimsSize);
    int32_t numOutputElements = 1;
    int32_t stretchDim = -1;
    for (int32_t i = 0; i < targetDimsSize; ++i) {
        int32_t value = targetDims[i];
        if (value == -1) {
            NN_OPS_CHECK(stretchDim == -1);
            stretchDim = i;
        } else {
            numOutputElements *= value;
            outDims[i] = (uint32_t)value;
        }
    }
    if (stretchDim != -1) {
        int32_t stretchValue = numInputElements / numOutputElements;
        outDims[stretchDim] = (uint32_t)stretchValue;
        numOutputElements *= stretchValue;
    }

    NN_OPS_CHECK(numInputElements == numOutputElements);

    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
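
// Illustrative example (not from the original source): an input of shape
// {2, 3, 4} (24 elements) reshaped with targetDims = {4, -1} produces an
// output of shape {4, 6}, since the stretch dimension is inferred as 24 / 4.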

bool depthToSpacePrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(channels % (blockSize * blockSize) == 0);
    output->type = input.type;
    output->dimensions = {batches, height * blockSize, width * blockSize,
                          channels / (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
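
// Illustrative example (not from the original source): a {1, 2, 2, 4} NHWC
// input with blockSize = 2 yields an output shape of {1, 4, 4, 1};
// spaceToDepthPrepare below performs the inverse shape transformation.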

bool spaceToDepthPrepare(const Shape& input, int32_t blockSize, Shape* output) {
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(blockSize > 0);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(height % blockSize == 0);
    NN_OPS_CHECK(width % blockSize == 0);

    output->type = input.type;
    output->dimensions = {batches, height / blockSize, width / blockSize,
                          channels * (blockSize * blockSize)};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool embeddingLookupPrepare(const Shape& valueShape, const Shape& lookupShape, Shape* outputShape) {
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 2);
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);

    const uint32_t rows = getSizeOfDimension(valueShape, 0);
    const uint32_t columns = getSizeOfDimension(valueShape, 1);

    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);

    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups, columns};
    for (uint32_t i = 2; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    return true;
}

bool hashtableLookupPrepare(const Shape& lookupShape, const Shape& keyShape,
                            const Shape& valueShape, Shape* outputShape, Shape* hitShape) {
    NN_OPS_CHECK(getNumberOfDimensions(lookupShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(keyShape) == 1);
    NN_OPS_CHECK(getNumberOfDimensions(valueShape) >= 1);

    const uint32_t lookups = getSizeOfDimension(lookupShape, 0);
    const uint32_t keys = getSizeOfDimension(keyShape, 0);
    const uint32_t rows = getSizeOfDimension(valueShape, 0);
    outputShape->type = valueShape.type;
    outputShape->dimensions = {lookups};
    for (uint32_t i = 1; i < getNumberOfDimensions(valueShape); i++) {
        outputShape->dimensions.push_back(getSizeOfDimension(valueShape, i));
    }
    outputShape->offset = valueShape.offset;
    outputShape->scale = valueShape.scale;

    hitShape->type = OperandType::TENSOR_QUANT8_ASYMM;
    hitShape->dimensions = {lookups};
    hitShape->offset = 0;
    hitShape->scale = 1.f;

    return true;
}

bool padPrepare(const Shape& input, const int32_t* paddingsData, const Shape& paddingsShape,
                Shape* output) {
    uint32_t numInputDims = getNumberOfDimensions(input);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == numInputDims);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    std::vector<uint32_t> outDims(numInputDims);
    for (uint32_t i = 0; i < numInputDims; ++i) {
        int32_t beforePadding = *paddingsData++;
        int32_t afterPadding = *paddingsData++;
        // Pad value has to be greater than or equal to 0.
        NN_OPS_CHECK(beforePadding >= 0 && afterPadding >= 0);
        outDims[i] = beforePadding + getSizeOfDimension(input, i) + afterPadding;
    }
    output->type = input.type;
    output->dimensions = outDims;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
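
// Illustrative example (not from the original source): for a {2, 3} input and
// a paddings tensor containing {{1, 1}, {0, 2}}, the computed output shape is
// {1 + 2 + 1, 0 + 3 + 2} = {4, 5}.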

bool batchToSpacePrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    NN_OPS_CHECK(batches % (blockSizeData[0] * blockSizeData[1]) == 0);
    output->type = input.type;
    output->dimensions = {batches / (blockSizeData[0] * blockSizeData[1]),
                          height * blockSizeData[0], width * blockSizeData[1], channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}

bool spaceToBatchPrepare(const Shape& input, const int32_t* blockSizeData,
                         const Shape& blockSizeShape, const int32_t* paddingsData,
                         const Shape& paddingsShape, Shape* output) {
    // Only 4D NHWC tensors are supported.
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);

    // blockSize needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(blockSizeShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(blockSizeShape) == 1);
    // Only applies to spatial dimensions.
    NN_OPS_CHECK(getSizeOfDimension(blockSizeShape, 0) == 2);

    // paddings need to be provided as a 2-D int32 tensor.
    NN_OPS_CHECK(paddingsShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(paddingsShape) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 0) == 2);
    NN_OPS_CHECK(getSizeOfDimension(paddingsShape, 1) == 2);

    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t channels = getSizeOfDimension(input, 3);

    uint32_t paddedHeight = paddingsData[0] + height + paddingsData[1];
    uint32_t paddedWidth = paddingsData[2] + width + paddingsData[3];

    NN_OPS_CHECK(paddedHeight % blockSizeData[0] == 0);
    NN_OPS_CHECK(paddedWidth % blockSizeData[1] == 0);

    output->type = input.type;
    output->dimensions = {batches * (blockSizeData[0] * blockSizeData[1]),
                          paddedHeight / blockSizeData[0], paddedWidth / blockSizeData[1],
                          channels};
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
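
// Illustrative example (not from the original source): a {1, 4, 4, 1} input
// with blockSize {2, 2} and all-zero paddings gives an output shape of
// {4, 2, 2, 1}; batchToSpacePrepare above reverses that shape transformation.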

bool meanPrepare(const Shape& input, const int32_t* axisData, const Shape& axisShape, bool keepDims,
                 Shape* output) {
    // axis needs to be provided as a 1-D int32 tensor.
    NN_OPS_CHECK(axisShape.type == OperandType::TENSOR_INT32);
    NN_OPS_CHECK(getNumberOfDimensions(axisShape) == 1);

    int32_t numInputDims = static_cast<int32_t>(getNumberOfDimensions(input));
    int32_t axisSize = static_cast<int32_t>(getSizeOfDimension(axisShape, 0));

    // Determines size of output tensor.
    if (keepDims) {
        std::vector<uint32_t> outDims(numInputDims);
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    isAxis = true;
                    break;
                }
            }
            if (isAxis) {
                outDims[idx] = 1;
            } else {
                outDims[idx] = getSizeOfDimension(input, idx);
            }
        }
        output->dimensions = outDims;
    } else {
        // Calculates size of reducing axis.
        int32_t numReduceAxis = axisSize;
        for (int32_t i = 0; i < axisSize; ++i) {
            int32_t current = axisData[i];
            if (current < 0) {
                current += numInputDims;
            }
            NN_OPS_CHECK(current >= 0 && current < numInputDims);
            for (int32_t j = 0; j < i; ++j) {
                int32_t previous = axisData[j];
                if (previous < 0) {
                    previous += numInputDims;
                }
                if (current == previous) {
                    --numReduceAxis;
                    break;
                }
            }
        }
        // Determines output dimensions.
        std::vector<uint32_t> outDims(numInputDims - numReduceAxis);
        int32_t numSkipAxis = 0;
        for (int32_t idx = 0; idx < numInputDims; ++idx) {
            bool isAxis = false;
            for (int32_t axisIdx = 0; axisIdx < axisSize; ++axisIdx) {
                if (axisData[axisIdx] == idx || axisData[axisIdx] + numInputDims == idx) {
                    ++numSkipAxis;
                    isAxis = true;
                    break;
                }
            }
            if (!isAxis) {
                outDims[idx - numSkipAxis] = getSizeOfDimension(input, idx);
            }
        }
        // Handle the case when all dimensions are removed
        if (outDims.empty()) {
            outDims.push_back(1);
        }
        output->dimensions = outDims;
    }

    output->type = input.type;
    output->offset = input.offset;
    output->scale = input.scale;

    return true;
}
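
// Illustrative example (not from the original source): reducing a {2, 3, 4}
// input over axisData = {1} yields an output shape of {2, 4} when
// keepDims == false and {2, 1, 4} when keepDims == true.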

bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    output->type = OperandType::TENSOR_INT32;

    // Copy the input dimensions, omitting the axis dimension.
    output->dimensions.clear();
    if (getNumberOfDimensions(input) > 1) {
        output->dimensions.reserve(getNumberOfDimensions(input) - 1);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin(),
                                  input.dimensions.begin() + axis);
        output->dimensions.insert(output->dimensions.end(), input.dimensions.begin() + axis + 1,
                                  input.dimensions.end());
    } else {
        output->dimensions.push_back(1);
    }

    return true;
}

bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs,
                  std::vector<Shape>* output) {
    NN_CHECK(handleNegativeAxis(input, &axis));

    const int32_t sizeOfAxisToSplit = input.dimensions[axis];
    NN_OPS_CHECK(sizeOfAxisToSplit % numOutputs == 0);
    const int32_t sliceSize = sizeOfAxisToSplit / numOutputs;

    for (int i = 0; i < numOutputs; ++i) {
        output->at(i).type = input.type;
        output->at(i).dimensions = input.dimensions;
        output->at(i).dimensions[axis] = sliceSize;
        output->at(i).offset = input.offset;
        output->at(i).scale = input.scale;
    }
    return true;
}
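
// Illustrative example (not from the original source): splitting a {2, 6, 4}
// input along axis 1 into numOutputs = 3 pieces sets each entry of the
// (already-sized) output vector to shape {2, 2, 4}.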

bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
                        int32_t numGroups, Shape* output) {
    if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
        NN_OPS_CHECK(input.type == OperandType::TENSOR_QUANT8_ASYMM ||
                     input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED);
    } else {
        NN_OPS_CHECK(input.type == filter.type);
    }
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM ||
        input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        NN_OPS_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_OPS_CHECK(input.type == bias.type);
    }
    NN_OPS_CHECK(getNumberOfDimensions(input) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(filter) == 4);
    NN_OPS_CHECK(getNumberOfDimensions(bias) == 1);

    NN_OPS_CHECK(getSizeOfDimension(filter, 0) == getSizeOfDimension(bias, 0));

    NN_OPS_CHECK(getSizeOfDimension(filter, 3) * numGroups == getSizeOfDimension(input, 3));
    NN_OPS_CHECK(getSizeOfDimension(filter, 0) % numGroups == 0);

    uint32_t channels_out = getSizeOfDimension(filter, 0);
    uint32_t width = getSizeOfDimension(input, 2);
    uint32_t height = getSizeOfDimension(input, 1);
    uint32_t filterWidth = getSizeOfDimension(filter, 2);
    uint32_t filterHeight = getSizeOfDimension(filter, 1);
    uint32_t batches = getSizeOfDimension(input, 0);

    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_left);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterWidth), padding_right);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_top);
    NN_RET_CHECK_GT(static_cast<int32_t>(filterHeight), padding_bottom);

    uint32_t outWidth =
            computeOutSize(width, filterWidth, stride_width, padding_left, padding_right);
    uint32_t outHeight =
            computeOutSize(height, filterHeight, stride_height, padding_top, padding_bottom);

    output->type = input.type;
    output->dimensions = {batches, outHeight, outWidth, channels_out};
    return true;
}

}  // namespace nn
}  // namespace android