1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
18 #define ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
19 
#include "Utils.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <vector>
24 
25 namespace android {
26 namespace nn {
27 
// DEPRECATED. Use NN_RET_CHECK instead.
#define NN_CHECK(x) NN_RET_CHECK(x)
#define NN_OPS_CHECK(x) NN_RET_CHECK(x)

// DEPRECATED. Use NN_RET_CHECK_EQ instead.
#define NN_CHECK_EQ(x, y) NN_RET_CHECK_EQ(x, y)

// Boolean storage type that is always 8 bits wide. Plain bool is avoided
// because sizeof(bool) is implementation-defined.
using bool8 = uint8_t;
37 
// Implicit padding schemes. getPaddingScheme() maps a set of explicit padding
// amounts back onto one of these values.
enum PaddingScheme {
    // The explicit paddings correspond to neither scheme below.
    kPaddingUnknown = 0,
    // The explicit paddings equal what calculateExplicitPadding() produces
    // for this scheme.
    kPaddingSame = 1,
    // Reported by getPaddingScheme() when every explicit padding is zero.
    kPaddingValid = 2,
};
43 
44 // Stores operand type information. "Shape" is a historical name.
45 struct Shape {
46     OperandType type;
47     std::vector<uint32_t> dimensions;
48     float scale;
49     int32_t offset;
50     Operand::ExtraParams extraParams;
51 };
52 
53 // Provides information available during graph creation to validate an operation.
54 class IOperationValidationContext {
55    public:
~IOperationValidationContext()56     virtual ~IOperationValidationContext() {}
57 
58     // The HAL version of the environment in which the operation is to be
59     // executed.
60     //
61     // Operation validation logic needs to handle all HAL versions to support
62     // the following use cases (assume in these examples that the latest HAL
63     // version is V1_2):
64     // 1. Our runtime wants to distribute work to a driver implementing an older
65     //    HAL version and calls, for example,
66     //    compliantWithV1_0(const V1_2::Model&).
67     // 2. A driver implements an older HAL version and delegates model
68     //    validation to, for example, validateModel(const V1_0::Model&).
69     //
70     // If getHalVersion() returns HalVersion::V1_0 and the operation
71     // is only supported since HalVersion::V1_1, validation will fail.
72     virtual HalVersion getHalVersion() const = 0;
73 
74     virtual uint32_t getNumInputs() const = 0;
75     virtual OperandType getInputType(uint32_t index) const = 0;
76     virtual Shape getInputShape(uint32_t index) const = 0;
77     virtual const Operand::ExtraParams getInputExtraParams(uint32_t index) const = 0;
78 
79     virtual uint32_t getNumOutputs() const = 0;
80     virtual OperandType getOutputType(uint32_t index) const = 0;
81     virtual Shape getOutputShape(uint32_t index) const = 0;
82 };
83 
84 // Provides inputs and outputs during operation execution.
85 class IOperationExecutionContext {
86    public:
~IOperationExecutionContext()87     virtual ~IOperationExecutionContext() {}
88 
89     virtual uint32_t getNumInputs() const = 0;
90     virtual OperandType getInputType(uint32_t index) const = 0;
91     virtual Shape getInputShape(uint32_t index) const = 0;
92     virtual const void* getInputBuffer(uint32_t index) const = 0;
93     virtual const Operand::ExtraParams getInputExtraParams(uint32_t index) const = 0;
94 
95     virtual uint32_t getNumOutputs() const = 0;
96     virtual OperandType getOutputType(uint32_t index) const = 0;
97     virtual Shape getOutputShape(uint32_t index) const = 0;
98     virtual void* getOutputBuffer(uint32_t index) = 0;
99 
100     // Updates the output shape, allocating the buffer if necessary.
101     virtual bool setOutputShape(uint32_t index, const Shape& shape) = 0;
102 
103     virtual bool isOmittedInput(uint32_t index) const = 0;
104     virtual bool isOmittedOutput(uint32_t index) const = 0;
105 
106     template <typename T>
getInputBuffer(uint32_t index)107     const T* getInputBuffer(uint32_t index) const {
108         return reinterpret_cast<const T*>(getInputBuffer(index));
109     }
110 
111     template <typename T>
getOutputBuffer(uint32_t index)112     T* getOutputBuffer(uint32_t index) {
113         return reinterpret_cast<T*>(getOutputBuffer(index));
114     }
115 
116     template <typename T>
getInputValue(uint32_t index)117     T getInputValue(uint32_t index) const {
118         return getInputBuffer<T>(index)[0];
119     }
120 };
121 
122 // Verifies that the number and types of operation inputs are as expected.
123 bool validateInputTypes(const IOperationValidationContext* context,
124                         const std::vector<OperandType>& expectedTypes);
125 
126 // Verifies that the number and types of operation outputs are as expected.
127 bool validateOutputTypes(const IOperationValidationContext* context,
128                          const std::vector<OperandType>& expectedTypes);
129 
130 // Verifies that the HAL version specified in the context is greater or equal
131 // than the minimal supported HAL version.
132 bool validateHalVersion(const IOperationValidationContext* context,
133                         HalVersion minSupportedHalVersion);
134 
135 // Verifies that the two shapes are the same.
136 bool SameShape(const Shape& in1, const Shape& in2);
137 
138 // Sets out to the same shape as in.
139 bool SetShape(const Shape& in, Shape* out);
140 
141 // Combine two tensor dimensions, both can have unspecified dimensions.
142 bool combineDimensions(const std::vector<uint32_t>& lhs, const std::vector<uint32_t>& rhs,
143                        std::vector<uint32_t>* combined);
144 
145 // Return the total number of elements, i.e. all the dimensions multiplied
146 // together. For a scalar, returns one.
147 uint32_t getNumberOfElements(const Shape& shape);
148 uint32_t getNumberOfElements(const Shape& shape,
149                              size_t firstAxisInclusive,
150                              size_t lastAxisExclusive);
151 
152 uint32_t getNumberOfDimensions(const Shape& shape);
153 
154 uint32_t getSizeOfDimension(const Shape& shape, uint32_t dimensionIdx);
155 
156 // Converts an axis index from the range [-dims, dims) into the range [0, dims).
157 bool handleNegativeAxis(int32_t numberOfDimensions, int32_t* axis);
158 
handleNegativeAxis(const Shape & shape,int32_t * axis)159 inline bool handleNegativeAxis(const Shape& shape, int32_t* axis) {
160     return handleNegativeAxis(getNumberOfDimensions(shape), axis);
161 }
162 
// Output size along one spatial dimension for a filter of `filterSize` moved
// with `stride` over an input of `imageSize` that is padded by `paddingHead`
// and `paddingTail`. Uses integer division; `stride` must be non-zero.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t paddingHead, int32_t paddingTail) {
    const int32_t numerator = imageSize - filterSize + stride + paddingHead + paddingTail;
    return numerator / stride;
}
167 
// Dilated variant of computeOutSize(): the filter is first widened to its
// effective size (filterSize - 1) * dilationRate + 1, then the same formula
// applies. Uses integer division; `stride` must be non-zero.
inline int32_t computeOutSize(int32_t imageSize, int32_t filterSize, int32_t stride,
                              int32_t dilationRate, int32_t paddingHead, int32_t paddingTail) {
    const int32_t dilatedFilterSize = (filterSize - 1) * dilationRate + 1;
    const int32_t numerator = imageSize - dilatedFilterSize + stride + paddingHead + paddingTail;
    return numerator / stride;
}
173 
// Output size along one spatial dimension of a transpose convolution:
// imageSize * stride + filterSize - stride, minus both paddings.
inline int32_t computeOutSizeTransposeConv(int32_t imageSize, int32_t filterSize, int32_t stride,
                                           int32_t paddingHead, int32_t paddingTail) {
    const int32_t unpaddedSize = imageSize * stride + filterSize - stride;
    return unpaddedSize - paddingHead - paddingTail;
}
178 
179 __wur bool QuantizeMultiplier(double double_multiplier, int32_t* quantized_multiplier, int* shift);
180 
181 __wur
182 bool QuantizeMultiplierSmallerThanOne(double double_multiplier,
183                                       int32_t* quantized_multiplier,
184                                       int32_t* right_shift);
185 
186 __wur
187 bool QuantizeMultiplierGreaterThanOne(double double_multiplier,
188                                       int32_t* quantized_multiplier,
189                                       int* left_shift);
190 
191 __wur bool GetQuantizedConvolutionMultipler(const Shape& inputShape, const Shape& filterShape,
192                                             const Shape& biasShape, const Shape& outputShape,
193                                             double* multiplier);
194 
195 void CalculateActivationRangeUint8(int32_t activation,
196                                    const Shape& outputShape,
197                                    int32_t* act_min,
198                                    int32_t* act_max);
199 
200 void CalculateActivationRangeFloat(int32_t activation,
201                                    float* activation_min,
202                                    float* activation_max);
203 
204 int32_t CalculateInputRadius(int input_integer_bits, int input_left_shift);
205 
206 void calculateExplicitPaddingImpl(int32_t in_size, int32_t stride, int32_t dilation_factor,
207                                   int32_t filter_size, int32_t padding_implicit,
208                                   bool isTransposeConv, int32_t* padding_head,
209                                   int32_t* padding_tail);
210 
calculateExplicitPadding(int32_t in_size,int32_t stride,int32_t dilation_factor,int32_t filter_size,int32_t padding_implicit,int32_t * padding_head,int32_t * padding_tail)211 inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t dilation_factor,
212                                      int32_t filter_size, int32_t padding_implicit,
213                                      int32_t* padding_head, int32_t* padding_tail) {
214     calculateExplicitPaddingImpl(in_size, stride, dilation_factor, filter_size, padding_implicit,
215                                  /*isTransposeConv=*/false, padding_head, padding_tail);
216 }
217 
calculateExplicitPadding(int32_t in_size,int32_t stride,int32_t filter_size,int32_t padding_implicit,int32_t * padding_head,int32_t * padding_tail)218 inline void calculateExplicitPadding(int32_t in_size, int32_t stride, int32_t filter_size,
219                                      int32_t padding_implicit, int32_t* padding_head,
220                                      int32_t* padding_tail) {
221     calculateExplicitPadding(in_size, stride, 1, filter_size, padding_implicit, padding_head,
222                              padding_tail);
223 }
224 
calculateExplicitPaddingTransposeConv(int32_t in_size,int32_t stride,int32_t filter_size,int32_t padding_implicit,int32_t * padding_head,int32_t * padding_tail)225 inline void calculateExplicitPaddingTransposeConv(int32_t in_size, int32_t stride,
226                                                   int32_t filter_size, int32_t padding_implicit,
227                                                   int32_t* padding_head, int32_t* padding_tail) {
228     calculateExplicitPaddingImpl(in_size, stride, /*dilation_factor=*/1, filter_size,
229                                  padding_implicit, /*isTransposeConv=*/true, padding_head,
230                                  padding_tail);
231 }
232 
getPaddingScheme(int32_t inWidth,int32_t inHeight,int32_t strideWidth,int32_t strideHeight,int32_t filterWidth,int32_t filterHeight,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom)233 inline PaddingScheme getPaddingScheme(int32_t inWidth, int32_t inHeight,
234                                       int32_t strideWidth, int32_t strideHeight,
235                                       int32_t filterWidth, int32_t filterHeight,
236                                       int32_t paddingLeft, int32_t paddingRight,
237                                       int32_t paddingTop, int32_t paddingBottom) {
238     if (paddingLeft == 0 && paddingRight == 0 && paddingTop == 0 && paddingBottom == 0) {
239         return kPaddingValid;
240     }
241 
242     int32_t expectedPaddingLeft, expectedPaddingRight;
243     int32_t expectedPaddingTop, expectedPaddingBottom;
244 
245     calculateExplicitPadding(inWidth, strideWidth, filterWidth, kPaddingSame,
246                              &expectedPaddingLeft, &expectedPaddingRight);
247     calculateExplicitPadding(inHeight, strideHeight, filterHeight, kPaddingSame,
248                              &expectedPaddingTop, &expectedPaddingBottom);
249     if (expectedPaddingLeft == paddingLeft && expectedPaddingRight == paddingRight &&
250         expectedPaddingTop == paddingTop && expectedPaddingBottom == paddingBottom) {
251         return kPaddingSame;
252     } else {
253         return kPaddingUnknown;
254     }
255 }
256 
// TODO: add more documentation from upstream.
// Reverses the order of the lowest `num_dimensions` bits of `mask` so that
// they match the order expected by the kernel. Bits at or above
// `num_dimensions` are dropped; a non-positive `num_dimensions` yields 0.
inline int ReverseMaskBits(int mask, int num_dimensions) {
    int reversed = 0;
    int remaining = mask;
    for (int bitsLeft = num_dimensions; bitsLeft > 0; --bitsLeft) {
        // Move the current lowest bit of `remaining` to the low end of the result.
        reversed = (reversed << 1) + (remaining & 1);
        remaining >>= 1;
    }
    return reversed;
}
268 
// TODO: add more documentation from upstream.
// Remainder of dividend / divisor shifted into [0, divisor) for a positive
// divisor, unlike operator% which returns a negative value for a negative
// dividend. `divisor` must be non-zero.
inline int32_t PositiveRemainder(int32_t dividend, int32_t divisor) {
    const int32_t truncatedRemainder = dividend % divisor;
    return (divisor + truncatedRemainder) % divisor;
}
273 
274 // TODO: add more documentation from upstream.
ClampedIndex(int32_t index,int dim,bool pos_stride)275 inline int32_t ClampedIndex(int32_t index, int dim, bool pos_stride) {
276   return pos_stride
277              ? (index >= dim ? dim
278                              : PositiveRemainder(
279                                    std::min(std::max(index, -dim), dim), dim))
280              : (index < -dim
281                     ? -1
282                     : PositiveRemainder(
283                           std::min(std::max(index, -dim), dim - 1), dim));
284 }
285 
286 // Broadcasts input shape against one another and puts the result into output
287 // shape. Returns true on success and false on error.
288 bool calculateBroadcastedShape(const Shape& in1, const Shape& in2, Shape* out);
289 
290 // Dequantizes a value and quantizes it back using new scale and offset.
291 uint8_t requantize(uint8_t value, const Shape& oldShape, const Shape& newShape);
292 
293 // Preparation functions for the corresponding ops
294 bool floorPrepare(const Shape& input, Shape* output);
295 
296 bool depthwiseConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
297                           int32_t padding_left, int32_t padding_right, int32_t padding_top,
298                           int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
299                           int32_t depth_multiplier, int32_t dilation_width_factor,
300                           int32_t dilation_height_factor, Shape* output);
301 
302 bool genericActivationPrepare(const Shape& input, Shape* output);
303 
304 bool genericNormalizationPrepare(const Shape& input, Shape* output);
305 
306 bool reshapePrepare(const Shape& input,
307                     const int32_t* targetDims,
308                     const int32_t targetDimsSize,
309                     Shape* output);
310 
311 bool depthToSpacePrepare(const Shape& input,
312                          int32_t blockSize,
313                          Shape* output);
314 
315 bool spaceToDepthPrepare(const Shape& input,
316                          int32_t blockSize,
317                          Shape* output);
318 
319 bool embeddingLookupPrepare(const Shape &valueShape,
320                             const Shape &lookupShape,
321                             Shape *outputShape);
322 
323 bool hashtableLookupPrepare(const Shape &lookupShape,
324                             const Shape &keyShape,
325                             const Shape &valueShape,
326                             Shape *outputShape,
327                             Shape *hitShape);
328 
329 bool padPrepare(const Shape& input,
330                 const int32_t* paddingsData,
331                 const Shape& paddingsShape,
332                 Shape* output);
333 
334 bool batchToSpacePrepare(const Shape& input,
335                          const int32_t* blockSizeData,
336                          const Shape& blockSizeShape,
337                          Shape* output);
338 
339 bool spaceToBatchPrepare(const Shape& input,
340                          const int32_t* blockSizeData,
341                          const Shape& blockSizeShape,
342                          const int32_t* paddingsData,
343                          const Shape& paddingsShape,
344                          Shape* output);
345 
346 bool squeezePrepare(const Shape& input,
347                     const int32_t* squeezeDims,
348                     const Shape& squeezeDimsShape,
349                     Shape* output);
350 
351 bool meanPrepare(const Shape& input,
352                  const int32_t* axisData,
353                  const Shape& axisShape,
354                  bool keepDims,
355                  Shape* output);
356 
357 bool stridedSlicePrepare(const Shape& input,
358                          const int32_t* beginData, const Shape& beginShape,
359                          const int32_t* endData, const Shape& endShape,
360                          const int32_t* stridesData, const Shape& stridesShape,
361                          int32_t beginMask, int32_t endMask, int32_t shrinkAxisMask,
362                          Shape* output);
363 
364 bool argMinMaxPrepare(const Shape& input, int32_t axis, Shape* output);
365 
366 bool splitPrepare(const Shape& input, int32_t axis, int32_t numOutputs, std::vector<Shape>* output);
367 
368 bool groupedConvPrepare(const Shape& input, const Shape& filter, const Shape& bias,
369                         int32_t padding_left, int32_t padding_right, int32_t padding_top,
370                         int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
371                         int32_t numGroups, Shape* output);
372 
373 // Transposes the first two dimensions.
374 template <typename T>
transposeFirstTwoDimensions(const T * buffer,const Shape & shape,T * transposedBuffer)375 inline bool transposeFirstTwoDimensions(const T* buffer, const Shape& shape, T* transposedBuffer) {
376     const int numDims = getNumberOfDimensions(shape);
377     NN_RET_CHECK(numDims >= 2);
378     const int firstDim = getSizeOfDimension(shape, 0);
379     const int secondDim = getSizeOfDimension(shape, 1);
380     int blockSize = 1;
381     for (int i = 2; i < numDims; ++i) {
382         blockSize *= getSizeOfDimension(shape, i);
383     }
384 
385     for (int i = 0; i < firstDim; ++i) {
386         for (int j = 0; j < secondDim; ++j) {
387             for (int k = 0; k < blockSize; ++k) {
388                 transposedBuffer[(j * firstDim + i) * blockSize + k] =
389                         buffer[(i * secondDim + j) * blockSize + k];
390             }
391         }
392     }
393     return true;
394 }
395 
transposeFirstTwoDimensions(const Shape & shape,Shape * transposedShape)396 inline bool transposeFirstTwoDimensions(const Shape& shape, Shape* transposedShape) {
397     NN_RET_CHECK(getNumberOfDimensions(shape) >= 2);
398     *transposedShape = shape;
399     transposedShape->dimensions[0] = shape.dimensions[1];
400     transposedShape->dimensions[1] = shape.dimensions[0];
401     return true;
402 }
403 
404 // Given two 3-dimensional tensors, merge them into one 3-dimensional tensor
405 // at the third dimension. The merged tensor's third dimension size will be
406 // sum of that of the two inputs.
407 template <typename T>
mergeThirdDimension(const T * bufferA,const std::vector<uint32_t> & dimsA,const T * bufferB,const std::vector<uint32_t> & dimsB,T * merged)408 inline bool mergeThirdDimension(const T* bufferA, const std::vector<uint32_t>& dimsA,
409                                 const T* bufferB, const std::vector<uint32_t>& dimsB, T* merged) {
410     NN_RET_CHECK_EQ(dimsA.size(), 3u);
411     NN_RET_CHECK_EQ(dimsB.size(), 3u);
412 
413     NN_RET_CHECK_EQ(dimsA[0], dimsB[0]);
414     NN_RET_CHECK_EQ(dimsA[1], dimsB[1]);
415 
416     for (unsigned int i = 0; i < dimsA[0]; ++i) {
417         for (unsigned int j = 0; j < dimsA[1]; ++j) {
418             for (unsigned int k = 0; k < dimsA[2]; ++k) {
419                 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + k] =
420                         bufferA[(i * dimsA[1] + j) * dimsA[2] + k];
421             }
422             for (unsigned int k = 0; k < dimsB[2]; ++k) {
423                 merged[(i * dimsA[1] + j) * (dimsA[2] + dimsB[2]) + dimsA[2] + k] =
424                         bufferB[(i * dimsB[1] + j) * dimsB[2] + k];
425             }
426         }
427     }
428     return true;
429 }
430 
431 } // namespace nn
432 } // namespace android
433 
434 #endif // ANDROID_ML_NN_COMMON_OPERATIONS_UTILS_H
435