1 /*
2  * Copyright (C) 2017 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CpuOperationUtils.h"
18 #include "Operations.h"
19 
20 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_float.h"
21 #include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
22 
23 #include "Tracing.h"
24 
25 namespace android {
26 namespace nn {
27 
depthwiseConvFloat16(const _Float16 * inputData,const Shape & inputShape,const _Float16 * filterData,const Shape & filterShape,const _Float16 * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,_Float16 * outputData,const Shape & outputShape)28 bool depthwiseConvFloat16(const _Float16* inputData, const Shape& inputShape,
29                           const _Float16* filterData, const Shape& filterShape,
30                           const _Float16* biasData, const Shape& biasShape, int32_t paddingLeft,
31                           int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
32                           int32_t strideWidth, int32_t strideHeight, int32_t dilationWidthFactor,
33                           int32_t dilationHeightFactor, int32_t depthMultiplier, int32_t activation,
34                           _Float16* outputData, const Shape& outputShape) {
35     NNTRACE_TRANS("depthwiseConvFloat16");
36     std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
37     convertFloat16ToFloat32(inputData, &inputDataFloat32);
38     std::vector<float> filterDataFloat32(getNumberOfElements(filterShape));
39     convertFloat16ToFloat32(filterData, &filterDataFloat32);
40     std::vector<float> biasDataFloat32(getNumberOfElements(biasShape));
41     convertFloat16ToFloat32(biasData, &biasDataFloat32);
42 
43     std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
44     depthwiseConvFloat32(inputDataFloat32.data(), inputShape, filterDataFloat32.data(), filterShape,
45                          biasDataFloat32.data(), biasShape, paddingLeft, paddingRight, paddingTop,
46                          paddingBottom, strideWidth, strideHeight, dilationWidthFactor,
47                          dilationHeightFactor, depthMultiplier, activation,
48                          outputDataFloat32.data(), outputShape);
49 
50     convertFloat32ToFloat16(outputDataFloat32, outputData);
51     return true;
52 }
53 
// Shared local-variable prologue for the kernels in this file.
//
// Must be expanded inside a function that has inputShape, filterShape, outputShape, paddingTop
// and paddingLeft in scope. It declares, as uint32_t locals:
//   height / width               - dimensions 1 and 2 of inputShape
//   filterHeight / filterWidth   - dimensions 1 and 2 of filterShape
//   outHeight / outWidth         - dimensions 1 and 2 of outputShape
//   paddingHeight / paddingWidth - paddingTop / paddingLeft converted to unsigned
#define ANDROID_NN_DEPTHWISE_CONV_PARAMETERS                         \
    uint32_t height = getSizeOfDimension(inputShape, 1);             \
    uint32_t width = getSizeOfDimension(inputShape, 2);              \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1);      \
    uint32_t filterWidth = getSizeOfDimension(filterShape, 2);       \
    uint32_t outHeight = getSizeOfDimension(outputShape, 1);         \
    uint32_t outWidth = getSizeOfDimension(outputShape, 2);          \
                                                                     \
    uint32_t paddingHeight = static_cast<uint32_t>(paddingTop);      \
    uint32_t paddingWidth = static_cast<uint32_t>(paddingLeft);
64 
depthwiseConvFloat32(const float * inputData,const Shape & inputShape,const float * filterData,const Shape & filterShape,const float * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,float * outputData,const Shape & outputShape)65 bool depthwiseConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
66                           const Shape& filterShape, const float* biasData, const Shape& biasShape,
67                           int32_t paddingLeft, int32_t paddingRight, int32_t paddingTop,
68                           int32_t paddingBottom, int32_t strideWidth, int32_t strideHeight,
69                           int32_t dilationWidthFactor, int32_t dilationHeightFactor,
70                           int32_t depthMultiplier, int32_t activation, float* outputData,
71                           const Shape& outputShape) {
72     NNTRACE_TRANS("depthwiseConvFloat32");
73 
74     ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
75 
76     float output_activation_min, output_activation_max;
77     CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
78 
79     tflite::DepthwiseParams params{
80             .padding_values = {static_cast<int16>(paddingWidth), static_cast<int16>(paddingHeight)},
81             .stride_width = static_cast<int16>(strideWidth),
82             .stride_height = static_cast<int16>(strideHeight),
83             .depth_multiplier = static_cast<int16>(depthMultiplier),
84             .float_activation_min = output_activation_min,
85             .float_activation_max = output_activation_max,
86             .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
87             .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
88     };
89     NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
90     tflite::optimized_ops::DepthwiseConv(params, convertShapeToTflshape(inputShape), inputData,
91                                          convertShapeToTflshape(filterShape), filterData,
92                                          convertShapeToTflshape(biasShape), biasData,
93                                          convertShapeToTflshape(outputShape), outputData);
94 
95     return true;
96 }
97 
depthwiseConvQuant8(const uint8_t * inputData,const Shape & inputShape,const uint8_t * filterData,const Shape & filterShape,const int32_t * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,uint8_t * outputData,const Shape & outputShape)98 bool depthwiseConvQuant8(const uint8_t* inputData, const Shape& inputShape,
99                          const uint8_t* filterData, const Shape& filterShape,
100                          const int32_t* biasData, const Shape& biasShape, int32_t paddingLeft,
101                          int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
102                          int32_t strideWidth, int32_t strideHeight, int32_t dilationWidthFactor,
103                          int32_t dilationHeightFactor, int32_t depthMultiplier, int32_t activation,
104                          uint8_t* outputData, const Shape& outputShape) {
105     NNTRACE_TRANS("depthwiseConvQuant8");
106 
107     ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
108 
109     double real_multiplier = 0.0;
110     int32_t output_multiplier = 0;
111     int32_t output_shift = 0;
112     int32_t output_activation_min = 0;
113     int32_t output_activation_max = 0;
114 
115     NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
116                                                   &real_multiplier));
117     int exponent;
118     NN_RET_CHECK(QuantizeMultiplier(real_multiplier, &output_multiplier, &exponent));
119     output_shift = -exponent;
120     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
121                                   &output_activation_max);
122 
123     tflite::DepthwiseParams params{
124             .padding_values = {static_cast<int16>(paddingWidth), static_cast<int16>(paddingHeight)},
125             .stride_width = static_cast<int16>(strideWidth),
126             .stride_height = static_cast<int16>(strideHeight),
127             .depth_multiplier = static_cast<int16>(depthMultiplier),
128             .quantized_activation_min = output_activation_min,
129             .quantized_activation_max = output_activation_max,
130             .dilation_width_factor = static_cast<int16>(dilationWidthFactor),
131             .dilation_height_factor = static_cast<int16>(dilationHeightFactor),
132             .input_offset = -inputShape.offset,
133             .weights_offset = -filterShape.offset,
134             .output_offset = outputShape.offset,
135             .output_shift = -output_shift,
136             .output_multiplier = output_multiplier,
137     };
138     NNTRACE_COMP_SWITCH("optimized_ops::DepthwiseConv");
139     tflite::optimized_ops::DepthwiseConv(params, convertShapeToTflshape(inputShape), inputData,
140                                          convertShapeToTflshape(filterShape), filterData,
141                                          convertShapeToTflshape(biasShape), biasData,
142                                          convertShapeToTflshape(outputShape), outputData);
143     return true;
144 }
145 
depthwiseConvQuant8PerChannel(const uint8_t * inputData,const Shape & inputShape,const int8_t * filterData,const Shape & filterShape,const float * filterScales,const int32_t * biasData,const Shape & biasShape,int32_t paddingLeft,int32_t paddingRight,int32_t paddingTop,int32_t paddingBottom,int32_t strideWidth,int32_t strideHeight,int32_t dilationWidthFactor,int32_t dilationHeightFactor,int32_t depthMultiplier,int32_t activation,uint8_t * outputData,const Shape & outputShape)146 bool depthwiseConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
147                                    const int8_t* filterData, const Shape& filterShape,
148                                    const float* filterScales, const int32_t* biasData,
149                                    const Shape& biasShape, int32_t paddingLeft,
150                                    int32_t paddingRight, int32_t paddingTop, int32_t paddingBottom,
151                                    int32_t strideWidth, int32_t strideHeight,
152                                    int32_t dilationWidthFactor, int32_t dilationHeightFactor,
153 
154                                    int32_t depthMultiplier, int32_t activation, uint8_t* outputData,
155                                    const Shape& outputShape) {
156     NNTRACE_TRANS("depthwiseConvQuant8");
157 
158     uint32_t paddingHeight = (uint32_t)paddingTop;
159     uint32_t paddingWidth = (uint32_t)paddingLeft;
160 
161     uint32_t numBatches = getSizeOfDimension(inputShape, 0);
162     uint32_t inputHeight = getSizeOfDimension(inputShape, 1);
163     uint32_t inputWidth = getSizeOfDimension(inputShape, 2);
164     uint32_t inputDepth = getSizeOfDimension(inputShape, 3);
165     uint32_t filterHeight = getSizeOfDimension(filterShape, 1);
166     uint32_t filterWidth = getSizeOfDimension(filterShape, 2);
167     uint32_t filterDepth = getSizeOfDimension(filterShape, 3);
168     uint32_t outputHeight = getSizeOfDimension(outputShape, 1);
169     uint32_t outputWidth = getSizeOfDimension(outputShape, 2);
170     uint32_t outputDepth = getSizeOfDimension(outputShape, 3);
171 
172     int32_t inputOffset = -inputShape.offset;
173     int32_t outputOffset = outputShape.offset;
174 
175     auto realMultiplier = std::vector<double>(outputDepth, .0f);
176     auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
177     auto outputShift = std::vector<int32_t>(outputDepth, .0f);
178 
179     for (int i = 0; i < outputDepth; ++i) {
180         Shape filterChannelShape = filterShape;
181         filterChannelShape.scale = filterScales[i];
182         Shape biasChannelShape = biasShape;
183         biasChannelShape.scale = filterScales[i] * inputShape.scale;
184         NN_RET_CHECK(GetQuantizedConvolutionMultipler(
185                 inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
186         int exponent;
187         NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
188         outputShift[i] = -exponent;
189     }
190 
191     int32_t output_activation_min = 0, output_activation_max = 0;
192     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
193                                   &output_activation_max);
194 
195     const uint8_t* inputBase = inputData;
196     uint8_t* outPtr = outputData;
197     for (uint32_t b = 0; b < numBatches; b++) {
198         for (uint32_t h = 0; h < outputHeight; h++) {
199             for (uint32_t w = 0; w < outputWidth; w++) {
200                 for (uint32_t ic = 0; ic < inputDepth; ic++) {
201                     for (uint32_t m = 0; m < depthMultiplier; m++) {
202                         int32_t wInputOrigin = static_cast<int32_t>(w) * strideWidth - paddingLeft;
203                         int32_t hInputOrigin = static_cast<int32_t>(h) * strideHeight - paddingTop;
204                         const int oc = m + ic * depthMultiplier;
205 
206                         int32_t sum = 0.0f;
207                         for (uint32_t i = 0; i < filterHeight; i++) {
208                             for (uint32_t j = 0; j < filterWidth; j++) {
209                                 int32_t hInput = hInputOrigin +
210                                                  dilationHeightFactor * static_cast<int32_t>(i);
211                                 int32_t wInput = wInputOrigin +
212                                                  dilationWidthFactor * static_cast<int32_t>(j);
213 
214                                 if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
215                                     wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
216                                     uint32_t filterIndex =
217                                             i * filterWidth * filterDepth + j * filterDepth + oc;
218                                     uint32_t inputIndex = hInput * inputWidth * inputDepth +
219                                                           wInput * inputDepth + ic;
220                                     sum += (static_cast<int32_t>(filterData[filterIndex])) *
221                                            (static_cast<int32_t>(inputBase[inputIndex]) +
222                                             inputOffset);
223                                 }
224                             }
225                         }
226 
227                         sum += biasData[oc];
228                         sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier[oc],
229                                                                     -outputShift[oc]);
230                         sum += outputOffset;
231                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
232                         outPtr[m] = static_cast<uint8_t>(sum);
233                     }
234                     outPtr += depthMultiplier;
235                 }
236             }
237         }
238         inputBase += inputHeight * inputWidth * inputDepth;
239     }
240 
241     return true;
242 }
243 
244 #undef ANDROID_NN_DEPTHWISE_CONV_PARAMETERS
245 }  // namespace nn
246 }  // namespace android
247