1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "CpuOperationUtils.h"
18 #include "Operations.h"
19 
20 #include <cfloat>
21 #include <cmath>
22 
23 #include "Tracing.h"
24 #include "tensorflow/lite/kernels/internal/common.h"
25 
26 namespace android {
27 namespace nn {
28 
// Declares the dimension locals shared by the grouped-convolution kernels in this file.
//
// The expansion site must have `inputShape`, `filterShape`, `outputShape` and `numGroups` in
// scope. Dimension 0 of the input is read as the batch count; dimensions 1, 2 and 3 of each
// shape are read as height, width and depth respectively.
//
// `numGroups` is a signed parameter in the kernels, so it is converted explicitly for the
// unsigned division. Callers must make sure it is positive before expanding this macro,
// otherwise the division is undefined.
#define ANDROID_NN_GROUPED_CONV_PARAMETERS                      \
    uint32_t numBatches = getSizeOfDimension(inputShape, 0);    \
    uint32_t inputHeight = getSizeOfDimension(inputShape, 1);   \
    uint32_t inputWidth = getSizeOfDimension(inputShape, 2);    \
    uint32_t inputDepth = getSizeOfDimension(inputShape, 3);    \
    uint32_t filterHeight = getSizeOfDimension(filterShape, 1); \
    uint32_t filterWidth = getSizeOfDimension(filterShape, 2);  \
    uint32_t filterDepth = getSizeOfDimension(filterShape, 3);  \
    uint32_t outputHeight = getSizeOfDimension(outputShape, 1); \
    uint32_t outputWidth = getSizeOfDimension(outputShape, 2);  \
    uint32_t outputDepth = getSizeOfDimension(outputShape, 3);  \
    uint32_t outputGroupDepth = outputDepth / static_cast<uint32_t>(numGroups);
41 
groupedConvFloat32(const float * inputData,const Shape & inputShape,const float * filterData,const Shape & filterShape,const float * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,float * outputData,const Shape & outputShape)42 bool groupedConvFloat32(const float* inputData, const Shape& inputShape, const float* filterData,
43                         const Shape& filterShape, const float* biasData, const Shape& biasShape,
44                         int32_t padding_left, int32_t padding_right, int32_t padding_top,
45                         int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
46                         int32_t numGroups, int32_t activation, float* outputData,
47                         const Shape& outputShape) {
48     NNTRACE_TRANS("groupConvFloat32");
49     ANDROID_NN_GROUPED_CONV_PARAMETERS
50 
51     float output_activation_min = 0.0f, output_activation_max = 0.0f;
52     CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);
53 
54     const float* inputBase = inputData;
55     float* outPtr = outputData;
56     for (uint32_t b = 0; b < numBatches; b++) {
57         for (uint32_t h = 0; h < outputHeight; h++) {
58             for (uint32_t w = 0; w < outputWidth; w++) {
59                 const float* filterBase = filterData;
60                 for (uint32_t g = 0; g < numGroups; g++) {
61                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
62                         int32_t wInputOrigin =
63                                 static_cast<int32_t>(w) * stride_width - padding_left;
64                         int32_t hInputOrigin =
65                                 static_cast<int32_t>(h) * stride_height - padding_top;
66                         float sum = 0.0f;
67                         for (uint32_t i = 0; i < filterHeight; i++) {
68                             for (uint32_t j = 0; j < filterWidth; j++) {
69                                 for (uint32_t k = 0; k < filterDepth; k++) {
70                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
71                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
72                                     uint32_t dInput = filterDepth * g + k;
73                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
74                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
75                                         uint32_t filterIndex =
76                                                 i * filterWidth * filterDepth + j * filterDepth + k;
77                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
78                                                               wInput * inputDepth + dInput;
79                                         sum += filterBase[filterIndex] * inputBase[inputIndex];
80                                     }
81                                 }
82                             }
83                         }
84                         sum += biasData[g * outputGroupDepth + d];
85                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
86                         outPtr[d] = sum;
87                         filterBase += filterHeight * filterWidth * filterDepth;
88                     }
89                     outPtr += outputGroupDepth;
90                 }
91             }
92         }
93         inputBase += inputHeight * inputWidth * inputDepth;
94     }
95 
96     return true;
97 }
98 
groupedConvQuant8(const uint8_t * inputData,const Shape & inputShape,const uint8_t * filterData,const Shape & filterShape,const int32_t * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,uint8_t * outputData,const Shape & outputShape)99 bool groupedConvQuant8(const uint8_t* inputData, const Shape& inputShape, const uint8_t* filterData,
100                        const Shape& filterShape, const int32_t* biasData, const Shape& biasShape,
101                        int32_t padding_left, int32_t padding_right, int32_t padding_top,
102                        int32_t padding_bottom, int32_t stride_width, int32_t stride_height,
103                        int32_t numGroups, int32_t activation, uint8_t* outputData,
104                        const Shape& outputShape) {
105     NNTRACE_TRANS("groupConvQuant8");
106     ANDROID_NN_GROUPED_CONV_PARAMETERS
107 
108     int32_t inputOffset = -inputShape.offset;
109     int32_t filterOffset = -filterShape.offset;
110     int32_t outputOffset = outputShape.offset;
111 
112     double realMultiplier = 0.0;
113     int32_t outputMultiplier = 0;
114     int32_t outputShift = 0;
115     NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, filterShape, biasShape, outputShape,
116                                                   &realMultiplier));
117     int exponent;
118     NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &exponent));
119     outputShift = -exponent;
120 
121     int32_t output_activation_min = 0, output_activation_max = 0;
122     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
123                                   &output_activation_max);
124 
125     const uint8_t* inputBase = inputData;
126     uint8_t* outPtr = outputData;
127     for (uint32_t b = 0; b < numBatches; b++) {
128         for (uint32_t h = 0; h < outputHeight; h++) {
129             for (uint32_t w = 0; w < outputWidth; w++) {
130                 const uint8_t* filterBase = filterData;
131                 for (uint32_t g = 0; g < numGroups; g++) {
132                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
133                         int32_t wInputOrigin =
134                                 static_cast<int32_t>(w) * stride_width - padding_left;
135                         int32_t hInputOrigin =
136                                 static_cast<int32_t>(h) * stride_height - padding_top;
137                         int32_t sum = 0.0f;
138                         for (uint32_t i = 0; i < filterHeight; i++) {
139                             for (uint32_t j = 0; j < filterWidth; j++) {
140                                 for (uint32_t k = 0; k < filterDepth; k++) {
141                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
142                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
143                                     uint32_t dInput = filterDepth * g + k;
144                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
145                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
146                                         uint32_t filterIndex =
147                                                 i * filterWidth * filterDepth + j * filterDepth + k;
148                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
149                                                               wInput * inputDepth + dInput;
150                                         sum += (static_cast<int32_t>(filterBase[filterIndex]) +
151                                                 filterOffset) *
152                                                (static_cast<int32_t>(inputBase[inputIndex]) +
153                                                 inputOffset);
154                                     }
155                                 }
156                             }
157                         }
158                         sum += biasData[g * outputGroupDepth + d];
159                         sum = tflite::MultiplyByQuantizedMultiplier(sum, outputMultiplier,
160                                                                     -outputShift);
161                         sum += outputOffset;
162                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
163                         outPtr[d] = static_cast<uint8_t>(sum);
164                         filterBase += filterHeight * filterWidth * filterDepth;
165                     }
166                     outPtr += outputGroupDepth;
167                 }
168             }
169         }
170         inputBase += inputHeight * inputWidth * inputDepth;
171     }
172 
173     return true;
174 }
175 
groupedConvQuant8PerChannel(const uint8_t * inputData,const Shape & inputShape,const int8_t * filterData,const Shape & filterShape,const float * filterScales,const int32_t * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,uint8_t * outputData,const Shape & outputShape)176 bool groupedConvQuant8PerChannel(const uint8_t* inputData, const Shape& inputShape,
177                                  const int8_t* filterData, const Shape& filterShape,
178                                  const float* filterScales, const int32_t* biasData,
179                                  const Shape& biasShape, int32_t padding_left,
180                                  int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
181                                  int32_t stride_width, int32_t stride_height, int32_t numGroups,
182                                  int32_t activation, uint8_t* outputData,
183                                  const Shape& outputShape) {
184     NNTRACE_TRANS("groupConvQuant8");
185     ANDROID_NN_GROUPED_CONV_PARAMETERS
186 
187     int32_t inputOffset = -inputShape.offset;
188     int32_t outputOffset = outputShape.offset;
189 
190     auto realMultiplier = std::vector<double>(outputDepth, .0f);
191     auto outputMultiplier = std::vector<int32_t>(outputDepth, 0);
192     auto outputShift = std::vector<int32_t>(outputDepth, 0);
193 
194     for (int i = 0; i < outputDepth; ++i) {
195         Shape filterChannelShape = filterShape;
196         filterChannelShape.scale = filterScales[i];
197         Shape biasChannelShape = biasShape;
198         biasChannelShape.scale = filterScales[i] * inputShape.scale;
199 
200         NN_RET_CHECK(GetQuantizedConvolutionMultipler(
201                 inputShape, filterChannelShape, biasChannelShape, outputShape, &realMultiplier[i]));
202         int exponent;
203         NN_RET_CHECK(QuantizeMultiplier(realMultiplier[i], &outputMultiplier[i], &exponent));
204         outputShift[i] = -exponent;
205     }
206 
207     int32_t output_activation_min = 0, output_activation_max = 0;
208     CalculateActivationRangeUint8(activation, outputShape, &output_activation_min,
209                                   &output_activation_max);
210 
211     const uint8_t* inputBase = inputData;
212     uint8_t* outPtr = outputData;
213     for (uint32_t b = 0; b < numBatches; b++) {
214         for (uint32_t h = 0; h < outputHeight; h++) {
215             for (uint32_t w = 0; w < outputWidth; w++) {
216                 const int8_t* filterBase = filterData;
217                 for (uint32_t g = 0; g < numGroups; g++) {
218                     for (uint32_t d = 0; d < outputGroupDepth; d++) {
219                         int32_t wInputOrigin =
220                                 static_cast<int32_t>(w) * stride_width - padding_left;
221                         int32_t hInputOrigin =
222                                 static_cast<int32_t>(h) * stride_height - padding_top;
223                         int32_t sum = 0.0f;
224                         for (uint32_t i = 0; i < filterHeight; i++) {
225                             for (uint32_t j = 0; j < filterWidth; j++) {
226                                 for (uint32_t k = 0; k < filterDepth; k++) {
227                                     int32_t hInput = hInputOrigin + static_cast<int32_t>(i);
228                                     int32_t wInput = wInputOrigin + static_cast<int32_t>(j);
229                                     uint32_t dInput = filterDepth * g + k;
230                                     if (hInput >= 0 && hInput < static_cast<int32_t>(inputHeight) &&
231                                         wInput >= 0 && wInput < static_cast<int32_t>(inputWidth)) {
232                                         uint32_t filterIndex =
233                                                 i * filterWidth * filterDepth + j * filterDepth + k;
234                                         uint32_t inputIndex = hInput * inputWidth * inputDepth +
235                                                               wInput * inputDepth + dInput;
236                                         sum += (static_cast<int32_t>(filterBase[filterIndex])) *
237                                                (static_cast<int32_t>(inputBase[inputIndex]) +
238                                                 inputOffset);
239                                     }
240                                 }
241                             }
242                         }
243                         int channelIndex = g * outputGroupDepth + d;
244                         sum += biasData[channelIndex];
245                         sum = tflite::MultiplyByQuantizedMultiplier(
246                                 sum, outputMultiplier[channelIndex], -outputShift[channelIndex]);
247                         sum += outputOffset;
248                         sum = std::max(std::min(sum, output_activation_max), output_activation_min);
249                         outPtr[d] = static_cast<uint8_t>(sum);
250                         filterBase += filterHeight * filterWidth * filterDepth;
251                     }
252                     outPtr += outputGroupDepth;
253                 }
254             }
255         }
256         inputBase += inputHeight * inputWidth * inputDepth;
257     }
258 
259     return true;
260 }
261 
groupedConvFloat16(const _Float16 * inputData,const Shape & inputShape,const _Float16 * filterData,const Shape & filterShape,const _Float16 * biasData,const Shape & biasShape,int32_t padding_left,int32_t padding_right,int32_t padding_top,int32_t padding_bottom,int32_t stride_width,int32_t stride_height,int32_t numGroups,int32_t activation,_Float16 * outputData,const Shape & outputShape)262 bool groupedConvFloat16(const _Float16* inputData, const Shape& inputShape,
263                         const _Float16* filterData, const Shape& filterShape,
264                         const _Float16* biasData, const Shape& biasShape, int32_t padding_left,
265                         int32_t padding_right, int32_t padding_top, int32_t padding_bottom,
266                         int32_t stride_width, int32_t stride_height, int32_t numGroups,
267                         int32_t activation, _Float16* outputData, const Shape& outputShape) {
268     NNTRACE_TRANS("groupConvFloat16");
269 
270     std::vector<float> inputData_float32(getNumberOfElements(inputShape));
271     std::vector<float> filterData_float32(getNumberOfElements(filterShape));
272     std::vector<float> biasData_float32(getNumberOfElements(biasShape));
273     std::vector<float> outputData_float32(getNumberOfElements(outputShape));
274 
275     convertFloat16ToFloat32(inputData, &inputData_float32);
276     convertFloat16ToFloat32(filterData, &filterData_float32);
277     convertFloat16ToFloat32(biasData, &biasData_float32);
278 
279     groupedConvFloat32(inputData_float32.data(), inputShape, filterData_float32.data(), filterShape,
280                        biasData_float32.data(), biasShape, padding_left, padding_right, padding_top,
281                        padding_bottom, stride_width, stride_height, numGroups, activation,
282                        outputData_float32.data(), outputShape);
283     convertFloat32ToFloat16(outputData_float32, outputData);
284 
285     return true;
286 }
287 
288 #undef ANDROID_NN_GROUPED_CONV_PARAMETERS
289 }  // namespace nn
290 }  // namespace android
291