/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace depthwise_conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;

// This file has three implementations of DepthwiseConv.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Per channel output multiplier and shift.
  std::vector<int32_t> per_channel_output_multiplier;
  std::vector<int> per_channel_output_shift;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  return new OpData;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  // TODO(ahentz): we could use GetOptionalInputTensor() here, but we need to
  // decide whether we are OK with optional tensors being completely absent, as
  // opposed to having -1 as their index.
  bool hasBias = NumInputs(node) == 3;

  TF_LITE_ENSURE(context, hasBias || NumInputs(node) == 2);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias = nullptr;

  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);

  // The parameter 'depth_multiplier' is redundant, so we check here to make
  // sure it is consistent with the given dimensions.
  TF_LITE_ENSURE_EQ(context,
                    params->depth_multiplier * SizeOfDimension(input, 3),
                    SizeOfDimension(filter, 3));

  const TfLiteType data_type = input->type;
  TF_LITE_ENSURE(context, data_type == kTfLiteFloat32 ||
                              data_type == kTfLiteUInt8 ||
                              data_type == kTfLiteInt8);
  TF_LITE_ENSURE_EQ(context, output->type, data_type);
  TF_LITE_ENSURE_EQ(context, filter->type, data_type);

  if (hasBias) {
    bias = GetInput(context, node, kBiasTensor);
    if (data_type == kTfLiteUInt8 || data_type == kTfLiteInt8) {
      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else {
      TF_LITE_ENSURE_EQ(context, bias->type, data_type);
    }
    TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
    TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3),
                      SizeOfDimension(bias, 0));
  }
  int channels_out = SizeOfDimension(filter, 3);
  int width = SizeOfDimension(input, 2);
  int height = SizeOfDimension(input, 1);
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);
  int batches = SizeOfDimension(input, 0);

  // Matching GetWindowedOutputSize in TensorFlow.
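  // For kTfLitePaddingSame the output size is ceil(image_size / stride); for
  // kTfLitePaddingValid it is ceil((image_size - effective_filter_size + 1) /
  // stride), where the effective filter size accounts for dilation.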
  auto padding = params->padding;
  auto compute_out_size = [padding](int image_size, int filter_size, int stride,
                                    int dilation_rate) -> int {
    int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
    return padding == kTfLitePaddingSame
               ? (image_size + stride - 1) / stride
               : padding == kTfLitePaddingValid
                     ? (image_size - effective_filter_size + stride) / stride
                     : 0;
  };

  int out_width = compute_out_size(width, filter_width, params->stride_width,
                                   params->dilation_width_factor);
  int out_height =
      compute_out_size(height, filter_height, params->stride_height,
                       params->dilation_height_factor);

  data->padding.height =
      ComputePadding(params->stride_height, params->dilation_height_factor,
                     height, filter_height, out_height);
  data->padding.width =
      ComputePadding(params->stride_width, params->dilation_width_factor, width,
                     filter_width, out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training or
  // calibration.
  if (data_type != kTfLiteFloat32) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
    const int number_channel = affine_quantization->scale->size;
    data->per_channel_output_multiplier.resize(number_channel);
    data->per_channel_output_shift.resize(number_channel);
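    // PopulateConvolutionQuantizationParams derives the whole-tensor and
    // per-channel output multipliers/shifts from the input, filter and output
    // scales, and computes the quantized range of the fused activation.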
    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data()));
  }

  TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4);
  outputSize->data[0] = batches;
  outputSize->data[1] = out_height;
  outputSize->data[2] = out_width;
  outputSize->data[3] = channels_out;
  return context->ResizeTensor(context, output, outputSize);
}

template <KernelType kernel_type>
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);

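  // The template parameter selects the reference or the optimized float
  // kernel at compile time; both share the same signature.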
  void (*depthwise_conv)(const DepthwiseParams&, const RuntimeShape&,
                         const float*, const RuntimeShape&, const float*,
                         const RuntimeShape&, const float*, const RuntimeShape&,
                         float*);
  if (kernel_type == kReference) {
    depthwise_conv = &reference_ops::DepthwiseConv;
  } else {
    depthwise_conv = &optimized_ops::DepthwiseConv;
  }

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  depthwise_conv(op_params, GetTensorShape(input), GetTensorData<float>(input),
                 GetTensorShape(filter), GetTensorData<float>(filter),
                 GetTensorShape(bias), GetTensorData<float>(bias),
                 GetTensorShape(output), GetTensorData<float>(output));
}

template <KernelType kernel_type>
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* output) {
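  // The uint8 kernels add these offsets to the raw quantized values, so the
  // offsets are the negated zero points.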
  auto input_offset = -input->params.zero_point;
  auto filter_offset = -filter->params.zero_point;
  auto output_offset = output->params.zero_point;

  void (*depthwise_conv)(const DepthwiseParams&, const RuntimeShape&,
                         const uint8*, const RuntimeShape&, const uint8*,
                         const RuntimeShape&, const int32*, const RuntimeShape&,
                         uint8*);

  if (kernel_type == kReference) {
    depthwise_conv = &reference_ops::DepthwiseConv;
  } else {
    depthwise_conv = &optimized_ops::DepthwiseConv;
  }

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
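  // The legacy uint8 kernels interpret 'output_shift' with the opposite sign
  // convention from the value computed in Prepare(), hence the negation.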
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  depthwise_conv(op_params, GetTensorShape(input),
                 GetTensorData<uint8_t>(input), GetTensorShape(filter),
                 GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                 GetTensorData<int32_t>(bias), GetTensorShape(output),
                 GetTensorData<uint8_t>(output));
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
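  // Per-channel int8 quantization uses symmetric filter quantization, so the
  // filter zero point is always zero and only the input/output offsets apply.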
  op_params.input_offset = input->params.zero_point;
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier.data(),
      data->per_channel_output_shift.data(), GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat<kernel_type>(context, node, params, data, input, filter, bias,
                             output);
      break;
    case kTfLiteUInt8:
      EvalQuantized<kernel_type>(context, node, params, data, input, filter,
                                 bias, output);
      break;
    case kTfLiteInt8: {
      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                              output);
      break;
    }
    default:
      context->ReportError(context, "Type %d not currently supported.",
                           input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace depthwise_conv

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kReference>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kNeonOptimized>};
  return &r;
}

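// The default registration uses the NEON-specialized kernel when the binary
// is built with NEON support and otherwise falls back to the portable
// optimized kernel.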
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
#else
  return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
#endif
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite