/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include <cassert>
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>

#include "tensorflow/lite/c/builtin_op_data.h"
#include "tensorflow/lite/c/c_api_internal.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/quantization_util.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_float.h"
#include "tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h"
#include "tensorflow/lite/kernels/internal/reference/integer_ops/depthwise_conv.h"
#include "tensorflow/lite/kernels/internal/tensor.h"
#include "tensorflow/lite/kernels/kernel_util.h"
#include "tensorflow/lite/kernels/op_macros.h"
#include "tensorflow/lite/kernels/padding.h"

namespace tflite {
namespace ops {
namespace builtin {
namespace depthwise_conv {

constexpr int kInputTensor = 0;
constexpr int kFilterTensor = 1;
constexpr int kBiasTensor = 2;
constexpr int kOutputTensor = 0;

// This file has three implementations of DepthwiseConv.
enum KernelType {
  kReference,
  kGenericOptimized,  // Neon-free
  kNeonOptimized,
};

struct OpData {
  TfLitePaddingValues padding;
  // The scaling factor from input to output (aka the 'real multiplier') can
  // be represented as a fixed point multiplier plus a left shift.
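  // For example, a real multiplier of 0.75 can be stored as the fixed-point
  // value round(0.75 * 2^31) = 1610612736 together with a shift of 0.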
  int32_t output_multiplier;
  int output_shift;
  // The range of the fused activation layer. For example, for kNone and
  // uint8_t these would be 0 and 255.
  int32_t output_activation_min;
  int32_t output_activation_max;

  // Per channel output multiplier and shift.
  std::vector<int32_t> per_channel_output_multiplier;
  std::vector<int> per_channel_output_shift;
};

void* Init(TfLiteContext* context, const char* buffer, size_t length) {
  // This is a builtin op, so we don't use the contents in 'buffer', if any.
  // Instead, we allocate a new object to carry information from Prepare() to
  // Eval().
  return new OpData;
}

void Free(TfLiteContext* context, void* buffer) {
  delete reinterpret_cast<OpData*>(buffer);
}

TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  // TODO(ahentz): we could use GetOptionalInputTensor() here, but we need to
  // decide whether we are OK with optional tensors being completely absent, as
  // opposed to having -1 as their index.
  bool hasBias = NumInputs(node) == 3;

  TF_LITE_ENSURE(context, hasBias || NumInputs(node) == 2);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias = nullptr;

  TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);

  TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
  TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);

  // The parameter 'depth_multiplier' is redundant, so we check here to make
  // sure it is consistent with the given dimensions.
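  // For example, an input with 3 channels and a depth_multiplier of 2 implies
  // a filter whose last dimension (and hence the output channel count) is 6.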
  TF_LITE_ENSURE_EQ(context,
                    params->depth_multiplier * SizeOfDimension(input, 3),
                    SizeOfDimension(filter, 3));

  const TfLiteType data_type = input->type;
  TF_LITE_ENSURE(context, data_type == kTfLiteFloat32 ||
                              data_type == kTfLiteUInt8 ||
                              data_type == kTfLiteInt8);
  TF_LITE_ENSURE_EQ(context, output->type, data_type);
  TF_LITE_ENSURE_EQ(context, filter->type, data_type);

  if (hasBias) {
    bias = GetInput(context, node, kBiasTensor);
    if (data_type == kTfLiteUInt8 || data_type == kTfLiteInt8) {
      TF_LITE_ENSURE_EQ(context, bias->type, kTfLiteInt32);
      TF_LITE_ENSURE_EQ(context, bias->params.zero_point, 0);
    } else {
      TF_LITE_ENSURE_EQ(context, bias->type, data_type);
    }
    TF_LITE_ENSURE_EQ(context, NumDimensions(bias), 1);
    TF_LITE_ENSURE_EQ(context, SizeOfDimension(filter, 3),
                      SizeOfDimension(bias, 0));
  }

  int channels_out = SizeOfDimension(filter, 3);
  int width = SizeOfDimension(input, 2);
  int height = SizeOfDimension(input, 1);
  int filter_width = SizeOfDimension(filter, 2);
  int filter_height = SizeOfDimension(filter, 1);
  int batches = SizeOfDimension(input, 0);

  // Matching GetWindowedOutputSize in TensorFlow.
  auto padding = params->padding;
  auto compute_out_size = [padding](int image_size, int filter_size, int stride,
                                    int dilation_rate) -> int {
    int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
    return padding == kTfLitePaddingSame
               ? (image_size + stride - 1) / stride
               : padding == kTfLitePaddingValid
                     ? (image_size - effective_filter_size + stride) / stride
                     : 0;
  };
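  // For example, with SAME padding, image_size = 10 and stride = 2 give
  // (10 + 2 - 1) / 2 = 5 output positions; with VALID padding, filter_size = 3
  // and dilation_rate = 2 give an effective filter size of 5 and
  // (10 - 5 + 2) / 2 = 3 output positions.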

  int out_width = compute_out_size(width, filter_width, params->stride_width,
                                   params->dilation_width_factor);
  int out_height =
      compute_out_size(height, filter_height, params->stride_height,
                       params->dilation_height_factor);

  data->padding.height =
      ComputePadding(params->stride_height, params->dilation_height_factor,
                     height, filter_height, out_height);
  data->padding.width =
      ComputePadding(params->stride_width, params->dilation_width_factor, width,
                     filter_width, out_width);

  // Note that quantized inference requires that all tensors have their
  // parameters set. This is usually done during quantized training or
  // calibration.
  if (data_type != kTfLiteFloat32) {
    TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                      kTfLiteAffineQuantization);
    const auto* affine_quantization =
        reinterpret_cast<TfLiteAffineQuantization*>(
            filter->quantization.params);
    TF_LITE_ENSURE(context, affine_quantization);
    TF_LITE_ENSURE(context, affine_quantization->scale);
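    // For per-channel quantized filters, 'scale' carries one value per output
    // channel; for per-tensor quantization it carries a single value.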
    const int number_channel = affine_quantization->scale->size;
    data->per_channel_output_multiplier.resize(number_channel);
    data->per_channel_output_shift.resize(number_channel);
    TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
        context, input, filter, bias, output, params->activation,
        &data->output_multiplier, &data->output_shift,
        &data->output_activation_min, &data->output_activation_max,
        data->per_channel_output_multiplier.data(),
        data->per_channel_output_shift.data()));
  }

  TfLiteIntArray* outputSize = TfLiteIntArrayCreate(4);
  outputSize->data[0] = batches;
  outputSize->data[1] = out_height;
  outputSize->data[2] = out_width;
  outputSize->data[3] = channels_out;
  return context->ResizeTensor(context, output, outputSize);
}

template <KernelType kernel_type>
void EvalFloat(TfLiteContext* context, TfLiteNode* node,
               TfLiteDepthwiseConvParams* params, OpData* data,
               const TfLiteTensor* input, const TfLiteTensor* filter,
               const TfLiteTensor* bias, TfLiteTensor* output) {
  float output_activation_min, output_activation_max;
  CalculateActivationRange(params->activation, &output_activation_min,
                           &output_activation_max);
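  // For example, kTfLiteActNone leaves the full float range and
  // kTfLiteActRelu6 clamps it to [0.0f, 6.0f].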

  void (*depthwise_conv)(const DepthwiseParams&, const RuntimeShape&,
                         const float*, const RuntimeShape&, const float*,
                         const RuntimeShape&, const float*, const RuntimeShape&,
                         float*);
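  // Note that kGenericOptimized and kNeonOptimized both take the
  // optimized_ops path below; any NEON specialization is selected inside that
  // implementation at build time.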
  if (kernel_type == kReference) {
    depthwise_conv = &reference_ops::DepthwiseConv;
  } else {
    depthwise_conv = &optimized_ops::DepthwiseConv;
  }

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.float_activation_min = output_activation_min;
  op_params.float_activation_max = output_activation_max;
  depthwise_conv(op_params, GetTensorShape(input), GetTensorData<float>(input),
                 GetTensorShape(filter), GetTensorData<float>(filter),
                 GetTensorShape(bias), GetTensorData<float>(bias),
                 GetTensorShape(output), GetTensorData<float>(output));
}

template <KernelType kernel_type>
void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
                   TfLiteDepthwiseConvParams* params, OpData* data,
                   const TfLiteTensor* input, const TfLiteTensor* filter,
                   const TfLiteTensor* bias, TfLiteTensor* output) {
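  // The uint8 kernels expect the input and filter offsets to be the negated
  // zero points; the output offset keeps its sign.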
  auto input_offset = -input->params.zero_point;
  auto filter_offset = -filter->params.zero_point;
  auto output_offset = output->params.zero_point;

  void (*depthwise_conv)(const DepthwiseParams&, const RuntimeShape&,
                         const uint8*, const RuntimeShape&, const uint8*,
                         const RuntimeShape&, const int32*, const RuntimeShape&,
                         uint8*);

  if (kernel_type == kReference) {
    depthwise_conv = &reference_ops::DepthwiseConv;
  } else {
    depthwise_conv = &optimized_ops::DepthwiseConv;
  }

  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = input_offset;
  op_params.weights_offset = filter_offset;
  op_params.output_offset = output_offset;
  op_params.output_multiplier = data->output_multiplier;
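  // Note the sign flip: the stored shift and the kernel parameter use opposite
  // sign conventions.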
  op_params.output_shift = -data->output_shift;
  op_params.quantized_activation_min = data->output_activation_min;
  op_params.quantized_activation_max = data->output_activation_max;
  depthwise_conv(op_params, GetTensorShape(input),
                 GetTensorData<uint8_t>(input), GetTensorShape(filter),
                 GetTensorData<uint8_t>(filter), GetTensorShape(bias),
                 GetTensorData<int32_t>(bias), GetTensorShape(output),
                 GetTensorData<uint8_t>(output));
}

void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                             TfLiteDepthwiseConvParams* params, OpData* data,
                             const TfLiteTensor* input,
                             const TfLiteTensor* filter,
                             const TfLiteTensor* bias, TfLiteTensor* output) {
  DepthwiseParams op_params;
  op_params.padding_type = PaddingType::kSame;
  op_params.padding_values.width = data->padding.width;
  op_params.padding_values.height = data->padding.height;
  op_params.stride_width = params->stride_width;
  op_params.stride_height = params->stride_height;
  op_params.dilation_width_factor = params->dilation_width_factor;
  op_params.dilation_height_factor = params->dilation_height_factor;
  op_params.depth_multiplier = params->depth_multiplier;
  op_params.input_offset = input->params.zero_point;
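  // Per-channel quantized (int8) filters are expected to have zero points of
  // 0, so no weights offset is applied.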
  op_params.weights_offset = 0;
  op_params.output_offset = output->params.zero_point;

  reference_integer_ops::DepthwiseConvPerChannel(
      op_params, data->per_channel_output_multiplier.data(),
      data->per_channel_output_shift.data(), GetTensorShape(input),
      GetTensorData<int8>(input), GetTensorShape(filter),
      GetTensorData<int8>(filter), GetTensorShape(bias),
      GetTensorData<int32>(bias), GetTensorShape(output),
      GetTensorData<int8>(output));
}

template <KernelType kernel_type>
TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
  auto* params =
      reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
  OpData* data = reinterpret_cast<OpData*>(node->user_data);

  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
  const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
  const TfLiteTensor* bias =
      (NumInputs(node) == 3) ? GetInput(context, node, kBiasTensor) : nullptr;

  // TODO(aselle): Consider whether float conv and quantized conv should be
  // separate ops to avoid dispatch overhead here.
  switch (input->type) {  // Already know in/out types are same.
    case kTfLiteFloat32:
      EvalFloat<kernel_type>(context, node, params, data, input, filter, bias,
                             output);
      break;
    case kTfLiteUInt8:
      EvalQuantized<kernel_type>(context, node, params, data, input, filter,
                                 bias, output);
      break;
    case kTfLiteInt8: {
      EvalQuantizedPerChannel(context, node, params, data, input, filter, bias,
                              output);
      break;
    }
    default:
      context->ReportError(context, "Type %d not currently supported.",
                           input->type);
      return kTfLiteError;
  }
  return kTfLiteOk;
}

}  // namespace depthwise_conv

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_REF() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kReference>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kGenericOptimized>};
  return &r;
}

TfLiteRegistration* Register_DEPTHWISE_CONVOLUTION_NEON_OPT() {
  static TfLiteRegistration r = {
      depthwise_conv::Init, depthwise_conv::Free, depthwise_conv::Prepare,
      depthwise_conv::Eval<depthwise_conv::kNeonOptimized>};
  return &r;
}

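// The default registration picks the NEON kernel when the binary is built
// with NEON support (USE_NEON) and otherwise falls back to the generic
// optimized kernel.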
TfLiteRegistration* Register_DEPTHWISE_CONV_2D() {
#ifdef USE_NEON
  return Register_DEPTHWISE_CONVOLUTION_NEON_OPT();
#else
  return Register_DEPTHWISE_CONVOLUTION_GENERIC_OPT();
#endif
}

}  // namespace builtin
}  // namespace ops
}  // namespace tflite