1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
17 #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
18 
19 #include <assert.h>
20 #include <stdint.h>
21 #include <sys/types.h>
22 #include <algorithm>
23 #include <cmath>
24 #include <limits>
25 #include <memory>
26 #include <tuple>
27 #include <type_traits>
28 
29 #include "tensorflow/lite/c/builtin_op_data.h"
30 #include "tensorflow/lite/kernels/internal/common.h"
31 #include "tensorflow/lite/kernels/internal/optimized/eigen_spatial_convolutions.h"
32 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
33 #include "tensorflow/lite/kernels/internal/types.h"
34 
35 namespace tflite {
36 namespace multithreaded_ops {
37 
38 // Shorthands for the types we need when interfacing with the EigenTensor
39 // library.
40 typedef Eigen::TensorMap<
41     Eigen::Tensor<float, 2, Eigen::RowMajor, Eigen::DenseIndex>, Eigen::Aligned>
42     EigenMatrix;
43 typedef Eigen::TensorMap<
44     Eigen::Tensor<const float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
45     Eigen::Aligned>
46     ConstEigenMatrix;
47 
48 typedef Eigen::TensorMap<
49     Eigen::Tensor<float, 4, Eigen::RowMajor, Eigen::DenseIndex>, Eigen::Aligned>
50     EigenTensor;
51 typedef Eigen::TensorMap<
52     Eigen::Tensor<const float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
53     Eigen::Aligned>
54     ConstEigenTensor;
55 
56 // Utility functions we need for the EigenTensor API.
57 template <typename Device, typename T>
58 struct MatMulConvFunctor {
59   // Computes on device "d": out = in0 * in1, where * is matrix
60   // multiplication.
operatorMatMulConvFunctor61   void operator()(
62       const Device& d, EigenMatrix out, ConstEigenMatrix in0,
63       ConstEigenMatrix in1,
64       const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair) {
65     out.device(d) = in0.contract(in1, dim_pair);
66   }
67 };
68 
69 template <class T>
70 class EigenTensorConvFunctor {
71  private:
RuntimePadding2EigenPadding(PaddingType padding)72   Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding) {
73     switch (padding) {
74       case PaddingType::kValid:
75         return Eigen::PADDING_VALID;
76       case PaddingType::kSame:
77         return Eigen::PADDING_SAME;
78       case PaddingType::kNone:
79         assert(false);  // should never get here.
80         return Eigen::PADDING_VALID;
81     }
82     return Eigen::PADDING_SAME;  // Prevent compiler warning about missing
83                                  // return
84   }
85 
86  public:
operator()87   void operator()(const Eigen::ThreadPoolDevice& device, const T* input_data,
88                   T* im2col_buffer, int input_batches, int input_height,
89                   int input_width, int input_depth, const T* filter_data,
90                   int filter_height, int filter_width, int filter_count,
91                   int stride_rows, int stride_cols, int pad_width,
92                   int pad_height, PaddingType padding, T* output_data,
93                   int output_height, int output_width) {
94     const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 &&
95                                 stride_rows == 1 && stride_cols == 1);
96     if (is_1x1_kernel) {
97       // For 1x1 kernel, the 2D convolution is reduced to matrix
98       // multiplication.
99       const int conv_width = output_height * output_width;
100       Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
101       dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);
102       EigenMatrix output(output_data, input_batches * conv_width, filter_count);
103       ConstEigenMatrix input(input_data, input_batches * conv_width,
104                              input_depth);
105       ConstEigenMatrix filter(filter_data, input_depth, filter_count);
106       MatMulConvFunctor<Eigen::ThreadPoolDevice, T>()(device, output, input,
107                                                       filter, dim_pair);
108     } else if (filter_height == input_height && filter_width == input_width &&
109                pad_width == 0 && pad_height == 0) {
110       // If the input data and filter have the same height/width,
111       // the 2D convolution is reduced to matrix multiplication.
112       const int k =  // Length of reduction dimension.
113           filter_width * filter_height * input_depth;
114       Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
115       dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);
116       EigenMatrix output(output_data, input_batches, filter_count);
117       ConstEigenMatrix input(input_data, input_batches, k);
118       ConstEigenMatrix filter(filter_data, k, filter_count);
119       MatMulConvFunctor<Eigen::ThreadPoolDevice, T>()(device, output, input,
120                                                       filter, dim_pair);
121     } else {
122       EigenTensor output(output_data, input_batches, output_height,
123                          output_width, filter_count);
124       ConstEigenTensor input(input_data, input_batches, input_height,
125                              input_width, input_depth);
126       ConstEigenTensor filter(filter_data, filter_height, filter_width,
127                               input_depth, filter_count);
128       output.device(device) =
129           Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows,
130                                     RuntimePadding2EigenPadding(padding));
131     }
132   }
133 };
134 
Conv(const Eigen::ThreadPoolDevice & device,const ConvParams & params,const RuntimeShape & input_shape,const float * input_data,const RuntimeShape & filter_shape,const float * filter_data,const RuntimeShape & bias_shape,const float * bias_data,const RuntimeShape & output_shape,float * output_data,const RuntimeShape & im2col_shape,float * im2col_data)135 inline void Conv(const Eigen::ThreadPoolDevice& device,
136                  const ConvParams& params, const RuntimeShape& input_shape,
137                  const float* input_data, const RuntimeShape& filter_shape,
138                  const float* filter_data, const RuntimeShape& bias_shape,
139                  const float* bias_data, const RuntimeShape& output_shape,
140                  float* output_data, const RuntimeShape& im2col_shape,
141                  float* im2col_data) {
142   const int stride_width = params.stride_width;
143   const int stride_height = params.stride_height;
144   const PaddingType padding = params.padding_type;
145   const int pad_width = params.padding_values.width;
146   const int pad_height = params.padding_values.height;
147   const float output_activation_min = params.float_activation_min;
148   const float output_activation_max = params.float_activation_max;
149   TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
150   TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
151   TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
152 
153   const int batches = MatchingDim(input_shape, 0, output_shape, 0);
154   const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
155   const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
156   const int input_height = input_shape.Dims(1);
157   const int input_width = input_shape.Dims(2);
158   const int filter_height = filter_shape.Dims(1);
159   const int filter_width = filter_shape.Dims(2);
160   const int output_height = output_shape.Dims(1);
161   const int output_width = output_shape.Dims(2);
162   EigenTensorConvFunctor<float> conv_functor;
163   conv_functor(device, input_data, im2col_data, batches, input_height,
164                input_width, input_depth, filter_data, filter_height,
165                filter_width, output_depth, stride_height, stride_width,
166                pad_height, pad_width, padding, output_data, output_height,
167                output_width);
168 
169   optimized_ops::AddBiasAndEvalActivationFunction(
170       output_activation_min, output_activation_max, bias_shape, bias_data,
171       output_shape, output_data);
172 }
173 
174 }  // namespace multithreaded_ops
175 }  // namespace tflite
176 
177 #endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
178