1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6
7 http://www.apache.org/licenses/LICENSE-2.0
8
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15
16 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
17 #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
18
19 #include <assert.h>
20 #include <stdint.h>
21 #include <sys/types.h>
22 #include <algorithm>
23 #include <cmath>
24 #include <limits>
25 #include <memory>
26 #include <tuple>
27 #include <type_traits>
28
29 #include "tensorflow/lite/c/builtin_op_data.h"
30 #include "tensorflow/lite/kernels/internal/common.h"
31 #include "tensorflow/lite/kernels/internal/optimized/eigen_spatial_convolutions.h"
32 #include "tensorflow/lite/kernels/internal/optimized/optimized_ops.h"
33 #include "tensorflow/lite/kernels/internal/types.h"
34
35 namespace tflite {
36 namespace multithreaded_ops {
37
38 // Shorthands for the types we need when interfacing with the EigenTensor
39 // library.
40 typedef Eigen::TensorMap<
41 Eigen::Tensor<float, 2, Eigen::RowMajor, Eigen::DenseIndex>, Eigen::Aligned>
42 EigenMatrix;
43 typedef Eigen::TensorMap<
44 Eigen::Tensor<const float, 2, Eigen::RowMajor, Eigen::DenseIndex>,
45 Eigen::Aligned>
46 ConstEigenMatrix;
47
48 typedef Eigen::TensorMap<
49 Eigen::Tensor<float, 4, Eigen::RowMajor, Eigen::DenseIndex>, Eigen::Aligned>
50 EigenTensor;
51 typedef Eigen::TensorMap<
52 Eigen::Tensor<const float, 4, Eigen::RowMajor, Eigen::DenseIndex>,
53 Eigen::Aligned>
54 ConstEigenTensor;
55
56 // Utility functions we need for the EigenTensor API.
57 template <typename Device, typename T>
58 struct MatMulConvFunctor {
59 // Computes on device "d": out = in0 * in1, where * is matrix
60 // multiplication.
operatorMatMulConvFunctor61 void operator()(
62 const Device& d, EigenMatrix out, ConstEigenMatrix in0,
63 ConstEigenMatrix in1,
64 const Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1>& dim_pair) {
65 out.device(d) = in0.contract(in1, dim_pair);
66 }
67 };
68
69 template <class T>
70 class EigenTensorConvFunctor {
71 private:
RuntimePadding2EigenPadding(PaddingType padding)72 Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding) {
73 switch (padding) {
74 case PaddingType::kValid:
75 return Eigen::PADDING_VALID;
76 case PaddingType::kSame:
77 return Eigen::PADDING_SAME;
78 case PaddingType::kNone:
79 assert(false); // should never get here.
80 return Eigen::PADDING_VALID;
81 }
82 return Eigen::PADDING_SAME; // Prevent compiler warning about missing
83 // return
84 }
85
86 public:
operator()87 void operator()(const Eigen::ThreadPoolDevice& device, const T* input_data,
88 T* im2col_buffer, int input_batches, int input_height,
89 int input_width, int input_depth, const T* filter_data,
90 int filter_height, int filter_width, int filter_count,
91 int stride_rows, int stride_cols, int pad_width,
92 int pad_height, PaddingType padding, T* output_data,
93 int output_height, int output_width) {
94 const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 &&
95 stride_rows == 1 && stride_cols == 1);
96 if (is_1x1_kernel) {
97 // For 1x1 kernel, the 2D convolution is reduced to matrix
98 // multiplication.
99 const int conv_width = output_height * output_width;
100 Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
101 dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);
102 EigenMatrix output(output_data, input_batches * conv_width, filter_count);
103 ConstEigenMatrix input(input_data, input_batches * conv_width,
104 input_depth);
105 ConstEigenMatrix filter(filter_data, input_depth, filter_count);
106 MatMulConvFunctor<Eigen::ThreadPoolDevice, T>()(device, output, input,
107 filter, dim_pair);
108 } else if (filter_height == input_height && filter_width == input_width &&
109 pad_width == 0 && pad_height == 0) {
110 // If the input data and filter have the same height/width,
111 // the 2D convolution is reduced to matrix multiplication.
112 const int k = // Length of reduction dimension.
113 filter_width * filter_height * input_depth;
114 Eigen::array<Eigen::IndexPair<Eigen::DenseIndex>, 1> dim_pair;
115 dim_pair[0] = Eigen::IndexPair<Eigen::DenseIndex>(1, 0);
116 EigenMatrix output(output_data, input_batches, filter_count);
117 ConstEigenMatrix input(input_data, input_batches, k);
118 ConstEigenMatrix filter(filter_data, k, filter_count);
119 MatMulConvFunctor<Eigen::ThreadPoolDevice, T>()(device, output, input,
120 filter, dim_pair);
121 } else {
122 EigenTensor output(output_data, input_batches, output_height,
123 output_width, filter_count);
124 ConstEigenTensor input(input_data, input_batches, input_height,
125 input_width, input_depth);
126 ConstEigenTensor filter(filter_data, filter_height, filter_width,
127 input_depth, filter_count);
128 output.device(device) =
129 Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows,
130 RuntimePadding2EigenPadding(padding));
131 }
132 }
133 };
134
Conv(const Eigen::ThreadPoolDevice & device,const ConvParams & params,const RuntimeShape & input_shape,const float * input_data,const RuntimeShape & filter_shape,const float * filter_data,const RuntimeShape & bias_shape,const float * bias_data,const RuntimeShape & output_shape,float * output_data,const RuntimeShape & im2col_shape,float * im2col_data)135 inline void Conv(const Eigen::ThreadPoolDevice& device,
136 const ConvParams& params, const RuntimeShape& input_shape,
137 const float* input_data, const RuntimeShape& filter_shape,
138 const float* filter_data, const RuntimeShape& bias_shape,
139 const float* bias_data, const RuntimeShape& output_shape,
140 float* output_data, const RuntimeShape& im2col_shape,
141 float* im2col_data) {
142 const int stride_width = params.stride_width;
143 const int stride_height = params.stride_height;
144 const PaddingType padding = params.padding_type;
145 const int pad_width = params.padding_values.width;
146 const int pad_height = params.padding_values.height;
147 const float output_activation_min = params.float_activation_min;
148 const float output_activation_max = params.float_activation_max;
149 TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
150 TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
151 TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
152
153 const int batches = MatchingDim(input_shape, 0, output_shape, 0);
154 const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
155 const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
156 const int input_height = input_shape.Dims(1);
157 const int input_width = input_shape.Dims(2);
158 const int filter_height = filter_shape.Dims(1);
159 const int filter_width = filter_shape.Dims(2);
160 const int output_height = output_shape.Dims(1);
161 const int output_width = output_shape.Dims(2);
162 EigenTensorConvFunctor<float> conv_functor;
163 conv_functor(device, input_data, im2col_data, batches, input_height,
164 input_width, input_depth, filter_data, filter_height,
165 filter_width, output_depth, stride_height, stride_width,
166 pad_height, pad_width, padding, output_data, output_height,
167 output_width);
168
169 optimized_ops::AddBiasAndEvalActivationFunction(
170 output_activation_min, output_activation_max, bias_shape, bias_data,
171 output_shape, output_data);
172 }
173
174 } // namespace multithreaded_ops
175 } // namespace tflite
176
177 #endif // TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_MULTITHREADED_CONV_H_
178