1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
18 
19 #include <cstdint>
20 #include <vector>
21 
22 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
23 #include "tensorflow/lite/delegates/gpu/common/operations.h"
24 #include "tensorflow/lite/delegates/gpu/common/shape.h"
25 #include "tensorflow/lite/delegates/gpu/common/status.h"
26 #include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
28 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
29 #include "tensorflow/lite/delegates/gpu/common/task/tensor_linear_desc.h"
30 #include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
32 #include "tensorflow/lite/delegates/gpu/common/task/weights_layout.h"
33 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
34 #include "tensorflow/lite/delegates/gpu/common/types.h"
35 
36 namespace tflite {
37 namespace gpu {
38 
39 class ConvolutionTransposed : public GPUOperation {
40  public:
41   ConvolutionTransposed() = default;
42   void GetPossibleKernelWorkGroups(
43       TuningType tuning_type, const GpuInfo& gpu_info,
44       const KernelInfo& kernel_info,
45       std::vector<int3>* work_groups) const override;
46   absl::Status BindArguments(ArgumentsBinder* args) override;
47   int3 GetGridSize() const override;
48 
49   // Move only
50   ConvolutionTransposed(ConvolutionTransposed&& operation) = default;
51   ConvolutionTransposed& operator=(ConvolutionTransposed&& operation) = default;
52   ConvolutionTransposed(const ConvolutionTransposed&) = delete;
53   ConvolutionTransposed& operator=(const ConvolutionTransposed&) = delete;
54 
GetWeightsDescription()55   WeightsDescription GetWeightsDescription() const {
56     WeightsDescription desc;
57     desc.layout = weights_layout_;
58     desc.output_group_size = block_size_.w;
59     return desc;
60   }
61 
62  private:
63   friend ConvolutionTransposed CreateConvolutionTransposed(
64       const GpuInfo& gpu_info, const OperationDef& definition,
65       const ConvolutionTransposedAttributes& attr);
66   friend ConvolutionTransposed CreateConvolutionTransposed3D(
67       const GpuInfo& gpu_info, const OperationDef& definition,
68       const ConvolutionTransposed3DAttributes& attr);
69   friend ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
70       const GpuInfo& gpu_info, const OperationDef& definition,
71       const ConvolutionTransposedAttributes& attr);
72 
73   ConvolutionTransposed(const OperationDef& definition,
74                         const ConvolutionTransposedAttributes& attr,
75                         const GpuInfo& gpu_info, bool weights_are_buffer);
76   ConvolutionTransposed(const OperationDef& definition,
77                         const ConvolutionTransposed3DAttributes& attr,
78                         const GpuInfo& gpu_info, bool weights_are_buffer);
79 
80   template <DataType T>
81   void UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
82                      bool weights_are_buffer);
83 
84   template <DataType T>
85   void UploadWeights(const tflite::gpu::Tensor<OHWDI, T>& weights,
86                      bool weights_are_buffer);
87 
88   std::string GenerateConvolutionTransposedCode(const OperationDef& op_def,
89                                                 const GpuInfo& gpu_info,
90                                                 bool weights_are_buffer,
91                                                 const int4& block_size);
92   int4 stride_;
93   int4 block_size_ = int4(1, 1, 1, 1);  // WHDS
94   WeightsLayout weights_layout_;
95 };
96 
97 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWI,T> & weights,bool weights_are_buffer)98 void ConvolutionTransposed::UploadWeights(
99     const tflite::gpu::Tensor<OHWI, T>& weights, bool weights_are_buffer) {
100   const int flt_count =
101       GetTotalElementsCountForLayout(GetWeightsDescription(), weights.shape);
102   DataType weights_type = definition_.precision == CalculationsPrecision::F32
103                               ? DataType::FLOAT32
104                               : DataType::FLOAT16;
105 
106   std::vector<uint8_t> weights_data(flt_count * SizeOf(weights_type));
107   RearrangeWeights(weights, GetWeightsDescription(), weights_type,
108                    absl::MakeSpan(weights_data));
109 
110   if (weights_are_buffer) {
111     BufferDescriptor desc;
112     desc.element_type = weights_type;
113     desc.element_size = 16;
114     desc.size = weights_data.size();
115     desc.data = std::move(weights_data);
116     args_.AddObject("weights",
117                     absl::make_unique<BufferDescriptor>(std::move(desc)));
118   } else {
119     const int dst_depth =
120         AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
121     const int src_depth = DivideRoundUp(weights.shape.i, 4);
122     const int kernel_x = weights.shape.w;
123     const int kernel_y = weights.shape.h;
124     int texture_width = dst_depth;
125     int texture_height = src_depth * kernel_x * kernel_y;
126     int sub_size = SizeOf(weights_type) * 4 * texture_width * texture_height;
127     for (int i = 0; i < 4; ++i) {
128       Texture2DDescriptor desc;
129       desc.element_type = weights_type;
130       desc.size = int2(texture_width, texture_height);
131       desc.data.resize(sub_size);
132       memcpy(desc.data.data(), weights_data.data() + sub_size * i, sub_size);
133       const std::string name = "weights" + std::to_string(i);
134       args_.AddObject(name,
135                       absl::make_unique<Texture2DDescriptor>(std::move(desc)));
136     }
137   }
138 }
139 
140 template <DataType T>
UploadWeights(const tflite::gpu::Tensor<OHWDI,T> & weights,bool weights_are_buffer)141 void ConvolutionTransposed::UploadWeights(
142     const tflite::gpu::Tensor<OHWDI, T>& weights, bool weights_are_buffer) {
143   const int dst_depth =
144       AlignByN(DivideRoundUp(weights.shape.o, 4), block_size_.w);
145   const int src_depth = DivideRoundUp(weights.shape.i, 4);
146   const int kernel_x = weights.shape.w;
147   const int kernel_y = weights.shape.h;
148   const int kernel_z = weights.shape.d;
149 
150   const int elements_count =
151       kernel_x * kernel_y * kernel_z * src_depth * dst_depth * 4;
152   const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
153 
154   const int float4_size = f32_weights ? 16 : 8;
155   std::vector<uint8_t> data(float4_size * elements_count);
156 
157   if (f32_weights) {
158     float4* ptr = reinterpret_cast<float4*>(data.data());
159     if (weights_are_buffer) {
160       RearrangeWeightsToODHWIOGroupI4O4(weights, block_size_.w,
161                                         absl::MakeSpan(ptr, elements_count));
162     } else {
163       RearrangeWeightsToI4DHWIOOGroupO4(weights, block_size_.w,
164                                         absl::MakeSpan(ptr, elements_count));
165     }
166   } else {
167     half4* ptr = reinterpret_cast<half4*>(data.data());
168     if (weights_are_buffer) {
169       RearrangeWeightsToODHWIOGroupI4O4(weights, block_size_.w,
170                                         absl::MakeSpan(ptr, elements_count));
171     } else {
172       RearrangeWeightsToI4DHWIOOGroupO4(weights, block_size_.w,
173                                         absl::MakeSpan(ptr, elements_count));
174     }
175   }
176 
177   if (weights_are_buffer) {
178     BufferDescriptor desc;
179     desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
180     desc.element_size = 16;
181     desc.size = float4_size * elements_count;
182     desc.data = std::move(data);
183     args_.AddObject("weights",
184                     absl::make_unique<BufferDescriptor>(std::move(desc)));
185   } else {
186     int texture_width = dst_depth;
187     int texture_height = src_depth * kernel_x * kernel_y * kernel_z;
188     int sub_size = float4_size * texture_width * texture_height;
189     for (int i = 0; i < 4; ++i) {
190       Texture2DDescriptor desc;
191       desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
192       desc.size = int2(texture_width, texture_height);
193       desc.data.resize(sub_size);
194       memcpy(desc.data.data(), data.data() + sub_size * i, sub_size);
195       const std::string name = "weights" + std::to_string(i);
196       args_.AddObject(name,
197                       absl::make_unique<Texture2DDescriptor>(std::move(desc)));
198     }
199   }
200 }
201 
// Factory functions returning a ConvolutionTransposed by value. Each one is
// declared as a friend of ConvolutionTransposed, which gives it access to the
// class's private constructors. Their definitions are not part of this header.

// Variant taking 2D attributes (ConvolutionTransposedAttributes).
ConvolutionTransposed CreateConvolutionTransposed(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr);

// Variant taking 3D attributes (ConvolutionTransposed3DAttributes).
ConvolutionTransposed CreateConvolutionTransposed3D(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposed3DAttributes& attr);

// Variant taking 2D attributes; judging by the name, the weights are supplied
// at run time rather than uploaded as constants -- confirm against the
// definition.
ConvolutionTransposed CreateConvolutionTransposedDynamicWeights(
    const GpuInfo& gpu_info, const OperationDef& definition,
    const ConvolutionTransposedAttributes& attr);
213 
214 }  // namespace gpu
215 }  // namespace tflite
216 
217 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_CONVOLUTION_TRANSPOSED_H_
218