/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"

#include "absl/strings/substitute.h"
#include "tensorflow/lite/delegates/gpu/common/access_type.h"
#include "tensorflow/lite/delegates/gpu/common/task/work_group_picking.h"

namespace tflite {
namespace gpu {
namespace {
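// Generates the kernel source for a purely elementwise operation: one work
// item per (X, Y, Z) element of args.dst_tensor. When check_src_slices is
// true, reads beyond args.src_tensor.Slices() yield zeros instead of
// out-of-range accesses.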
std::string GetElementWiseCode(const OperationDef& op_def,
                               bool check_src_slices) {
  std::string c;
  c += "MAIN_FUNCTION(\n";
  c += "$0) {\n";
  c += "  int X = GLOBAL_ID_0;\n";
  c += "  int Y = GLOBAL_ID_1;\n";
  c += "  int Z = GLOBAL_ID_2;\n";
  c += "  if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || "
       "Z >= args.dst_tensor.Slices()) return; \n";
  if (check_src_slices) {
    c += "  FLT4 src = INIT_FLT4(0.0f);\n";
    c += "  if (Z < args.src_tensor.Slices()) {\n";
    c += "    src = args.src_tensor.Read(X, Y, Z);\n";
    c += "  }\n";
  } else {
    c += "  FLT4 src = args.src_tensor.Read(X, Y, Z);\n";
  }
  c += "  args.dst_tensor.Write(src, X, Y, Z);\n";
  c += "} \n";
  return c;
}

}  // namespace

DataType OperationDef::GetDataType() const {
  return DeduceDataTypeFromPrecision(precision);
}

DataType OperationDef::GetPrimaryDataType() const {
  return src_tensors[0].data_type;
}

TensorStorageType OperationDef::GetPrimaryStorageType() const {
  return src_tensors[0].storage_type;
}

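// An operation supports batching if any of its source or destination tensors
// has a BATCH axis in its layout.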
bool OperationDef::IsBatchSupported() const {
  for (const auto& src : src_tensors) {
    if (HasAxis(src.layout, Axis::BATCH)) {
      return true;
    }
  }
  for (const auto& dst : dst_tensors) {
    if (HasAxis(dst.layout, Axis::BATCH)) {
      return true;
    }
  }
  return false;
}

GPUOperation::GPUOperation(const OperationDef& definition)
    : definition_(definition) {}

void GPUOperation::SetSrc(GpuSpatialTensor* ptr, int index) {
  if (index >= src_.size()) {
    src_.resize(index + 1, nullptr);
  }
  src_[index] = ptr;
}

void GPUOperation::SetDst(GpuSpatialTensor* ptr, int index) {
  if (index >= dst_.size()) {
    dst_.resize(index + 1, nullptr);
  }
  dst_[index] = ptr;
}

GPUOperation::GPUOperation(GPUOperation&& operation)
    : args_(std::move(operation.args_)),
      code_(std::move(operation.code_)),
      work_group_size_(operation.work_group_size_),
      compiler_options_(std::move(operation.compiler_options_)),
      tensor_to_grid_(operation.tensor_to_grid_),
      elementwise_(operation.elementwise_),
      linkable_(operation.linkable_),
      check_src_channels_size_(operation.check_src_channels_size_),
      definition_(std::move(operation.definition_)),
      src_(std::move(operation.src_)),
      dst_(std::move(operation.dst_)),
      grid_dimension_(operation.grid_dimension_),
      work_group_launch_order_(operation.work_group_launch_order_),
      grid_size_(operation.grid_size_),
      src_tensors_names_(std::move(operation.src_tensors_names_)),
      dst_tensors_names_(std::move(operation.dst_tensors_names_)),
      work_groups_count_(operation.work_groups_count_),
      linkable_count_(operation.linkable_count_),
      elementwise_code_(std::move(operation.elementwise_code_)) {}

GPUOperation& GPUOperation::operator=(GPUOperation&& operation) {
  if (this != &operation) {
    args_ = std::move(operation.args_);
    code_ = std::move(operation.code_);
    std::swap(work_group_size_, operation.work_group_size_);
    compiler_options_ = std::move(operation.compiler_options_);
    tensor_to_grid_ = operation.tensor_to_grid_;
    elementwise_ = operation.elementwise_;
    linkable_ = operation.linkable_;
    check_src_channels_size_ = operation.check_src_channels_size_;
    definition_ = std::move(operation.definition_);
    src_ = std::move(operation.src_);
    dst_ = std::move(operation.dst_);
    std::swap(grid_dimension_, operation.grid_dimension_);
    std::swap(work_group_launch_order_, operation.work_group_launch_order_);
    std::swap(grid_size_, operation.grid_size_);
    src_tensors_names_ = std::move(operation.src_tensors_names_);
    dst_tensors_names_ = std::move(operation.dst_tensors_names_);
    std::swap(work_groups_count_, operation.work_groups_count_);
    std::swap(linkable_count_, operation.linkable_count_);
    elementwise_code_ = std::move(operation.elementwise_code_);
  }
  return *this;
}

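// Fuses a linkable elementwise operation into this one: the linked code is
// appended to elementwise_code_, and its arguments and extra tensor names are
// merged under a unique "_linkN" postfix.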
absl::Status GPUOperation::AddOperation(GPUOperation* operation) {
  linkable_count_ += 1;
  std::string code = operation->code_;
  std::string unique_postfix = absl::StrCat("_link", linkable_count_);
  operation->args_.RenameArgs(unique_postfix, &code);
  elementwise_code_ += "{\n" + code + "\n}\n";
  RETURN_IF_ERROR(args_.Merge(std::move(operation->args_), unique_postfix));
  for (int i = 0; i < operation->src_tensors_names_.size(); ++i) {
    definition_.src_tensors.push_back(
        operation->definition_.src_tensors[i + 1]);
    src_tensors_names_.push_back(operation->src_tensors_names_[i] +
                                 unique_postfix);
  }
  for (int i = 0; i < operation->dst_tensors_names_.size(); ++i) {
    dst_tensors_names_.push_back(operation->dst_tensors_names_[i] +
                                 unique_postfix);
  }
  return absl::OkStatus();
}

void GPUOperation::AddSrcTensor(const std::string& tensor_name,
                                const TensorDescriptor& desc) {
  src_tensors_names_.push_back(tensor_name);
  auto desc_new = absl::make_unique<TensorDescriptor>(desc);
  args_.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new));
}

void GPUOperation::AddSrcBuffer(const std::string& buffer_name,
                                const BufferDescriptor& desc) {
  src_tensors_names_.push_back(buffer_name);
  auto desc_new = absl::make_unique<BufferDescriptor>(desc);
  args_.AddObjectRef(buffer_name, AccessType::READ, std::move(desc_new));
}

void GPUOperation::AddSrcTexture2D(const std::string& texture_name,
                                   const Texture2DDescriptor& desc) {
  src_tensors_names_.push_back(texture_name);
  auto desc_new = absl::make_unique<Texture2DDescriptor>(desc);
  args_.AddObjectRef(texture_name, AccessType::READ, std::move(desc_new));
}

void GPUOperation::AddDstTensor(const std::string& tensor_name,
                                const TensorDescriptor& desc) {
  dst_tensors_names_.push_back(tensor_name);
  auto desc_new = absl::make_unique<TensorDescriptor>(desc);
  args_.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new));
}

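// For elementwise operations, registers the primary src/dst tensor arguments,
// wraps the operation's own code (plus any linked code) into
// elementwise_code_, and replaces code_ with the generated elementwise kernel.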
void GPUOperation::AssembleCode(const GpuInfo& gpu_info) {
  if (elementwise_) {
    auto src_desc =
        absl::make_unique<TensorDescriptor>(definition_.src_tensors[0]);
    if (definition_.IsBatchSupported()) {
      src_desc->SetStateVar("BatchedWidth", "true");
    }
    src_tensors_names_.insert(src_tensors_names_.begin(), "src_tensor");
    args_.AddObjectRef("src_tensor", AccessType::READ, std::move(src_desc));

    auto dst_desc =
        absl::make_unique<TensorDescriptor>(definition_.dst_tensors[0]);
    if (definition_.IsBatchSupported()) {
      dst_desc->SetStateVar("BatchedWidth", "true");
    }
    dst_tensors_names_.insert(dst_tensors_names_.begin(), "dst_tensor");
    args_.AddObjectRef("dst_tensor", AccessType::WRITE, std::move(dst_desc));

    elementwise_code_ = "{\n" + code_ + "\n}\n" + elementwise_code_;
    code_ = GetElementWiseCode(definition_, check_src_channels_size_);
  }
}

void GPUOperation::GetPossibleKernelWorkGroups(
    TuningType tuning_type, const GpuInfo& gpu_info,
    const KernelInfo& kernel_info, std::vector<int3>* work_groups) const {
  GetPossibleWorkGroups(tuning_type, gpu_info, kernel_info, grid_size_,
                        work_groups);
}

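// Maps the destination tensor shape to a 3D dispatch grid according to
// tensor_to_grid_; elementwise operations always use the
// (Width * Batch, Height * Depth, Slices) mapping.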
int3 GPUOperation::GetGridSize() const {
  if (elementwise_ || tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_SToZ) {
    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
    const int grid_z = dst_[0]->Slices();
    return int3(grid_x, grid_y, grid_z);
  }
  if (tensor_to_grid_ == TensorToGrid::kWBToX_HDToY_ZIs1) {
    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
    const int grid_y = dst_[0]->Height() * dst_[0]->Depth();
    const int grid_z = 1;
    return int3(grid_x, grid_y, grid_z);
  }
  if (tensor_to_grid_ == TensorToGrid::kWBToX_HToY_DToZ) {
    const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
    const int grid_y = dst_[0]->Height();
    const int grid_z = dst_[0]->Depth();
    return int3(grid_x, grid_y, grid_z);
  }
  if (tensor_to_grid_ == TensorToGrid::kBToX_YIs1_ZIs1) {
    const int grid_x = dst_[0]->Batch();
    const int grid_y = 1;
    const int grid_z = 1;
    return int3(grid_x, grid_y, grid_z);
  }
  return grid_size_;
}

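// Appends the postfix to all stored src/dst tensor names, keeping argument
// names unique when several operations share one argument namespace.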
void GPUOperation::AddUniquePostfix(const std::string& unique_postfix) {
  for (int i = 0; i < src_tensors_names_.size(); ++i) {
    src_tensors_names_[i] += unique_postfix;
  }
  for (int i = 0; i < dst_tensors_names_.size(); ++i) {
    dst_tensors_names_[i] += unique_postfix;
  }
}

}  // namespace gpu
}  // namespace tflite