/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_
#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_

#include <string>
#include <vector>

#include "tensorflow/lite/delegates/gpu/common/data_type.h"
#include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
#include "tensorflow/lite/delegates/gpu/common/kernel_info.h"
#include "tensorflow/lite/delegates/gpu/common/precision.h"
#include "tensorflow/lite/delegates/gpu/common/status.h"
#include "tensorflow/lite/delegates/gpu/common/task/arguments.h"
#include "tensorflow/lite/delegates/gpu/common/task/buffer_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/compiler_options.h"
#include "tensorflow/lite/delegates/gpu/common/task/gpu_tensor.h"
#include "tensorflow/lite/delegates/gpu/common/task/serialization_base_generated.h"
#include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/texture2d_desc.h"
#include "tensorflow/lite/delegates/gpu/common/task/tuning_type.h"
#include "tensorflow/lite/delegates/gpu/common/types.h"

namespace tflite {
namespace gpu {
namespace cl {
class ClOperation;
}
namespace metal {
class ComputeTask;
struct ComputeTaskDescriptor;
}

// kCustom: default value
//   GPUOperation::GetGridSize must be overloaded
// kWBToX_HDToY_SToZ:
//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
//   grid_z = dst_[0]->Slices();
// kWBToX_HDToY_ZIs1:
//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
//   grid_y = dst_[0]->Height() * dst_[0]->Depth();
//   grid_z = 1;
// kWBToX_HToY_DToZ:
//   grid_x = dst_[0]->Width() * dst_[0]->Batch();
//   grid_y = dst_[0]->Height();
//   grid_z = dst_[0]->Depth();
// kBToX_YIs1_ZIs1:
//   grid_x = dst_[0]->Batch();
//   grid_y = 1;
//   grid_z = 1;
enum class TensorToGrid {
  kCustom,
  kWBToX_HDToY_SToZ,
  kWBToX_HDToY_ZIs1,
  kWBToX_HToY_DToZ,
  kBToX_YIs1_ZIs1
};

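// For example (hypothetical sizes): with tensor_to_grid_ =
// TensorToGrid::kWBToX_HDToY_SToZ and a dst tensor where Width() = 16,
// Batch() = 2, Height() = 8, Depth() = 1 and Slices() = 4, the resulting
// launch grid is grid_x = 32, grid_y = 8, grid_z = 4.
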
struct OperationDef {
  CalculationsPrecision precision;
  std::vector<TensorDescriptor> src_tensors;
  std::vector<TensorDescriptor> dst_tensors;

  // Returns FLOAT32 for F32 precision and FLOAT16 for F16 precision.
  DataType GetDataType() const;
  // "Primary" refers to the first src tensor, because the first tensor
  // usually defines the structure of the kernel and the types of all other
  // resources (biases, etc.).
  DataType GetPrimaryDataType() const;
  TensorStorageType GetPrimaryStorageType() const;
  bool IsBatchSupported() const;
};
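
// A minimal illustrative sketch (not part of this header) of filling in an
// OperationDef for a single-input, single-output F16 operation;
// input_descriptor and output_descriptor are hypothetical TensorDescriptor
// values that depend on the backend, data type and tensor layout in use:
//
//   OperationDef def;
//   def.precision = CalculationsPrecision::F16;
//   def.src_tensors.push_back(input_descriptor);
//   def.dst_tensors.push_back(output_descriptor);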

// GPUOperation represents an implementation of a neural network operation on
// the GPU. A GPUOperation can contain other GPU operations that have the
// elementwise_ flag set; when it does, this GPUOperation replaces a sequence
// of operations Op + op0 + op1 + ...
// Because of this ability, the usage scenario is:
//   Create an instance of GPUOperation.
//   Create all instances of GPUOperations that will (probably) be attached
//   to it, and attach them with AddOperation().
//   Call GPUOperation.Compile(). Do not call Compile() on the attached
//   GPUOperations; it is useless (and may be an error).
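//
// A hedged usage sketch (illustrative only; conv_op and relu_op are
// hypothetical operations, and compilation is performed through the
// backend-specific wrapper):
//
//   GPUOperation conv_op = ...;  // the main operation
//   GPUOperation relu_op = ...;  // an elementwise op (elementwise_ = true)
//   absl::Status status = conv_op.AddOperation(&relu_op);
//   // Compile only conv_op; relu_op must not be compiled separately.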
class GPUOperation {
 public:
  GPUOperation() = default;
  explicit GPUOperation(const OperationDef& definition);
  virtual ~GPUOperation() = default;
  // Move only
  GPUOperation(GPUOperation&& operation);
  GPUOperation& operator=(GPUOperation&& operation);
  GPUOperation(const GPUOperation&) = delete;
  GPUOperation& operator=(const GPUOperation&) = delete;

  absl::Status AddOperation(GPUOperation* operation);

  void SetSrc(GpuSpatialTensor* ptr, int index = 0);
  void SetDst(GpuSpatialTensor* ptr, int index = 0);

  virtual void GetPossibleKernelWorkGroups(
      TuningType tuning_type, const GpuInfo& gpu_info,
      const KernelInfo& kernel_info, std::vector<int3>* work_groups) const;

  void AssembleCode(const GpuInfo& gpu_info);

  virtual absl::Status PostCompileCheck(const GpuInfo& gpu_info,
                                        const KernelInfo& kernel_info) {
    return absl::OkStatus();
  }

  const OperationDef& GetDefinition() const { return definition_; }

  void AddSrcTensor(const std::string& tensor_name,
                    const TensorDescriptor& desc);
  void AddSrcBuffer(const std::string& buffer_name,
                    const BufferDescriptor& desc);
  void AddSrcTexture2D(const std::string& texture_name,
                       const Texture2DDescriptor& desc);
  void AddDstTensor(const std::string& tensor_name,
                    const TensorDescriptor& desc);

  bool IsLinkable() const { return elementwise_ && linkable_; }

  // for linking
  void AddUniquePostfix(const std::string& unique_postfix);

  Arguments args_;
  std::string code_;
  int3 work_group_size_ = int3(8, 4, 1);
  std::vector<CompilerOptions> compiler_options_;
  // not applicable to elementwise
  TensorToGrid tensor_to_grid_ = TensorToGrid::kCustom;

  bool elementwise_ = false;
  // applicable only with elementwise_ = true
  bool linkable_ = true;  // by default every elementwise is linkable
  // applicable only with elementwise_ = true
  bool check_src_channels_size_ = false;

 protected:
  friend class cl::ClOperation;
  friend class metal::ComputeTask;
  friend struct metal::ComputeTaskDescriptor;
  friend flatbuffers::Offset<tflite::gpu::data::GPUOperation> Encode(
      const GPUOperation& op, flatbuffers::FlatBufferBuilder* builder);
  friend absl::Status Decode(const tflite::gpu::data::GPUOperation* fb_op,
                             GPUOperation* op);

  virtual absl::Status BindArguments(ArgumentsBinder* args) {
    return absl::OkStatus();
  }
  virtual int3 GetGridSize() const;

  // Defines operation calculation precision and format of src/dst tensors.
  OperationDef definition_;
  std::vector<GpuSpatialTensor*> src_;
  std::vector<GpuSpatialTensor*> dst_;
  int grid_dimension_ = 3;  // can be 1, 2 or 3
  int3 work_group_launch_order_ = int3(0, 1, 2);
  int3 grid_size_ = int3(0, 0, 0);
  std::vector<std::string> src_tensors_names_;
  std::vector<std::string> dst_tensors_names_;

 private:
  int3 work_groups_count_ = int3(0, 0, 0);
  int linkable_count_ = 0;
  std::string elementwise_code_;  // temporary, used during op construction
};
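
// A minimal sketch (illustrative only, not part of the API) of a derived
// operation that supplies its own kernel source and a custom grid;
// MyCustomOp and the "..." kernel source are hypothetical:
//
//   class MyCustomOp : public GPUOperation {
//    public:
//     explicit MyCustomOp(const OperationDef& definition)
//         : GPUOperation(definition) {
//       AddSrcTensor("src_tensor", definition.src_tensors[0]);
//       AddDstTensor("dst_tensor", definition.dst_tensors[0]);
//       code_ = "...";  // kernel source for this operation
//     }
//
//    private:
//     // Required because tensor_to_grid_ stays at TensorToGrid::kCustom.
//     int3 GetGridSize() const override {
//       return int3(dst_[0]->Width() * dst_[0]->Batch(), dst_[0]->Height(),
//                   dst_[0]->Slices());
//     }
//   };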

}  // namespace gpu
}  // namespace tflite

#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASK_GPU_OPERATION_H_