1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_WINOGRAD_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_WINOGRAD_H_
18 
19 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
20 #include "tensorflow/lite/delegates/gpu/common/operations.h"
21 #include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
22 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
23 #include "tensorflow/lite/delegates/gpu/common/task/tensor_linear_desc.h"
24 
25 namespace tflite {
26 namespace gpu {
27 
// See https://arxiv.org/pdf/1509.09308.pdf for the basic principles. These
// kernels use different transformation matrices than the original work.
31 class Winograd4x4To36 : public GPUOperation {
32  public:
33   Winograd4x4To36() = default;
GetPossibleKernelWorkGroups(TuningType tuning_type,const GpuInfo & gpu_info,const KernelInfo & kernel_info,std::vector<int3> * work_groups)34   void GetPossibleKernelWorkGroups(
35       TuningType tuning_type, const GpuInfo& gpu_info,
36       const KernelInfo& kernel_info,
37       std::vector<int3>* work_groups) const override {
38     work_groups->push_back(work_group_size_);
39   }
40   int3 GetGridSize() const override;
41   absl::Status BindArguments(ArgumentsBinder* args) override;
42 
43   // Move only
44   Winograd4x4To36(Winograd4x4To36&& kernel) = default;
45   Winograd4x4To36& operator=(Winograd4x4To36&& kernel) = default;
46   Winograd4x4To36(const Winograd4x4To36&) = delete;
47   Winograd4x4To36& operator=(const Winograd4x4To36&) = delete;
48 
49  private:
Winograd4x4To36(const OperationDef & definition,const Padding2D & padding)50   Winograd4x4To36(const OperationDef& definition, const Padding2D& padding)
51       : GPUOperation(definition), padding_(padding) {}
52   friend Winograd4x4To36 CreateWinograd4x4To36(const OperationDef& definition,
53                                                const Padding2D& padding);
54 
55   Padding2D padding_;
56 };
57 
58 Winograd4x4To36 CreateWinograd4x4To36(const OperationDef& definition,
59                                       const Padding2D& padding);
60 
61 class Winograd4x4To36TileX6 : public GPUOperation {
62  public:
63   Winograd4x4To36TileX6() = default;
64   Winograd4x4To36TileX6(const OperationDef& definition,
65                         const Padding2D& padding, const GpuInfo& gpu_info);
66   absl::Status BindArguments(ArgumentsBinder* args) override;
67   int3 GetGridSize() const override;
68   void GetPossibleKernelWorkGroups(
69       TuningType tuning_type, const GpuInfo& gpu_info,
70       const KernelInfo& kernel_info,
71       std::vector<int3>* work_groups) const override;
72 
73   // Move only
74   Winograd4x4To36TileX6(Winograd4x4To36TileX6&& operation) = default;
75   Winograd4x4To36TileX6& operator=(Winograd4x4To36TileX6&& operation) = default;
76   Winograd4x4To36TileX6(const Winograd4x4To36TileX6&) = delete;
77   Winograd4x4To36TileX6& operator=(const Winograd4x4To36TileX6&) = delete;
78 
79  private:
80   friend Winograd4x4To36TileX6 CreateWinograd4x4To36TileX6(
81       const GpuInfo& gpu_info, const OperationDef& definition,
82       const Padding2D& padding);
83 
84   void UploadBt();
85 
86   std::string GetWinograd4x4To36TileX6Code(const OperationDef& op_def);
87 
88   // Must be called after kernel compilation
89   int3 SelectBestWorkGroup(const KernelInfo& kernel_info) const;
90 
91   Padding2D padding_;
92 };
93 
94 Winograd4x4To36TileX6 CreateWinograd4x4To36TileX6(
95     const GpuInfo& gpu_info, const OperationDef& definition,
96     const Padding2D& padding);
97 
98 class Winograd36To4x4 : public GPUOperation {
99  public:
100   Winograd36To4x4() = default;
GetPossibleKernelWorkGroups(TuningType tuning_type,const GpuInfo & gpu_info,const KernelInfo & kernel_info,std::vector<int3> * work_groups)101   void GetPossibleKernelWorkGroups(
102       TuningType tuning_type, const GpuInfo& gpu_info,
103       const KernelInfo& kernel_info,
104       std::vector<int3>* work_groups) const override {
105     work_groups->push_back(work_group_size_);
106   }
107   int3 GetGridSize() const override;
108 
109   // Move only
110   Winograd36To4x4(Winograd36To4x4&& kernel) = default;
111   Winograd36To4x4& operator=(Winograd36To4x4&& kernel) = default;
112   Winograd36To4x4(const Winograd36To4x4&) = delete;
113   Winograd36To4x4& operator=(const Winograd36To4x4&) = delete;
114 
115  private:
Winograd36To4x4(const OperationDef & definition)116   explicit Winograd36To4x4(const OperationDef& definition)
117       : GPUOperation(definition) {}
118   friend Winograd36To4x4 CreateWinograd36To4x4(
119       const OperationDef& definition,
120       const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
121 };
122 
123 Winograd36To4x4 CreateWinograd36To4x4(
124     const OperationDef& definition,
125     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
126 
127 class Winograd36To4x4Tile4x1 : public GPUOperation {
128  public:
129   Winograd36To4x4Tile4x1() = default;
130   Winograd36To4x4Tile4x1(const OperationDef& definition,
131                          const GpuInfo& gpu_info);
132   absl::Status BindArguments(ArgumentsBinder* args) override;
133   int3 GetGridSize() const override;
134   void GetPossibleKernelWorkGroups(
135       TuningType tuning_type, const GpuInfo& gpu_info,
136       const KernelInfo& kernel_info,
137       std::vector<int3>* work_groups) const override;
138 
139   // Move only
140   Winograd36To4x4Tile4x1(Winograd36To4x4Tile4x1&& operation) = default;
141   Winograd36To4x4Tile4x1& operator=(Winograd36To4x4Tile4x1&& operation) =
142       default;
143   Winograd36To4x4Tile4x1(const Winograd36To4x4Tile4x1&) = delete;
144   Winograd36To4x4Tile4x1& operator=(const Winograd36To4x4Tile4x1&) = delete;
145 
146  private:
147   friend Winograd36To4x4Tile4x1 CreateWinograd36To4x4Tile4x1(
148       const GpuInfo& gpu_info, const OperationDef& definition,
149       const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
150 
151   void UploadAt();
152 
153   std::string GetWinograd36To4x4Tile4x1Code(const OperationDef& op_def);
154 
155   // Must be called after kernel compilation
156   int3 SelectBestWorkGroup(const KernelInfo& kernel_info) const;
157 };
158 
159 Winograd36To4x4Tile4x1 CreateWinograd36To4x4Tile4x1(
160     const GpuInfo& gpu_info, const OperationDef& definition,
161     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& biases);
162 
163 }  // namespace gpu
164 }  // namespace tflite
165 
166 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_WINOGRAD_H_
167