1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
17 
18 #include <string>
19 
20 #include "absl/strings/str_cat.h"
21 #include "absl/strings/substitute.h"
22 #include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
23 
24 namespace tflite {
25 namespace gpu {
26 
27 namespace {
GetOneInputCode(const GpuInfo & gpu_info,const OperationType & op_type,CalculationsPrecision precision,const std::string & input0)28 std::string GetOneInputCode(const GpuInfo& gpu_info,
29                             const OperationType& op_type,
30                             CalculationsPrecision precision,
31                             const std::string& input0) {
32   std::string result;
33   switch (op_type) {
34     case OperationType::ABS:
35       result = "$0 = fabs($0);\n";
36       break;
37     case OperationType::COS:
38       result = "$0 = cos($0);\n";
39       break;
40     case OperationType::COPY:
41       // No op as inout_value will be copied to dest automatically.
42       result = "\n";
43       break;
44     case OperationType::ELU:
45       if (gpu_info.IsApiOpenCl()) {
46         result = R"(
47 $0.x = $0.x < INIT_FLT(0.0f) ? expm1($0.x) : $0.x;
48 $0.y = $0.y < INIT_FLT(0.0f) ? expm1($0.y) : $0.y;
49 $0.z = $0.z < INIT_FLT(0.0f) ? expm1($0.z) : $0.z;
50 $0.w = $0.w < INIT_FLT(0.0f) ? expm1($0.w) : $0.w;)";
51       } else {
52         result = R"(
53 $0.x = $0.x < INIT_FLT(0.0f) ? exp($0.x) - INIT_FLT(1.0f) : $0.x;
54 $0.y = $0.y < INIT_FLT(0.0f) ? exp($0.y) - INIT_FLT(1.0f) : $0.y;
55 $0.z = $0.z < INIT_FLT(0.0f) ? exp($0.z) - INIT_FLT(1.0f) : $0.z;
56 $0.w = $0.w < INIT_FLT(0.0f) ? exp($0.w) - INIT_FLT(1.0f) : $0.w;)";
57       }
58       break;
59     case OperationType::EXP:
60       result = "$0 = exp($0);\n";
61       break;
62     case OperationType::HARD_SWISH:
63       result =
64           "$0 *= clamp($0 * INIT_FLT(0.16666667f) + INIT_FLT(0.5f), "
65           "INIT_FLT4(0.0f), "
66           "INIT_FLT4(1.0f));\n";
67       break;
68     case OperationType::LOG:
69       result = "$0 = log($0);\n";
70       break;
71     case OperationType::NEG:
72       result = "$0 = -($0);\n";
73       break;
74     case OperationType::RSQRT:
75       result = "$0 = rsqrt($0);\n";
76       break;
77     case OperationType::SIGMOID:
78       if (gpu_info.IsApiOpenCl() && precision != CalculationsPrecision::F32) {
79         result =
80             "$0 = convert_half4(native_recip(1.0f + "
81             "native_exp(convert_float4(-$0))));\n";
82       } else {
83         result = "$0 = INIT_FLT4(1.0f) / (INIT_FLT4(1.0f) + exp(-($0)));\n";
84       }
85       break;
86     case OperationType::SIN:
87       result = "$0 = sin($0);\n";
88       break;
89     case OperationType::SQRT:
90       result = "$0 = sqrt($0);\n";
91       break;
92     case OperationType::SQUARE:
93       result = "$0 *= $0;\n";
94       break;
95     case OperationType::TANH:
96       result = "$0 = tanh($0);\n";
97       break;
98     default:
99       return "Unknown operation type;\n";
100   }
101   return absl::Substitute(result, input0);
102 }
103 
GetTwoInputCode(const OperationType & op_type,const std::string & result_var,const std::string & input0,const std::string & input1,bool swap_inputs=false)104 std::string GetTwoInputCode(const OperationType& op_type,
105                             const std::string& result_var,
106                             const std::string& input0,
107                             const std::string& input1,
108                             bool swap_inputs = false) {
109   std::string result;
110   switch (op_type) {
111     case OperationType::ADD:
112       result += "$0 = $1 + $2;\n";
113       break;
114     case OperationType::DIV:
115       result += "$0 = $1 / $2;\n";
116       break;
117     case OperationType::MAXIMUM:
118       result += "$0 = max($1, $2);\n";
119       break;
120     case OperationType::MINIMUM:
121       result += "$0 = min($1, $2);\n";
122       break;
123     case OperationType::MUL:
124       result += "$0 = $1 * $2;\n";
125       break;
126     case OperationType::POW:
127       result += "$0 = pow($1, $2);\n";
128       break;
129     case OperationType::SQUARED_DIFF:
130       result += "$0 = ($1 - $2) * ($1 - $2);\n";
131       break;
132     case OperationType::SUB:
133       result += "$0 = $1 - $2;\n";
134       break;
135     // Comparison operators
136     case OperationType::LESS:
137       result = "$0.x = $1.x < $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
138       result += "$0.y = $1.y < $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
139       result += "$0.z = $1.z < $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
140       result += "$0.w = $1.w < $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
141       break;
142     case OperationType::LESS_EQUAL:
143       result = "$0.x = $1.x <= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
144       result += "$0.y = $1.y <= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
145       result += "$0.z = $1.z <= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
146       result += "$0.w = $1.w <= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
147       break;
148     case OperationType::GREATER:
149       result = "$0.x = $1.x > $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
150       result += "$0.y = $1.y > $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
151       result += "$0.z = $1.z > $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
152       result += "$0.w = $1.w > $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
153       break;
154     case OperationType::GREATER_EQUAL:
155       result = "$0.x = $1.x >= $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
156       result += "$0.y = $1.y >= $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
157       result += "$0.z = $1.z >= $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
158       result += "$0.w = $1.w >= $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
159       break;
160     case OperationType::EQUAL:
161       result = "$0.x = $1.x == $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
162       result += "$0.y = $1.y == $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
163       result += "$0.z = $1.z == $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
164       result += "$0.w = $1.w == $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
165       break;
166     case OperationType::NOT_EQUAL:
167       result = "$0.x = $1.x != $2.x ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
168       result += "$0.y = $1.y != $2.y ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
169       result += "$0.z = $1.z != $2.z ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
170       result += "$0.w = $1.w != $2.w ? INIT_FLT(1.0f) : INIT_FLT(0.0f);\n";
171       break;
172     default:
173       return "Unknown operation type;\n";
174   }
175   if (swap_inputs) {
176     return absl::Substitute(result, result_var, input1, input0);
177   } else {
178     return absl::Substitute(result, result_var, input0, input1);
179   }
180 }
181 
182 // Creates simple two input (first input is runtime tensor and second input is
183 // scalar argument) operation, for example sub, div, pow, etc.
CreateElementwiseOneRuntimeOneScalar(const OperationDef & definition,const OperationType & op_type,float scalar_parameter,bool swap_inputs)184 GPUOperation CreateElementwiseOneRuntimeOneScalar(
185     const OperationDef& definition, const OperationType& op_type,
186     float scalar_parameter, bool swap_inputs) {
187   GPUOperation op(definition);
188   op.elementwise_ = true;
189   if (definition.precision == CalculationsPrecision::F32) {
190     op.args_.AddFloat("scalar", scalar_parameter);
191   } else {
192     op.args_.AddHalf("scalar", half(scalar_parameter));
193   }
194   op.code_ = "FLT4 second_val = INIT_FLT4(args.scalar);\n";
195   op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
196                               "second_val", swap_inputs);
197   return op;
198 }
199 
200 // Creates simple two input(first input is runtime tensor and second input is
201 // constant linear tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<Linear,DataType::FLOAT32> & constant_tensor,bool swap_inputs)202 GPUOperation CreateElementwiseTwoInput(
203     const GpuInfo& gpu_info, const OperationDef& definition,
204     const OperationType& op_type,
205     const tflite::gpu::Tensor<Linear, DataType::FLOAT32>& constant_tensor,
206     bool swap_inputs) {
207   const BHWC shape = BHWC(1, 1, 1, constant_tensor.shape.v);
208   TensorStorageType storage_type =
209       SelectBestStorageType(gpu_info, shape, definition.GetPrimaryStorageType(),
210                             definition.GetDataType(), Layout::HWC);
211   TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
212   desc.UploadData(constant_tensor);
213 
214   GPUOperation result(definition);
215   result.elementwise_ = true;
216   result.args_.AddObject("second_tensor",
217                          absl::make_unique<TensorDescriptor>(std::move(desc)));
218   const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
219   result.code_ = absl::StrCat(
220       "FLT4 second_val = args.second_tensor.Read(0, 0, ", s_coord, ");\n");
221   if (shape.c == 1) {
222     result.code_ += "  second_val.y = second_val.x;\n";
223     result.code_ += "  second_val.z = second_val.x;\n";
224     result.code_ += "  second_val.w = second_val.x;\n";
225   }
226   result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
227                                   "second_val", swap_inputs);
228   return result;
229 }
230 
231 // Creates simple two input(first input is runtime tensor and second input is
232 // constant HWC tensor) operation, for example sub, div and etc.
CreateElementwiseTwoInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const tflite::gpu::Tensor<HWC,DataType::FLOAT32> & constant_tensor,bool swap_inputs)233 GPUOperation CreateElementwiseTwoInput(
234     const GpuInfo& gpu_info, const OperationDef& definition,
235     const OperationType& op_type,
236     const tflite::gpu::Tensor<HWC, DataType::FLOAT32>& constant_tensor,
237     bool swap_inputs) {
238   const BHWC shape = BHWC(1, constant_tensor.shape.h, constant_tensor.shape.w,
239                           constant_tensor.shape.c);
240   TensorStorageType storage_type =
241       SelectBestStorageType(gpu_info, shape, definition.GetPrimaryStorageType(),
242                             definition.GetDataType(), Layout::HWC);
243   TensorDescriptor desc{definition.GetDataType(), storage_type, Layout::HWC};
244   desc.UploadData(constant_tensor);
245 
246   GPUOperation result(definition);
247   result.elementwise_ = true;
248   result.args_.AddObject("second_tensor",
249                          absl::make_unique<TensorDescriptor>(std::move(desc)));
250   const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
251   const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
252   const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
253   result.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(",
254                               x_coord, ", ", y_coord, ", ", s_coord, ");\n");
255   if (shape.c == 1) {
256     result.code_ += "  second_val.y = second_val.x;\n";
257     result.code_ += "  second_val.z = second_val.x;\n";
258     result.code_ += "  second_val.w = second_val.x;\n";
259   }
260   result.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
261                                   "second_val", swap_inputs);
262 
263   return result;
264 }
265 
266 }  // namespace
267 
CreateElementwiseOneInput(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type)268 GPUOperation CreateElementwiseOneInput(const GpuInfo& gpu_info,
269                                        const OperationDef& definition,
270                                        const OperationType& op_type) {
271   GPUOperation op(definition);
272   op.elementwise_ = true;
273   op.code_ =
274       GetOneInputCode(gpu_info, op_type, definition.precision, "in_out_value");
275   return op;
276 }
277 
CreateElementwise(const GpuInfo & gpu_info,const OperationDef & definition,const OperationType & op_type,const ElementwiseAttributes & attr)278 GPUOperation CreateElementwise(const GpuInfo& gpu_info,
279                                const OperationDef& definition,
280                                const OperationType& op_type,
281                                const ElementwiseAttributes& attr) {
282   const float* scalar = absl::get_if<float>(&attr.param);
283   const auto* linear_tensor =
284       absl::get_if<tflite::gpu::Tensor<Linear, DataType::FLOAT32>>(&attr.param);
285   const auto* hwc_tensor =
286       absl::get_if<tflite::gpu::Tensor<HWC, DataType::FLOAT32>>(&attr.param);
287 
288   if (scalar) {
289     return CreateElementwiseOneRuntimeOneScalar(definition, op_type, *scalar,
290                                                 attr.runtime_tensor_is_second);
291   } else if (linear_tensor) {
292     return CreateElementwiseTwoInput(gpu_info, definition, op_type,
293                                      *linear_tensor,
294                                      attr.runtime_tensor_is_second);
295   } else if (hwc_tensor) {
296     return CreateElementwiseTwoInput(gpu_info, definition, op_type, *hwc_tensor,
297                                      attr.runtime_tensor_is_second);
298   } else {
299     return GPUOperation(definition);
300   }
301 }
302 
CreateElementwiseTwoInput(const OperationDef & definition,const OperationType & op_type,const BHWC & shape)303 GPUOperation CreateElementwiseTwoInput(const OperationDef& definition,
304                                        const OperationType& op_type,
305                                        const BHWC& shape) {
306   GPUOperation op(definition);
307   op.elementwise_ = true;
308   auto src_desc = definition.src_tensors[1];
309   if (definition.IsBatchSupported()) {
310     src_desc.SetStateVar("BatchedWidth", "true");
311   }
312   op.AddSrcTensor("second_tensor", src_desc);
313   const std::string x_coord = shape.w == 1 ? "0" : "X_COORD";
314   const std::string y_coord = shape.h == 1 ? "0" : "Y_COORD";
315   const std::string s_coord = shape.c == 1 ? "0" : "S_COORD";
316   op.code_ = absl::StrCat("FLT4 second_val = args.second_tensor.Read(", x_coord,
317                           ", ", y_coord, ", ", s_coord, ");\n");
318   if (shape.c == 1) {
319     op.code_ += "  second_val.y = second_val.x;\n";
320     op.code_ += "  second_val.z = second_val.x;\n";
321     op.code_ += "  second_val.w = second_val.x;\n";
322   }
323   op.code_ += GetTwoInputCode(op_type, "in_out_value", "in_out_value",
324                               "second_val", false);
325   return op;
326 }
327 
328 }  // namespace gpu
329 }  // namespace tflite
330