1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/common/selectors/operation_selector.h"
17 
18 #include "absl/strings/str_cat.h"
19 #include "absl/types/any.h"
20 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
21 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
22 #include "tensorflow/lite/delegates/gpu/common/operations.h"
23 #include "tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h"
24 #include "tensorflow/lite/delegates/gpu/common/selectors/convolution_transposed_selector.h"
25 #include "tensorflow/lite/delegates/gpu/common/selectors/default_selector.h"
26 #include "tensorflow/lite/delegates/gpu/common/selectors/dw_convolution_selector.h"
27 #include "tensorflow/lite/delegates/gpu/common/selectors/fully_connected_selector.h"
28 #include "tensorflow/lite/delegates/gpu/common/selectors/simple_selectors.h"
29 #include "tensorflow/lite/delegates/gpu/common/shape.h"
30 #include "tensorflow/lite/delegates/gpu/common/status.h"
31 #include "tensorflow/lite/delegates/gpu/common/task/storage_type_util.h"
32 #include "tensorflow/lite/delegates/gpu/common/task/tensor_desc.h"
33 #include "tensorflow/lite/delegates/gpu/common/task/weights_conversion.h"
34 #include "tensorflow/lite/delegates/gpu/common/tasks/elementwise.h"
35 #include "tensorflow/lite/delegates/gpu/common/tasks/mean_stddev_normalization.h"
36 #include "tensorflow/lite/delegates/gpu/common/tasks/transpose.h"
37 #include "tensorflow/lite/delegates/gpu/common/tensor.h"
38 #include "tensorflow/lite/delegates/gpu/common/winograd_util.h"
39 
40 namespace tflite {
41 namespace gpu {
42 namespace {
IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes & attr,const GpuInfo & gpu_info,const BHWC & dst_shape)43 bool IsRecommendedForWinograd4x4To6x6(const Convolution2DAttributes& attr,
44                                       const GpuInfo& gpu_info,
45                                       const BHWC& dst_shape) {
46   const int tiles_x = DivideRoundUp(dst_shape.w, 4);
47   const int tiles_y = DivideRoundUp(dst_shape.h, 4);
48   const int total_tiles = tiles_x * tiles_y;
49   const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
50   const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4);
51   int min_depth = 16;
52   if (gpu_info.IsAdreno() || gpu_info.IsAMD()) {
53     min_depth = 32;
54   }
55   int min_tiles = 32;
56   if (gpu_info.IsAdreno()) {
57     if (gpu_info.adreno_info.IsAdreno6xx()) {
58       min_tiles = 128;
59     } else {
60       min_tiles = 64;
61     }
62   }
63   if (gpu_info.IsAMD()) {
64     min_tiles = 64;
65   }
66   if (total_tiles >= min_tiles * 8) {
67     min_depth /= 4;
68     min_depth = std::max(min_depth, 8);
69   } else if (total_tiles >= min_tiles * 4) {
70     min_depth /= 2;
71     min_depth = std::max(min_depth, 8);
72   }
73   const bool recommended_channels =
74       src_depth >= min_depth && dst_depth >= min_depth;
75   const bool recommended_hw = total_tiles >= min_tiles;
76   return recommended_channels && recommended_hw;
77 }
78 
WinogradFromNode(const GpuInfo & gpu_info,const std::vector<Value * > & inputs,const std::vector<Value * > & outputs,const OperationDef & op_def,ModelHints hints,const BHWC & input_shape,const BHWC & output_shape,const Convolution2DAttributes & attr,GPUOperationsSubgraph * gpu_subgraph)79 absl::Status WinogradFromNode(const GpuInfo& gpu_info,
80                               const std::vector<Value*>& inputs,
81                               const std::vector<Value*>& outputs,
82                               const OperationDef& op_def, ModelHints hints,
83                               const BHWC& input_shape, const BHWC& output_shape,
84                               const Convolution2DAttributes& attr,
85                               GPUOperationsSubgraph* gpu_subgraph) {
86   if (!IsSuitableForWinograd4x4To6x6(attr)) {
87     return absl::UnimplementedError("No implementation for this case.");
88   }
89   if (!IsRecommendedForWinograd4x4To6x6(attr, gpu_info, output_shape)) {
90     return absl::UnimplementedError("Not recommended for this case.");
91   }
92 
93   const int tiles_x = DivideRoundUp(output_shape.w, 4);
94   const int tiles_y = DivideRoundUp(output_shape.h, 4);
95   const BHWC shape_0{input_shape.b, 36, tiles_x * tiles_y, input_shape.c};
96   const BHWC shape_1{input_shape.b, 36, tiles_x * tiles_y, output_shape.c};
97   TensorDescriptor td_0;
98   td_0.storage_type = SelectBestStorageType(
99       gpu_info, shape_0, op_def.src_tensors[0].storage_type,
100       op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout);
101   td_0.data_type = op_def.src_tensors[0].data_type;
102   td_0.layout = op_def.src_tensors[0].layout;
103   TensorDescriptor td_1;
104   td_1.storage_type = SelectBestStorageType(
105       gpu_info, shape_1, op_def.src_tensors[0].storage_type,
106       op_def.src_tensors[0].data_type, op_def.src_tensors[0].layout);
107   td_1.data_type = op_def.src_tensors[0].data_type;
108   td_1.layout = op_def.src_tensors[0].layout;
109   gpu_subgraph->new_tensors = {{shape_0, td_0}, {shape_1, td_1}};
110   gpu_subgraph->operations.clear();
111   gpu_subgraph->operations.resize(3);
112 
113   OperationDef winograd_up_def;
114   winograd_up_def.precision = op_def.precision;
115   winograd_up_def.src_tensors.push_back(op_def.src_tensors[0]);
116   winograd_up_def.dst_tensors.push_back(td_0);
117   auto& winograd_up = gpu_subgraph->operations[0];
118   winograd_up.operation =
119       SelectWinograd4x4To36(gpu_info, attr.padding, winograd_up_def);
120   winograd_up.input_ids = {static_cast<int>(inputs[0]->id)};
121   winograd_up.output_ids = {-1};
122 
123   OperationDef conv_def;
124   conv_def.precision = op_def.precision;
125   conv_def.src_tensors.push_back(td_0);
126   conv_def.dst_tensors.push_back(td_1);
127   auto& conv = gpu_subgraph->operations[1];
128   conv.input_ids = {-1};
129   conv.output_ids = {-2};
130   conv.operation = SelectConvolutionForWinograd(attr, input_shape, gpu_info,
131                                                 conv_def, hints);
132 
133   OperationDef winograd_down_def;
134   winograd_down_def.precision = op_def.precision;
135   winograd_down_def.src_tensors.push_back(td_1);
136   winograd_down_def.dst_tensors.push_back(op_def.dst_tensors[0]);
137   auto& winograd_down = gpu_subgraph->operations[2];
138   winograd_down.input_ids = {-2};
139   winograd_down.output_ids = {static_cast<int>(outputs[0]->id)};
140   auto bias_copy = attr.bias;
141   if (bias_copy.shape.v < attr.weights.shape.o) {
142     bias_copy.shape = Linear(attr.weights.shape.o);
143     bias_copy.data.resize(attr.weights.shape.o);
144   }
145   winograd_down.operation =
146       SelectWinograd36To4x4(gpu_info, winograd_down_def, bias_copy);
147   return absl::OkStatus();
148 }
149 
150 }  // namespace
151 
GPUOperationFromNode(const GpuInfo & gpu_info,const OperationDef & op_def,ModelHints hints,const std::vector<Value * > & inputs,const std::vector<Value * > & outputs,const Node & node,GPUOperationsSubgraph * gpu_subgraph)152 absl::Status GPUOperationFromNode(const GpuInfo& gpu_info,
153                                   const OperationDef& op_def, ModelHints hints,
154                                   const std::vector<Value*>& inputs,
155                                   const std::vector<Value*>& outputs,
156                                   const Node& node,
157                                   GPUOperationsSubgraph* gpu_subgraph) {
158   std::unique_ptr<GPUOperation>* gpu_op =
159       InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
160   auto op_type = OperationTypeFromString(node.operation.type);
161   switch (op_type) {
162     case OperationType::ADD: {
163       if (inputs.size() == 2 &&
164           (inputs[0]->tensor.shape.c == inputs[1]->tensor.shape.c ||
165            inputs[1]->tensor.shape.c == 1)) {
166         GPUOperation operation =
167             CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
168         *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
169         return absl::OkStatus();
170       } else if (inputs.size() >= 2) {
171         auto output = outputs[0];
172         std::vector<int> channels(inputs.size());
173         for (int i = 0; i < inputs.size(); ++i) {
174           channels[i] = inputs[i]->tensor.shape.c;
175         }
176         SelectAdd(op_def, channels, output->tensor.shape.c, gpu_op);
177         return absl::OkStatus();
178       } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
179         auto attr =
180             absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
181         GPUOperation operation =
182             CreateElementwise(gpu_info, op_def, op_type, attr);
183         *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
184         return absl::OkStatus();
185       }
186       return absl::UnimplementedError(absl::StrCat(
187           "No support of ", node.operation.type, " with this parameters"));
188     }
189     case OperationType::BATCHED_MATMUL: {
190       // Currently only batch = 1 is supported.
191       // Matmul replaced with this sequence:
192       //   1) Transpose second tensor(weights). (1x1xHxW)->(Wx1x1xH)
193       //   2) Convert second tensor(weights) from 1) to Convolution weights
194       //   3) Run usual convolution
195       auto second_shape = inputs[1]->tensor.shape;
196       auto dst_shape = outputs[0]->tensor.shape;
197       if (dst_shape.b != 1) {
198         return absl::UnimplementedError(
199             "Currently only batch = 1 supported for BATCHED_MATMUL.");
200       }
201       BHWC weights_shape(second_shape.c, 1, 1, second_shape.w);
202       Convolution2DAttributes attr;
203       attr.strides = HW(1, 1);
204       attr.dilations = HW(1, 1);
205       attr.padding.appended = HW(0, 0);
206       attr.padding.prepended = HW(0, 0);
207       attr.bias.shape = Linear(weights_shape.b);
208       attr.bias.data.resize(weights_shape.b, 0.0f);
209 
210       TensorDescriptor transposed_desc = {op_def.src_tensors[1].data_type,
211                                           op_def.src_tensors[1].storage_type,
212                                           Layout::BHWC};
213       transposed_desc.storage_type = SelectBestStorageType(
214           gpu_info, weights_shape, transposed_desc.storage_type,
215           transposed_desc.data_type, transposed_desc.layout);
216       TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
217                                        TensorStorageType::BUFFER, Layout::BHWC};
218       gpu_subgraph->operations.clear();
219       gpu_subgraph->operations.resize(3);
220       auto& transpose_op = gpu_subgraph->operations[0];
221       auto& converter_op = gpu_subgraph->operations[1];
222       auto& conv_op = gpu_subgraph->operations[2];
223       conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
224       conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
225       OperationDef conv_def = op_def;
226       conv_def.src_tensors[1] = weights_desc;
227       WeightsDescription conv_weights_desc;
228       conv_op.operation = SelectConvolutionWithDynamicWeights(
229           attr, weights_shape, dst_shape, gpu_info, conv_def, hints,
230           &conv_weights_desc);
231 
232       int aligned_output =
233           AlignByN(weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
234       int aligned_input = AlignByN(weights_shape.c, 4);
235       gpu_subgraph->new_tensors = {{BHWC(1, 1, 1,
236                                          aligned_output * aligned_input *
237                                              weights_shape.h * weights_shape.w),
238                                     weights_desc},
239                                    {weights_shape, transposed_desc}};
240       OperationDef converter_def;
241       converter_def.precision = op_def.precision;
242       converter_def.src_tensors.push_back(transposed_desc);
243       converter_def.dst_tensors.push_back(weights_desc);
244 
245       converter_op.input_ids = {-2};
246       converter_op.output_ids = {-1};
247       converter_op.operation =
248           SelectConverterToConvWeights(conv_weights_desc, converter_def, hints);
249 
250       OperationDef transpose_def;
251       transpose_def.precision = op_def.precision;
252       transpose_def.src_tensors.push_back(op_def.src_tensors[1]);
253       transpose_def.dst_tensors.push_back(transposed_desc);
254 
255       transpose_op.input_ids = {static_cast<int>(inputs[1]->id)};
256       transpose_op.output_ids = {-2};
257       TransposeAttributes transpose_attr;
258       transpose_attr.perm = BHWC(3, 0, 1, 2);
259       transpose_op.operation = absl::make_unique<GPUOperation>(
260           CreateTranspose(transpose_def, transpose_attr));
261       return absl::OkStatus();
262     }
263     case OperationType::CONCAT: {
264       auto attr = absl::any_cast<ConcatAttributes>(node.operation.attributes);
265       std::vector<int> channels(inputs.size());
266       for (int i = 0; i < inputs.size(); ++i) {
267         channels[i] = inputs[i]->tensor.shape.c;
268       }
269       return SelectConcat(attr, channels, op_def, gpu_info, gpu_op);
270     }
271     case OperationType::CONVOLUTION_2D: {
272       auto attr =
273           absl::any_cast<Convolution2DAttributes>(node.operation.attributes);
274       auto input_shape = inputs[0]->tensor.shape;
275       auto output_shape = outputs[0]->tensor.shape;
276       if (inputs.size() == 1) {
277         if (WinogradFromNode(gpu_info, inputs, outputs, op_def, hints,
278                              input_shape, output_shape, attr, gpu_subgraph)
279                 .ok()) {
280           return absl::OkStatus();
281         } else {
282           gpu_op = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
283           *gpu_op =
284               SelectConvolution(attr, output_shape, gpu_info, op_def, hints);
285           return absl::OkStatus();
286         }
287       } else {
288         auto weights_shape = inputs[1]->tensor.shape;
289         if (attr.bias.data.empty()) {
290           attr.bias.shape = Linear(weights_shape.b);
291           attr.bias.data.resize(weights_shape.b, 0.0f);
292         }
293         TensorDescriptor weights_desc = {op_def.src_tensors[1].data_type,
294                                          TensorStorageType::BUFFER,
295                                          Layout::BHWC};
296         gpu_subgraph->operations.clear();
297         gpu_subgraph->operations.resize(2);
298         auto& converter_op = gpu_subgraph->operations[0];
299         auto& conv_op = gpu_subgraph->operations[1];
300         conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
301         conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
302         OperationDef conv_def = op_def;
303         conv_def.src_tensors[1] = weights_desc;
304         WeightsDescription conv_weights_desc;
305         conv_op.operation = SelectConvolutionWithDynamicWeights(
306             attr, weights_shape, output_shape, gpu_info, conv_def, hints,
307             &conv_weights_desc);
308 
309         int aligned_output = AlignByN(
310             weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
311         int aligned_input = AlignByN(weights_shape.c, 4);
312         gpu_subgraph->new_tensors = {
313             {BHWC(1, 1, 1,
314                   aligned_output * aligned_input * weights_shape.h *
315                       weights_shape.w),
316              weights_desc}};
317         OperationDef converter_def;
318         converter_def.precision = op_def.precision;
319         converter_def.src_tensors.push_back(op_def.src_tensors[1]);
320         converter_def.dst_tensors.push_back(weights_desc);
321 
322         converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
323         converter_op.output_ids = {-1};
324         converter_op.operation = SelectConverterToConvWeights(
325             conv_weights_desc, converter_def, hints);
326         return absl::OkStatus();
327       }
328     }
329     case OperationType::CONVOLUTION_TRANSPOSED: {
330       auto attr = absl::any_cast<ConvolutionTransposedAttributes>(
331           node.operation.attributes);
332       if (inputs.size() == 1) {
333         *gpu_op = SelectConvolutionTransposed(attr, gpu_info, op_def);
334         return absl::OkStatus();
335       } else {
336         // CONVOLUTION_TRANSPOSED with runtime weights
337         OHWI weights_shape =
338             OHWI(inputs[1]->tensor.shape.b, inputs[1]->tensor.shape.h,
339                  inputs[1]->tensor.shape.w, inputs[1]->tensor.shape.c);
340         if (attr.bias.data.empty()) {
341           attr.bias.shape = Linear(weights_shape.o);
342           attr.bias.data.resize(weights_shape.o, 0.0f);
343         }
344         gpu_subgraph->operations.clear();
345         gpu_subgraph->operations.resize(2);
346         auto& converter_op = gpu_subgraph->operations[0];
347         auto& conv_op = gpu_subgraph->operations[1];
348         WeightsDescription weights_desc;
349         conv_op.operation = SelectConvolutionTransposedWithDynamicWeights(
350             attr, gpu_info, op_def, &weights_desc);
351         conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
352 
353         const int dst_depth = AlignByN(DivideRoundUp(weights_shape.o, 4),
354                                        weights_desc.GetOutputGroupSize());
355         const int src_depth = DivideRoundUp(weights_shape.i, 4);
356         const int kernel_x = weights_shape.w;
357         const int kernel_y = weights_shape.h;
358         if (weights_desc.layout ==
359                 WeightsLayout::k2DX4I4YIsHWIAndXIsOOGroupO4 ||
360             weights_desc.layout ==
361                 WeightsLayout::k2DX4O4YIsHWIAndXIsOOGroupI4) {
362           // weights are 4x textures 2d
363           conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1, -2, -3, -4};
364           int texture_width = dst_depth;
365           int texture_height = src_depth * kernel_x * kernel_y;
366           for (int i = 0; i < 4; ++i) {
367             gpu_subgraph->new_tensors.push_back(
368                 {BHWC(1, texture_height, texture_width, 4),
369                  TensorDescriptor(op_def.GetDataType(),
370                                   TensorStorageType::TEXTURE_2D, Layout::HWC)});
371           }
372         } else {
373           // weights is single buffer
374           conv_op.input_ids = {static_cast<int>(inputs[0]->id), -1};
375           gpu_subgraph->new_tensors = {
376               {BHWC(
377                    1, 1, 1,
378                    GetTotalElementsCountForLayout(weights_desc, weights_shape)),
379                TensorDescriptor(op_def.GetDataType(), TensorStorageType::BUFFER,
380                                 Layout::HWC)}};
381         }
382         OperationDef conv_def = conv_op.operation->GetDefinition();
383         OperationDef converter_def;
384         converter_def.precision = op_def.precision;
385         converter_def.src_tensors.push_back(op_def.src_tensors[1]);
386         for (int i = 1; i < conv_def.src_tensors.size(); ++i) {
387           converter_def.dst_tensors.push_back(conv_def.src_tensors[i]);
388           converter_op.output_ids.push_back(-i);
389         }
390 
391         converter_op.input_ids = {static_cast<int>(inputs[1]->id)};
392         converter_op.operation =
393             SelectConverterToConvWeights(weights_desc, converter_def, hints);
394         return absl::OkStatus();
395       }
396     }
397     case OperationType::DEPTHWISE_CONVOLUTION: {
398       auto attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
399           node.operation.attributes);
400       if (inputs.size() == 1) {
401         *gpu_op = SelectDWConvolution(attr, gpu_info, op_def);
402       } else {
403         if (inputs[1]->tensor.shape.b != 1) {
404           return absl::UnimplementedError(
405               "No support of depthwise runtime weights with channel multiplier "
406               "!= 1");
407         }
408         *gpu_op = SelectDWConvolutionDynamicWeights(attr, gpu_info, op_def);
409       }
410       return absl::OkStatus();
411     }
412     case OperationType::FULLY_CONNECTED: {
413       auto attr =
414           absl::any_cast<FullyConnectedAttributes>(node.operation.attributes);
415       *gpu_op = SelectFullyConnected(attr, gpu_info, op_def,
416                                      inputs[0]->tensor.shape.b);
417       return absl::OkStatus();
418     }
419     case OperationType::LSTM: {
420       *gpu_op = SelectLSTM(op_def, gpu_info);
421       return absl::OkStatus();
422     }
423     case OperationType::MAX_UNPOOLING_2D: {
424       auto attr =
425           absl::any_cast<MaxUnpooling2DAttributes>(node.operation.attributes);
426       *gpu_op = SelectMaxUnpooling(attr, op_def);
427       return absl::OkStatus();
428     }
429     case OperationType::MEAN: {
430       auto attr = absl::any_cast<MeanAttributes>(node.operation.attributes);
431       *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
432                              op_def, gpu_info);
433       return absl::OkStatus();
434     }
435     case OperationType::MEAN_STDDEV_NORMALIZATION: {
436       MeanStdDevNormalization operation = CreateMeanStdDevNormalization(
437           op_def, gpu_info, (inputs[0]->tensor.shape.c + 3) / 4);
438       *gpu_op =
439           absl::make_unique<MeanStdDevNormalization>(std::move(operation));
440       return absl::OkStatus();
441     }
442     case OperationType::PAD: {
443       auto attr = absl::any_cast<PadAttributes>(node.operation.attributes);
444       SelectPadding(attr, op_def, gpu_op);
445       return absl::OkStatus();
446     }
447     case OperationType::POOLING_2D: {
448       auto attr =
449           absl::any_cast<Pooling2DAttributes>(node.operation.attributes);
450       *gpu_op = SelectPooling(attr, op_def);
451       return absl::OkStatus();
452     }
453     case OperationType::PRELU: {
454       auto attr = absl::any_cast<PReLUAttributes>(node.operation.attributes);
455       *gpu_op = SelectPReLU(attr, gpu_info, op_def);
456       return absl::OkStatus();
457     }
458     case OperationType::QUANTIZE_AND_DEQUANTIZE: {
459       auto attr = absl::any_cast<QuantizeAndDequantizeAttributes>(
460           node.operation.attributes);
461       *gpu_op = SelectQuantizeAndDequantize(attr, op_def);
462       return absl::OkStatus();
463     }
464     case OperationType::RELU: {
465       auto attr = absl::any_cast<ReLUAttributes>(node.operation.attributes);
466       *gpu_op = SelectReLU(attr, op_def);
467       return absl::OkStatus();
468     }
469     case OperationType::RESHAPE: {
470       const int src_channels = inputs[0]->tensor.shape.c;
471       auto attr = absl::any_cast<ReshapeAttributes>(node.operation.attributes);
472       SelectReshape(src_channels, attr.new_shape.c, op_def, gpu_op);
473       return absl::OkStatus();
474     }
475     case OperationType::RESIZE: {
476       auto attr = absl::any_cast<Resize2DAttributes>(node.operation.attributes);
477       return SelectResize(attr, op_def, gpu_op);
478     }
479     case OperationType::SLICE: {
480       auto attr = absl::any_cast<SliceAttributes>(node.operation.attributes);
481       SelectStridedSlice(attr, op_def, gpu_op);
482       return absl::OkStatus();
483     }
484     case OperationType::SOFTMAX: {
485       SelectSoftmax(inputs[0]->tensor.shape, op_def, gpu_op);
486       return absl::OkStatus();
487     }
488     case OperationType::SPACE_TO_DEPTH: {
489       auto attr =
490           absl::any_cast<SpaceToDepthAttributes>(node.operation.attributes);
491       SelectSpaceToDepth(attr, op_def, gpu_op);
492       return absl::OkStatus();
493     }
494     case OperationType::SPLIT: {
495       auto attr = absl::any_cast<SplitAttributes>(node.operation.attributes);
496       RETURN_IF_ERROR(SelectSplit(attr, op_def, gpu_op));
497       return absl::OkStatus();
498     }
499     case OperationType::TRANSPOSE: {
500       auto attr =
501           absl::any_cast<TransposeAttributes>(node.operation.attributes);
502       SelectTranspose(attr, op_def, gpu_op);
503       return absl::OkStatus();
504     }
505     case OperationType::ABS:
506     case OperationType::COPY:
507     case OperationType::COS:
508     case OperationType::ELU:
509     case OperationType::EXP:
510     case OperationType::HARD_SWISH:
511     case OperationType::LOG:
512     case OperationType::NEG:
513     case OperationType::RSQRT:
514     case OperationType::SIGMOID:
515     case OperationType::SIN:
516     case OperationType::SQRT:
517     case OperationType::SQUARE:
518     case OperationType::TANH: {
519       GPUOperation operation =
520           CreateElementwiseOneInput(gpu_info, op_def, op_type);
521       *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
522       return absl::OkStatus();
523     }
524     case OperationType::DIV:
525     case OperationType::EQUAL:
526     case OperationType::GREATER:
527     case OperationType::GREATER_EQUAL:
528     case OperationType::LESS:
529     case OperationType::LESS_EQUAL:
530     case OperationType::MAXIMUM:
531     case OperationType::MINIMUM:
532     case OperationType::MUL:
533     case OperationType::NOT_EQUAL:
534     case OperationType::POW:
535     case OperationType::SQUARED_DIFF:
536     case OperationType::SUB: {
537       if (inputs.size() == 2) {
538         GPUOperation operation =
539             CreateElementwiseTwoInput(op_def, op_type, inputs[1]->tensor.shape);
540         *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
541         return absl::OkStatus();
542       } else if (inputs.size() == 1 && node.operation.attributes.has_value()) {
543         auto attr =
544             absl::any_cast<ElementwiseAttributes>(node.operation.attributes);
545         GPUOperation operation =
546             CreateElementwise(gpu_info, op_def, op_type, attr);
547         *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
548         return absl::OkStatus();
549       }
550       return absl::UnimplementedError(absl::StrCat(
551           "No support of ", node.operation.type, " with this parameters"));
552     }
553     case OperationType::REDUCE_MAXIMUM:
554     case OperationType::REDUCE_MINIMUM:
555     case OperationType::REDUCE_PRODUCT:
556     case OperationType::REDUCE_SUM: {
557       auto attr = absl::any_cast<ReduceAttributes>(node.operation.attributes);
558       *gpu_op = SelectReduce(attr.dims, inputs[0]->tensor.shape, op_type,
559                              op_def, gpu_info);
560       return absl::OkStatus();
561     }
562     default:
563       return SelectDefault(gpu_info, op_def, hints, inputs, outputs, node,
564                            gpu_subgraph);
565   }
566 }
567 
568 }  // namespace gpu
569 }  // namespace tflite
570