1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_NODE_SHADER_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_GL_NODE_SHADER_H_
18 
19 #include <array>
20 #include <cstdint>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 #include "absl/types/any.h"
26 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
27 #include "tensorflow/lite/delegates/gpu/common/model.h"
28 #include "tensorflow/lite/delegates/gpu/common/status.h"
29 #include "tensorflow/lite/delegates/gpu/common/types.h"
30 #include "tensorflow/lite/delegates/gpu/gl/compiler_options.h"
31 #include "tensorflow/lite/delegates/gpu/gl/object.h"
32 #include "tensorflow/lite/delegates/gpu/gl/variable.h"
33 
34 namespace tflite {
35 namespace gpu {
36 namespace gl {
37 
// Describes how a shader's source code accesses its inputs or its outputs.
enum class IOStructure {
  // The source code works directly with the standard input/output
  // declarations. The compiler generates those declarations automatically
  // from the node's inputs/outputs and names them 'input_data_N' /
  // 'output_data_N', N being the index of the input/output.
  //
  // Generated code should not return input objects.
  ONLY_DEFINITIONS = 0,

  // Reading and writing is left to the compiler; the source code only deals
  // with 'vec4 value_N' variables that the compiler declares, N being the
  // index of the input/output.
  //
  // When used for inputs:
  //   Each 'value_N' is already filled from the corresponding input, at the
  //   coordinates determined by GlobalInvocationID and a dispatch method, so
  //   the source code should not read input values explicitly.
  //
  // When used for outputs:
  //   The source code leaves its results in 'value_N'. The value is then
  //   written to the output at the coordinates determined by
  //   GlobalInvocationID and a dispatch method, so the source code should not
  //   write results explicitly.
  AUTO = 1,
};
60 
61 struct GeneratedCode {
62   // A list of parameters to be set as uniform or hardcoded in a shader.
63   std::vector<Variable> parameters;
64 
65   // A list of objects to bind before shader could be executed.
66   std::vector<std::pair<std::string, Object>> objects;
67 
68   // A list of shared variables in the shader program.
69   std::vector<Variable> shared_variables;
70 
71   // Compute shader operate on an abstract concept of work groups, each
72   // three-dimensional. The number of work groups to be executed is defined by
73   // workload tuple. Therefore,
74   //   workload[x,y,z] := workgroup_size[x,y,z] X workgroup_count[x,y,z]
75   // where 'X' is element-wise multiplication.
76   //
77   // Zero workload is calculated as PHWC4 based on output tensor.
78   uint3 workload;
79 
80   // operation may specify recommended workgroup size. If not set, runtime will
81   // figure it out automatically.
82   uint3 workgroup;
83 
84   std::string source_code;
85 
86   // Parameters below reveal additional information about source_code.
87 
88   IOStructure input;
89   IOStructure output;
90 };
91 
92 // A class handles shader generation and setting runtime shader parameters.
93 class NodeShader {
94  public:
95   virtual ~NodeShader() = default;
96 
97   // A context for generating a code.
98   struct GenerationContext {
99     const GpuInfo* gpu_info;
100     CompilationOptions compiler_options;
101 
102     // Information extracted & copied from compiled graph.
103     const std::string& op_type;
104     const absl::any& op_attr;
105     // Do NOT use StrongShape<Layout::BHWC> in preparation for
106     // RankedTensorType::getShape() which returns ArrayRef<int64_t>.
107     std::vector<std::array<int64_t, 4>> input_shapes;
108     std::vector<std::array<int64_t, 4>> output_shapes;
109   };
110 
111   // Generates shader code for a node. The code should be just a function body.
112   virtual absl::Status GenerateCode(const GenerationContext& ctx,
113                                     GeneratedCode* generated_code) const = 0;
114 
115   // Limit the size of the const offsets array
116   static constexpr int kMaxConstArraySize = 9;
117 };
118 
119 }  // namespace gl
120 }  // namespace gpu
121 }  // namespace tflite
122 
123 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_GL_NODE_SHADER_H_
124