1 /* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
18 
#include <cstdint>
#include <limits>
#include <map>
#include <string>
#include <utility>
#include <vector>
22 
23 #include "tensorflow/lite/delegates/gpu/cl/cl_context.h"
24 #include "tensorflow/lite/delegates/gpu/cl/gpu_object.h"
25 #include "tensorflow/lite/delegates/gpu/common/gpu_info.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/delegates/gpu/common/task/arguments.h"
28 
29 namespace tflite {
30 namespace gpu {
31 namespace cl {
32 
33 class CLArguments : public ArgumentsBinder {
34  public:
35   CLArguments() = default;
36 
37   absl::Status Init(const GpuInfo& gpu_info,
38                     const std::map<std::string, std::string>& linkables,
39                     CLContext* context, Arguments* args, std::string* code);
40   absl::Status Init(const GpuInfo& gpu_info, Arguments* args,
41                     CLContext* context);
42 
43   // Temporary, will be resolved later
MoveObjectRefsIn(Arguments * args)44   void MoveObjectRefsIn(Arguments* args) {
45     object_refs_ = std::move(args->object_refs_);
46   }
MoveObjectRefsOut(Arguments * args)47   void MoveObjectRefsOut(Arguments* args) {
48     args->object_refs_ = std::move(object_refs_);
49   }
50   void CopyScalarValues(Arguments* args) const;
51 
52   // Move only
53   CLArguments(CLArguments&& args) = default;
54   CLArguments& operator=(CLArguments&& args) = default;
55   CLArguments(const CLArguments&) = delete;
56   CLArguments& operator=(const CLArguments&) = delete;
57 
58   absl::Status SetInt(const std::string& name, int value) override;
59   absl::Status SetFloat(const std::string& name, float value) override;
60   absl::Status SetHalf(const std::string& name, half value) override;
61   absl::Status SetObjectRef(const std::string& name, const GPUObject* object);
62 
63   absl::Status Bind(cl_kernel kernel, int offset = 0);
64 
65  private:
66   absl::Status AllocateObjects(const Arguments& args, CLContext* context);
67   absl::Status AddObjectArgs(Arguments* args);
68 
69   absl::Status ResolveSelectorsPass(
70       const GpuInfo& gpu_info, const Arguments& args,
71       const std::map<std::string, std::string>& linkables, std::string* code);
72   absl::Status ResolveSelector(
73       const GpuInfo& gpu_info, const Arguments& args,
74       const std::map<std::string, std::string>& linkables,
75       const std::string& object_name, const std::string& selector,
76       const std::vector<std::string>& function_args,
77       const std::vector<std::string>& template_args, std::string* result);
78   void ResolveObjectNames(const std::string& object_name,
79                           const std::vector<std::string>& member_names,
80                           std::string* code);
81   void ResolveArgsPass(std::string* code);
82 
83   void CopyArguments(const Arguments& args, bool use_f32_for_halfs);
84   void RenameArgumentsInCode(std::string* code);
85   std::string GetListOfArgs();
86 
87   void AddBuffer(const std::string& name, const GPUBufferDescriptor& desc);
88   void AddImage2D(const std::string& name, const GPUImage2DDescriptor& desc);
89   void AddImage2DArray(const std::string& name,
90                        const GPUImage2DArrayDescriptor& desc);
91   void AddImage3D(const std::string& name, const GPUImage3DDescriptor& desc);
92   void AddImageBuffer(const std::string& name,
93                       const GPUImageBufferDescriptor& desc);
94   void AddCustomMemory(const std::string& name,
95                        const GPUCustomMemoryDescriptor& desc);
96   void AddGPUResources(const std::string& name, const GPUResources& resources,
97                        Arguments* args);
98   absl::Status SetObjectsResources(const Arguments& args);
99   absl::Status SetGPUResources(const std::string& name,
100                                const GPUResourcesWithValue& resources);
101 
102   absl::Status SetImage2D(const std::string& name, cl_mem memory);
103   absl::Status SetBuffer(const std::string& name, cl_mem memory);
104   absl::Status SetImage2DArray(const std::string& name, cl_mem memory);
105   absl::Status SetImage3D(const std::string& name, cl_mem memory);
106   absl::Status SetImageBuffer(const std::string& name, cl_mem memory);
107   absl::Status SetCustomMemory(const std::string& name, cl_mem memory);
108 
109   static constexpr char kArgsPrefix[] = "args.";
110   struct IntValue {
111     int value;
112 
113     // many arguments generated automatically and not used
114     // to reduce amount of data transferred we adding this optimization
115     bool active = false;
116 
117     // offset to shared storage.
118     uint32_t offset = -1;
119   };
120   std::map<std::string, IntValue> int_values_;
121   std::vector<int32_t> shared_int4s_data_;
122 
123   struct FloatValue {
124     float value;
125 
126     // many arguments generated automatically and not used
127     // to reduce amount of data transferred we adding this optimization
128     bool active = false;
129 
130     // offset to shared storage.
131     uint32_t offset = -1;
132   };
133   std::map<std::string, FloatValue> float_values_;
134   std::vector<float> shared_float4s_data_;
135 
136   struct HalfValue {
137     half value;
138 
139     // many arguments generated automatically and not used
140     // to reduce amount of data transferred we adding this optimization
141     bool active = false;
142 
143     // some devices have issues with half parameters.
144     bool store_as_f32 = false;
145 
146     // offset to shared uniform storage.
147     uint32_t offset = -1;
148   };
149   std::map<std::string, HalfValue> half_values_;
150   std::vector<half> shared_half4s_data_;
151 
152   struct CLBufferDescriptor {
153     GPUBufferDescriptor desc;
154     cl_mem memory;
155   };
156   struct CLImage2DDescriptor {
157     GPUImage2DDescriptor desc;
158     cl_mem memory;
159   };
160   struct CLImage2DArrayDescriptor {
161     GPUImage2DArrayDescriptor desc;
162     cl_mem memory;
163   };
164   struct CLImage3DDescriptor {
165     GPUImage3DDescriptor desc;
166     cl_mem memory;
167   };
168   struct CLImageBufferDescriptor {
169     GPUImageBufferDescriptor desc;
170     cl_mem memory;
171   };
172   struct CLCustomMemoryDescriptor {
173     GPUCustomMemoryDescriptor desc;
174     cl_mem memory;
175   };
176 
177   std::map<std::string, CLBufferDescriptor> buffers_;
178   std::map<std::string, CLImage2DDescriptor> images2d_;
179   std::map<std::string, CLImage2DArrayDescriptor> image2d_arrays_;
180   std::map<std::string, CLImage3DDescriptor> images3d_;
181   std::map<std::string, CLImageBufferDescriptor> image_buffers_;
182   std::map<std::string, CLCustomMemoryDescriptor> custom_memories_;
183 
184   std::map<std::string, GPUObjectDescriptorPtr> object_refs_;
185   std::vector<GPUObjectPtr> objects_;
186 };
187 
188 }  // namespace cl
189 }  // namespace gpu
190 }  // namespace tflite
191 
192 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_CL_CL_ARGUMENTS_H_
193