1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_API_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_API_H_
18 
19 // Usage example:
20 //
21 //   // Builder is created from a model using GPU-specific parameters.
22 //   std::unique_ptr<InferenceBuilder> builder = ...;
23 //
24 //   // input data is coming from a texture
25 //   // output data goes to CPU
//   builder->SetInputObjectDef(0, {DataType::FLOAT16, DataLayout::DHWC4,
//                                  ObjectType::OPENGL_TEXTURE, true});
28 //   builder->SetOutputObjectDef(0, {DataType::FLOAT32, DataLayout::BHWC,
29 //                                  ObjectType::CPU_MEMORY, false});
30 //   std::unique_ptr<InferenceRunner> runner;
31 //   RETURN_IF_ERROR(builder->Build(&runner));  // may take significant time.
//   RETURN_IF_ERROR(
//       runner->SetInputObject(0, OpenGlTexture{texture_id, texture_format}));
34 //   RETURN_IF_ERROR(runner->Run());
35 
36 #include <cstdint>
37 #include <memory>
38 #include <vector>
39 
40 #include "absl/types/span.h"
41 #include "absl/types/variant.h"
42 #include <CL/cl.h>
43 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
44 #include "tensorflow/lite/delegates/gpu/common/status.h"
45 #include "tensorflow/lite/delegates/gpu/common/util.h"
46 #include <vulkan/vulkan.h>
47 
48 #define GL_NO_PROTOTYPES
49 #define EGL_NO_PROTOTYPES
50 #include "tensorflow/lite/delegates/gpu/gl/portable_gl31.h"
51 #undef GL_NO_PROTOTYPES
52 #undef EGL_NO_PROTOTYPES
53 
54 namespace tflite {
55 namespace gpu {
56 
// Layouts a tensor object may be stored in. Each enumerator name is built from
// the abbreviations below.
//
// Common abbreviations:
//   B  - batch
//   H  - height
//   W  - width
//   C  - channels
//   D  - depth := DivideRoundUp(C, 4)
//   C4 - the constant 4.
//
// NOTE(review): this header does not state which letter is the outermost
// dimension in memory — presumably the leftmost one; confirm against the
// converters that consume DataLayout.
enum class DataLayout {
  UNKNOWN,  // Layout is not specified; also the default in ObjectDef.
  BHWC,
  DHWC4,
  HWDC4,
  HDWC4,
};
71 
// Kind of storage that backs a tensor object. The names parallel the wrapper
// structs declared in this header (OpenGlBuffer, OpenGlTexture, CpuMemory,
// ...); presumably GetType() maps each wrapper to one of these values —
// confirm against its definition.
enum class ObjectType {
  UNKNOWN,  // Storage kind is not specified; also the default in ObjectDef.
  OPENGL_SSBO,
  OPENGL_TEXTURE,
  CPU_MEMORY,
  OPENCL_TEXTURE,
  OPENCL_BUFFER,
  VULKAN_BUFFER,
  VULKAN_TEXTURE
};
82 
83 struct OpenGlBuffer {
84   OpenGlBuffer() = default;
OpenGlBufferOpenGlBuffer85   explicit OpenGlBuffer(GLuint new_id) : id(new_id) {}
86 
87   GLuint id = GL_INVALID_INDEX;
88 };
89 
90 struct OpenGlTexture {
91   OpenGlTexture() = default;
OpenGlTextureOpenGlTexture92   OpenGlTexture(GLuint new_id, GLenum new_format)
93       : id(new_id), format(new_format) {}
94 
95   GLuint id = GL_INVALID_INDEX;
96   GLenum format = GL_INVALID_ENUM;
97 };
98 
99 struct OpenClBuffer {
100   OpenClBuffer() = default;
OpenClBufferOpenClBuffer101   explicit OpenClBuffer(cl_mem new_memobj) : memobj(new_memobj) {}
102 
103   cl_mem memobj = nullptr;
104 };
105 
106 struct OpenClTexture {
107   OpenClTexture() = default;
OpenClTextureOpenClTexture108   explicit OpenClTexture(cl_mem new_memobj) : memobj(new_memobj) {}
109 
110   cl_mem memobj = nullptr;
111   // TODO(akulik): should it specify texture format?
112 };
113 
114 struct VulkanBuffer {
115   VulkanBuffer() = default;
VulkanBufferVulkanBuffer116   explicit VulkanBuffer(VkBuffer buffer_, VkDeviceSize size_,
117                         VkDeviceMemory memory_, VkDeviceSize offset_)
118       : buffer(buffer_), size(size_), memory(memory_), offset(offset_) {}
119 
120   VkBuffer buffer;
121   VkDeviceSize size;
122   VkDeviceMemory memory;
123   VkDeviceSize offset;
124 };
125 
126 struct VulkanTexture {
127   VulkanTexture() = default;
VulkanTextureVulkanTexture128   explicit VulkanTexture(VkDeviceMemory new_memory) : memory(new_memory) {}
129 
130   VkImage image;
131   VkImageView image_view;
132   VkFormat format;
133   VkExtent3D extent;
134   VkDeviceMemory memory;
135   VkDeviceSize offset;
136 };
137 
138 struct VulkanMemory {
139   VulkanMemory() = default;
VulkanMemoryVulkanMemory140   explicit VulkanMemory(VkDeviceMemory new_memory) : memory(new_memory) {}
141 
142   VkDeviceMemory memory;
143   VkDeviceSize size;
144   VkDeviceSize offset;
145 };
146 
// Untyped view of a block of host memory: a pointer plus a size in bytes. The
// struct has no destructor, so it never frees the memory it points to.
struct CpuMemory {
  // Start of the block; nullptr when nothing is set.
  void* data = nullptr;
  // Length of the block in bytes.
  size_t size_bytes = 0;

  // Produces an empty view (null pointer, zero bytes).
  CpuMemory() = default;

  // Views `new_size_bytes` bytes starting at `new_data`.
  CpuMemory(void* new_data, size_t new_size_bytes)
      : data{new_data}, size_bytes{new_size_bytes} {}
};
155 
156 template <typename T>
MakeCpuMemory(absl::Span<T> t)157 inline CpuMemory MakeCpuMemory(absl::Span<T> t) {
158   CpuMemory m;
159   m.data = t.data();
160   m.size_bytes = t.size() * sizeof(T);
161   return m;
162 }
163 
164 template <typename T>
MakeReadableCpuMemory(absl::Span<const T> t)165 inline CpuMemory MakeReadableCpuMemory(absl::Span<const T> t) {
166   CpuMemory m;
167   m.data = const_cast<T*>(t.data());
168   m.size_bytes = t.size() * sizeof(T);
169   return m;
170 }
171 
172 // Defines object representation.
173 struct ObjectDef {
174   DataType data_type = DataType::UNKNOWN;
175   DataLayout data_layout = DataLayout::UNKNOWN;
176   ObjectType object_type = ObjectType::UNKNOWN;
177 
178   // If true, then object is managed externally and needs to be provided to
179   // InferenceRunner by a user before running inference.
180   //
181   // User-provided objects will not be re-used internally for any purpose to
182   // lower overall memory usage.
183   bool user_provided = false;
184 
185   bool operator==(const ObjectDef& other) const {
186     return data_type == other.data_type && data_layout == other.data_layout &&
187            object_type == other.object_type &&
188            user_provided == other.user_provided;
189   }
190 };
191 
// Validity check for an object definition. The definition is not part of this
// header. NOTE(review): presumably a def whose fields are still UNKNOWN is
// rejected — confirm against the implementation.
bool IsValid(const ObjectDef& def);
193 
194 struct Dimensions {
DimensionsDimensions195   Dimensions() : b(1), h(1), w(1), c(1) {}
196 
DimensionsDimensions197   Dimensions(int32_t batch, int32_t height, int32_t width, int32_t channels)
198       : b(batch), h(height), w(width), c(channels) {}
199 
dDimensions200   int32_t d() const { return DivideRoundUp(c, 4); }
201 
productDimensions202   int32_t product() const { return b * h * w * c; }
203 
204   bool operator==(const Dimensions& other) const {
205     return b == other.b && h == other.h && w == other.w && c == other.c;
206   }
207 
208   int32_t b;
209   int32_t h;
210   int32_t w;
211   int32_t c;
212 };
213 
214 // Connects tensor shape with corresponding object definition.
215 struct TensorObjectDef {
216   // Dimensions semantic is defined by corresponding DataLayout.
217   Dimensions dimensions;
218   ObjectDef object_def;
219 
220   bool operator==(const TensorObjectDef& other) const {
221     return dimensions == other.dimensions && object_def == other.object_def;
222   }
223 };
224 
225 // @return true if tensor object def is defined.
226 bool IsValid(const TensorObjectDef& def);
227 
228 // @return the number of elements in a tensor object.
229 uint32_t NumElements(const TensorObjectDef& def);
230 
// A tensor's backing object: exactly one of the wrapper structs listed below,
// or absl::monostate. monostate is the first alternative, so a
// default-constructed TensorObject holds it. Note that VulkanMemory is not
// among the alternatives.
using TensorObject =
    absl::variant<absl::monostate, OpenGlBuffer, OpenGlTexture, CpuMemory,
                  OpenClBuffer, OpenClTexture, VulkanBuffer, VulkanTexture>;
234 
235 // @return true if object is set and corresponding values are defined.
236 bool IsValid(const TensorObjectDef& def, const TensorObject& object);
237 
// Returns the ObjectType for the given object. The definition is not part of
// this header. NOTE(review): presumably the result reflects which variant
// alternative `object` currently holds — confirm against the implementation.
ObjectType GetType(const TensorObject& object);
239 
240 // @return true if corresponding object is set for the given type
241 bool IsObjectPresent(ObjectType type, const TensorObject& obj);
242 
243 // @return true if corresponding object has already been initialized and
244 // assigned with a specific ObjectType.
245 bool IsObjectInitialized(const TensorObject& obj);
246 
247 class InferenceRunner;
248 
249 // Allows to inspect and change input and output definitions before a graph is
250 // prepared for the inference.
251 class InferenceBuilder {
252  public:
~InferenceBuilder()253   virtual ~InferenceBuilder() {}
254 
255   // Returns inference graph inputs and outputs definitions.
256   virtual std::vector<TensorObjectDef> inputs() const = 0;
257   virtual std::vector<TensorObjectDef> outputs() const = 0;
258 
259   // Sets new shape for the input if underlying implementation and graph
260   // structure allows dynamic tensors.
261   virtual absl::Status SetInputShape(int index,
262                                      const Dimensions& dimensions) = 0;
263 
264   // Updates object definitions for the given index. Implementation may allow
265   // to use different layouts and/or data type conversions between objects
266   // defined in a graph and given objects, for example:
267   //   input '0' is DataType::FLOAT32, DataLayout::BHWC.
268   //   A user, however, has an input in DataType::FLOAT16, DataLayout::PHWC4.
269   //   An implementation may allow this transformation to happen automatically
270   //   under the hood.
271   virtual absl::Status SetInputObjectDef(int index, ObjectDef def) = 0;
272   virtual absl::Status SetOutputObjectDef(int index, ObjectDef def) = 0;
SetAllInputObjectDefsTo(ObjectDef def)273   virtual absl::Status SetAllInputObjectDefsTo(ObjectDef def) {
274     auto input_defs = inputs();
275     for (int i = 0; i < input_defs.size(); ++i) {
276       RETURN_IF_ERROR(SetInputObjectDef(i, def));
277     }
278     return absl::OkStatus();
279   }
SetAllOutputObjectDefsTo(ObjectDef def)280   virtual absl::Status SetAllOutputObjectDefsTo(ObjectDef def) {
281     auto output_defs = outputs();
282     for (int i = 0; i < output_defs.size(); ++i) {
283       RETURN_IF_ERROR(SetOutputObjectDef(i, def));
284     }
285     return absl::OkStatus();
286   }
287 
288   // Creates new instance of the inference runner. InferenceBuilder stays valid
289   // and could be used to create another inference runner if needed.
290   //
291   // This method may take significant time to prepare new inference runner. For
292   // example, it may require to compile OpenGL shaders.
293   virtual absl::Status Build(std::unique_ptr<InferenceRunner>* runner) = 0;
294 };
295 
296 // Runs prepared inference. Every object marked as external needs to be set
297 // prior calling Run method.
298 class InferenceRunner {
299  public:
~InferenceRunner()300   virtual ~InferenceRunner() {}
301 
302   // Returns inference graph inputs and outputs definitions.
303   virtual std::vector<TensorObjectDef> inputs() const = 0;
304   virtual std::vector<TensorObjectDef> outputs() const = 0;
305 
306   // Getters provide access to underlying objects for the given index.
307   // Setters allow to set or change external object for the given index. Note,
308   // object need to match object definition set before in InferenceBuilder.
309 
310   virtual absl::Status GetInputObject(int index, TensorObject* object) = 0;
311   virtual absl::Status GetOutputObject(int index, TensorObject* object) = 0;
312   virtual absl::Status SetInputObject(int index, TensorObject object) = 0;
313   virtual absl::Status SetOutputObject(int index, TensorObject object) = 0;
314 
315   virtual absl::Status Run() = 0;
316 };
317 
// Encapsulates compilation/runtime tradeoffs: tells the implementation how the
// resulting InferenceRunner is going to be used.
enum class InferenceUsage {
  UNKNOWN,

  // InferenceRunner will be used only once. Therefore, it is important to
  // minimize bootstrap time as well.
  FAST_SINGLE_ANSWER,

  // Prefer maximizing the throughput. Same inference runner will be used
  // repeatedly on different inputs.
  SUSTAINED_SPEED,
};
330 
// Defines aspects to control while instantiating a runner. Values are assigned
// to the ordered priority1..priority3 slots of InferenceOptions.
enum class InferencePriority {
  UNKNOWN,

  // Placeholder for a slot without an explicit priority; see the rules on
  // where AUTO may appear in the InferenceOptions comments.
  AUTO,

  MIN_LATENCY,

  MAX_PRECISION,

  MIN_MEMORY_USAGE,
};
343 
344 struct InferenceOptions {
345   InferenceUsage usage = InferenceUsage::SUSTAINED_SPEED;
346 
347   // Ordered priorities provide better understanding of desired semantics,
348   // where priority(n) is more important than priority(n+1).
349   // AUTO priority is needed when a single priority is the most important
350   // factor. For example, priority1 = InferencePriority::MIN_LATENCY and leaving
351   // everything else to AUTO would result in configuration that achieves maximum
352   // performance.
353   //
354   // AUTO priority can only be used when higher priorities are fully specified.
355   // For example:
356   //   VALID:   priority1 = MIN_LATENCY, priority2 = AUTO, priority3 = AUTO
357   //   VALID:   priority1 = MIN_LATENCY, priority2 = MAX_PRECISION,
358   //            priority3 = AUTO
359   //   INVALID: priority1 = AUTO, priority2 = MIN_LATENCY, priority3 = AUTO
360   //   INVALID: priority1 = MIN_LATENCY, priority2 = AUTO,
361   //            priority3 = MAX_PRECISION
362   // Invalid priorities will result in error.
363   InferencePriority priority1 = InferencePriority::MAX_PRECISION;
364 
365   InferencePriority priority2 = InferencePriority::AUTO;
366 
367   InferencePriority priority3 = InferencePriority::AUTO;
368 };
369 
370 // Returns a position number for the priority. If priority is missing,
371 // then it would return 'max num priorities + 1'.
372 int GetPosition(const InferenceOptions& options, InferencePriority p);
373 
// Returns true if options are valid.
375 bool IsValid(const InferenceOptions& options);
376 
// Resolves AUTO priorities and specifies them explicitly.
// Note: nobody should assume that these mappings will not change.
// Technically, this function is declared here for code re-use purposes, and
// by no means should it be treated as the canonical way to resolve AUTO.
381 void ResolveAutoPriority(InferenceOptions* options);
382 
// Outcome of comparing two priorities within an InferenceOptions; this is the
// return type of GetRelativeImportance().
enum class PriorityImportance {
  UNKNOWN,
  HIGHER,
  LOWER,
};
388 
389 // If both p1 and p2 are not present in options, return UNKNOWN
390 // If p1 is present, but p2 is not, return HIGHER
391 // If p2 is present, but p1 is not, return LOWER
392 // If both are present, and p1 is more important, return HIGHER, otherwise,
393 // LOWER.
394 PriorityImportance GetRelativeImportance(const InferenceOptions& options,
395                                          InferencePriority p1,
396                                          InferencePriority p2);
397 
398 }  // namespace gpu
399 }  // namespace tflite
400 
401 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_API_H_
402