1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
17 #define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
18 
19 #include <cstdint>
20 #include <string>
21 #include <vector>
22 
23 #include "tensorflow/lite/delegates/gpu/common/data_type.h"
24 
25 namespace tflite {
26 namespace gpu {
27 
// GPU vendor, derived from the VendorID returned by the GPU driver.
// GpuInfo::vendor starts out as kUnknown.
enum class GpuVendor {
  kApple = 0,
  kQualcomm,
  kMali,
  kPowerVR,
  kNvidia,
  kAMD,
  kIntel,
  kUnknown,  // Keep last.
};
39 
// Graphics/compute API through which the GPU is accessed.
// GpuInfo::gpu_api starts out as kUnknown.
enum class GpuApi {
  kUnknown = 0,
  kOpenCl,
  kMetal,
  kVulkan,
  kOpenGl,
};
47 
// Adreno GPU models, grouped by series from newest (6xx) to oldest (1xx).
// kUnknown is the last enumerator.
enum class AdrenoGpu {
  // ---- Adreno 6xx series ----
  kAdreno685 = 0,
  kAdreno680,
  kAdreno675,
  kAdreno650,
  kAdreno640,
  kAdreno630,
  kAdreno620,
  kAdreno618,
  kAdreno616,
  kAdreno615,
  kAdreno612,
  kAdreno610,
  kAdreno605,

  // ---- Adreno 5xx series ----
  kAdreno540,
  kAdreno530,
  kAdreno512,
  kAdreno510,
  kAdreno509,
  kAdreno508,
  kAdreno506,
  kAdreno505,
  kAdreno504,

  // ---- Adreno 4xx series ----
  kAdreno430,
  kAdreno420,
  kAdreno418,
  kAdreno405,

  // ---- Adreno 3xx series ----
  kAdreno330,
  kAdreno320,
  kAdreno308,
  kAdreno306,
  kAdreno305,
  kAdreno304,

  // ---- Adreno 2xx series ----
  kAdreno225,
  kAdreno220,
  kAdreno205,
  kAdreno203,
  kAdreno200,

  // ---- Adreno 1xx series ----
  kAdreno130,
  kAdreno120,

  kUnknown,
};
96 
97 struct AdrenoInfo {
98   AdrenoInfo() = default;
99   explicit AdrenoInfo(const std::string& device_version);
100 
101   AdrenoGpu adreno_gpu;
102 
103   bool IsAdreno1xx() const;
104   bool IsAdreno2xx() const;
105   bool IsAdreno3xx() const;
106   bool IsAdreno4xx() const;
107   bool IsAdreno5xx() const;
108   bool IsAdreno6xx() const;
109   bool IsAdreno6xxOrHigher() const;
110 
111   // This function returns some not very documented physical parameter of
112   // Adreno6xx GPU.
113   // We obtained it using Snapdragon Profiler.
114   int GetMaximumWavesCount() const;
115 
116   // returns amount of register memory per CU(Compute Unit) in bytes.
117   int GetRegisterMemorySizePerComputeUnit() const;
118 
119   // returns maximum possible amount of waves based on register usage.
120   int GetMaximumWavesCount(int register_footprint_per_tread,
121                            bool full_wave = true) const;
122 
123   int GetWaveSize(bool full_wave) const;
124 
125   // Not supported on some Adreno devices with specific driver version.
126   // b/131099086
127   bool support_one_layer_texture_array = true;
128 
129   bool compiler_bugs_in_a6xx = false;
130 };
131 
// Apple GPU generations, named after the A-series chip they belong to.
// kUnknown is the first enumerator (value 0).
enum class AppleGpu {
  kUnknown = 0,
  kA7,
  kA8,
  kA8X,
  kA9,
  kA9X,
  kA10,
  kA10X,
  kA11,
  kA12,
  kA12X,
  kA12Z,
  kA13,
  kA14,
};
148 
149 struct AppleInfo {
150   AppleInfo() = default;
151   explicit AppleInfo(const std::string& gpu_description);
152   AppleGpu gpu_type;
153 
154   bool IsLocalMemoryPreferredOverGlobal() const;
155 
156   bool IsBionic() const;
157 
158   // floating point rounding mode
159   bool IsRoundToNearestSupported() const;
160 
161   int GetComputeUnitsCount() const;
162 };
163 
// Mali GPU models: the T-series entries first, then the G-series entries.
// kUnknown is the first enumerator (value 0).
enum class MaliGpu {
  kUnknown = 0,

  // T-series
  kT604,
  kT622,
  kT624,
  kT628,
  kT658,
  kT678,
  kT720,
  kT760,
  kT820,
  kT830,
  kT860,
  kT880,

  // G-series
  kG31,
  kG51,
  kG71,
  kG52,
  kG72,
  kG76,
  kG57,
  kG77,
  kG68,
  kG78,
};
189 
190 struct MaliInfo {
191   MaliInfo() = default;
192   explicit MaliInfo(const std::string& gpu_description);
193   MaliGpu gpu_version;
194 
195   bool IsMaliT6xx() const;
196   bool IsMaliT7xx() const;
197   bool IsMaliT8xx() const;
198   bool IsMidgard() const;
199   bool IsBifrostGen1() const;
200   bool IsBifrostGen2() const;
201   bool IsBifrostGen3() const;
202   bool IsBifrost() const;
203   bool IsValhall() const;
204 };
205 
// OpenGL-specific device information. Every field has a default so that a
// default-constructed OpenGlInfo is fully initialized.
struct OpenGlInfo {
  std::string renderer_name;
  std::string vendor_name;
  std::string version;
  // -1 until a version has been stored.
  int major_version = -1;
  int minor_version = -1;

  int max_image_units = 0;
  int max_ssbo_bindings = 0;
  int max_image_bindings = 0;
  int max_work_group_invocations = 0;
  int max_texture_size = 0;
  int max_array_texture_layers = 0;

  std::vector<std::string> extensions;
  // Zero-initialized like the other limits above, so reading them before they
  // are filled in is well-defined.
  int max_compute_work_group_size_x = 0;
  int max_compute_work_group_size_y = 0;
  int max_compute_work_group_size_z = 0;
};
225 
// Vulkan-specific device information. Every field has a default so that a
// default-constructed VulkanInfo is fully initialized.
struct VulkanInfo {
  std::string vendor_name;
  // UINT32_MAX (all bits set) until a version has been stored. This is the
  // same value that assigning -1 to a uint32_t produces.
  uint32_t api_version = UINT32_MAX;
  uint32_t api_version_major = UINT32_MAX;
  uint32_t api_version_minor = UINT32_MAX;
  uint32_t api_version_patch = UINT32_MAX;

  uint32_t max_per_stage_descriptor_sampled_images = 0;
  uint32_t max_compute_work_group_invocations = 0;
  uint32_t max_image_dimension_2d = 0;
  uint32_t max_image_array_layers = 0;

  uint32_t subgroup_size = 0;
  bool supports_subgroup_arithmetic = false;

  std::vector<std::string> extensions;
  int max_compute_work_group_size_x = 0;
  int max_compute_work_group_size_y = 0;
  int max_compute_work_group_size_z = 0;
};
246 
// OpenCL versions in ascending order, followed by kUnknown as the last
// enumerator.
enum class OpenClVersion {
  kCl1_0 = 0,
  kCl1_1,
  kCl1_2,
  kCl2_0,
  kCl2_1,
  kCl2_2,
  kCl3_0,
  kUnknown,
};

// Returns a string for `version`. The exact text is defined by the
// implementation, which is not part of this header.
std::string OpenClVersionToString(OpenClVersion version);
258 
259 struct OpenClInfo {
260   OpenClVersion cl_version;
261 
262   std::vector<std::string> extensions;
263   bool supports_fp16;
264   bool supports_image3d_writes;
265   bool supports_images;
266   int compute_units_count;
267   uint64_t buffer_max_size;
268   uint64_t image2d_max_width;
269   uint64_t image2d_max_height;
270   uint64_t image_buffer_max_size;
271   uint64_t image_array_max_layers;
272   uint64_t image3d_max_width;
273   uint64_t image3d_max_height;
274   uint64_t image3d_max_depth;
275   int max_work_group_size_x;
276   int max_work_group_size_y;
277   int max_work_group_size_z;
278   int max_work_group_total_size;
279 
280   // rtn is ROUND_TO_NEAREST
281   // with rtn precision is much better then with rtz (ROUND_TO_ZERO)
282   // Adreno 3xx supports only rtz, Adreno 4xx and more support rtn
283   // Mali from T6xx supports rtn
284   // PowerVR supports only rtz
285   bool supports_fp32_rtn;
286   bool supports_fp16_rtn;
287 
288   bool supports_r_f16_tex2d = false;
289   bool supports_rg_f16_tex2d = false;
290   bool supports_rgb_f16_tex2d = false;
291   bool supports_rgba_f16_tex2d = false;
292 
293   bool supports_r_f32_tex2d = false;
294   bool supports_rg_f32_tex2d = false;
295   bool supports_rgb_f32_tex2d = false;
296   bool supports_rgba_f32_tex2d = false;
297 };
298 
// Metal language versions in ascending order, followed by kUnknown as the
// last enumerator.
enum class MetalLanguageVersion {
  kMetal1_0 = 0,
  kMetal1_1,
  kMetal1_2,
  kMetal2_0,
  kMetal2_1,
  kMetal2_2,
  kMetal2_3,
  kUnknown,
};
309 
310 struct MetalInfo {
311   MetalLanguageVersion language_version;
312 
313   int max_work_group_size_x;
314   int max_work_group_size_y;
315   int max_work_group_size_z;
316 
317   uint64_t buffer_max_size;
318 };
319 
320 struct GpuInfo {
321   bool IsAdreno() const;
322   bool IsApple() const;
323   bool IsMali() const;
324   bool IsPowerVR() const;
325   bool IsNvidia() const;
326   bool IsAMD() const;
327   bool IsIntel() const;
328 
329   // floating point rounding mode
330   bool IsRoundToNearestSupported() const;
331 
332   bool SupportsFP16() const;
333 
334   bool SupportsImages() const;
335   bool SupportsTextureArray() const;
336   bool SupportsImageBuffer() const;
337   bool SupportsImage3D() const;
338 
339   // returns true if device have fixed wave size equal to 32
340   bool IsWaveSizeEqualTo32() const;
341   bool SupportsSubGroupWithSize(int sub_group_size) const;
342 
343   bool SupportsFloatImage2D(DataType data_type, int channels) const;
344   bool SupportsExtension(const std::string& extension) const;
345 
346   int GetComputeUnitsCount() const;
347 
348   int GetMaxImageArguments() const;
349 
350   int GetMaxWorkGroupSizeForX() const;
351   int GetMaxWorkGroupSizeForY() const;
352   int GetMaxWorkGroupSizeForZ() const;
353   int GetMaxWorkGroupTotalSize() const;
354 
355   uint64_t GetMaxImage2DWidth() const;
356   uint64_t GetMaxImage2DHeight() const;
357   uint64_t GetMaxImage2DArrayLayers() const;
358   uint64_t GetMaxImage3DWidth() const;
359   uint64_t GetMaxImage3DHeight() const;
360   uint64_t GetMaxImage3DDepth() const;
361   uint64_t GetMaxBufferSize() const;
362   uint64_t GetMaxImageBufferWidth() const;
363 
364   GpuVendor vendor = GpuVendor::kUnknown;
365   GpuApi gpu_api = GpuApi::kUnknown;
366 
367   std::vector<int> supported_subgroup_sizes;
368 
369   AdrenoInfo adreno_info;
370   AppleInfo apple_info;
371   MaliInfo mali_info;
372 
373   // OpenGL specific, gpu_api should be kOpenGl
374   OpenGlInfo opengl_info;
375   bool IsApiOpenGl() const;
376   bool IsApiOpenGl31OrAbove() const;
377 
378   // Vulkan specific, gpu_api should be kVulkan
379   VulkanInfo vulkan_info;
380   bool IsApiVulkan() const;
381 
382   MetalInfo metal_info;
383   bool IsApiMetal() const;
384 
385   OpenClInfo opencl_info;
386   bool IsApiOpenCl() const;
387   bool IsCL20OrHigher() const;
388   bool IsCL30OrHigher() const;
389 };
390 
391 // Currently it initializes:
392 // vendor
393 // AdrenoInfo if vendor is kQualcomm
394 // AppleInfo if vendor is kApple
395 // MaliInfo if vendor is kMali
396 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
397                                      GpuApi gpu_api, GpuInfo* gpu_info);
398 
399 }  // namespace gpu
400 }  // namespace tflite
401 
402 #endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_GPU_INFO_H_
403