1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/cl_device.h"
17 
18 #include <algorithm>
19 #include <string>
20 #include <vector>
21 
22 #include "absl/strings/numbers.h"
23 #include "absl/strings/str_cat.h"
24 #include "absl/strings/str_split.h"
25 #include "tensorflow/lite/delegates/gpu/cl/util.h"
26 #include "tensorflow/lite/delegates/gpu/common/status.h"
27 #include "tensorflow/lite/experimental/acceleration/compatibility/android_info.h"
28 
29 namespace tflite {
30 namespace gpu {
31 namespace cl {
32 
33 template <>
GetDeviceInfo(cl_device_id id,cl_device_info info)34 std::string GetDeviceInfo<std::string>(cl_device_id id, cl_device_info info) {
35   size_t size;
36   cl_int error = clGetDeviceInfo(id, info, 0, nullptr, &size);
37   if (error != CL_SUCCESS) {
38     return "";
39   }
40 
41   std::string result(size - 1, 0);
42   error = clGetDeviceInfo(id, info, size, &result[0], nullptr);
43   if (error != CL_SUCCESS) {
44     return "";
45   }
46   return result;
47 }
48 
49 namespace {
50 template <typename T>
GetPlatformInfo(cl_platform_id id,cl_platform_info info)51 T GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
52   T result;
53   cl_int error = clGetPlatformInfo(id, info, sizeof(T), &result, nullptr);
54   if (error != CL_SUCCESS) {
55     return -1;
56   }
57   return result;
58 }
59 
GetPlatformInfo(cl_platform_id id,cl_platform_info info)60 std::string GetPlatformInfo(cl_platform_id id, cl_platform_info info) {
61   size_t size;
62   cl_int error = clGetPlatformInfo(id, info, 0, nullptr, &size);
63   if (error != CL_SUCCESS) {
64     return "";
65   }
66 
67   std::string result(size - 1, 0);
68   error = clGetPlatformInfo(id, info, size, &result[0], nullptr);
69   if (error != CL_SUCCESS) {
70     return "";
71   }
72   return result;
73 }
74 
GetDeviceWorkDimsSizes(cl_device_id id,int3 * result)75 void GetDeviceWorkDimsSizes(cl_device_id id, int3* result) {
76   int dims_count =
77       GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS);
78   if (dims_count < 3) {
79     return;
80   }
81   std::vector<size_t> limits(dims_count);
82   cl_int error =
83       clGetDeviceInfo(id, CL_DEVICE_MAX_WORK_ITEM_SIZES,
84                       sizeof(size_t) * dims_count, limits.data(), nullptr);
85   if (error != CL_SUCCESS) {
86     return;
87   }
88   // dims_count must be at least 3 according to spec
89   result->x = limits[0];
90   result->y = limits[1];
91   result->z = limits[2];
92 }
93 
ParseCLVersion(const std::string & version)94 OpenClVersion ParseCLVersion(const std::string& version) {
95   const auto first_dot_pos = version.find_first_of('.');
96   if (first_dot_pos == std::string::npos) {
97     return OpenClVersion::kCl1_0;
98   }
99   const int major = version[first_dot_pos - 1] - '0';
100   const int minor = version[first_dot_pos + 1] - '0';
101 
102   if (major == 1) {
103     if (minor == 2) {
104       return OpenClVersion::kCl1_2;
105     } else if (minor == 1) {
106       return OpenClVersion::kCl1_1;
107     } else {
108       return OpenClVersion::kCl1_0;
109     }
110   } else if (major == 2) {
111     if (minor == 2) {
112       return OpenClVersion::kCl2_2;
113     } else if (minor == 1) {
114       return OpenClVersion::kCl2_1;
115     } else {
116       return OpenClVersion::kCl2_0;
117     }
118   } else if (major == 3) {
119     return OpenClVersion::kCl3_0;
120   } else {
121     return OpenClVersion::kCl1_0;
122   }
123 }
124 
// Returns true when gpu_version lies in the half-open interval
// [min_version, max_version): the lower bound is inclusive, the upper bound
// is exclusive.
bool IsGPUVersionInRange(int gpu_version, int min_version, int max_version) {
  if (gpu_version < min_version) {
    return false;
  }
  return gpu_version < max_version;
}
130 
GpuInfoFromDeviceID(cl_device_id id)131 GpuInfo GpuInfoFromDeviceID(cl_device_id id) {
132   GpuInfo info;
133   const auto device_name = GetDeviceInfo<std::string>(id, CL_DEVICE_NAME);
134   const auto vendor_name = GetDeviceInfo<std::string>(id, CL_DEVICE_VENDOR);
135   const auto opencl_c_version =
136       GetDeviceInfo<std::string>(id, CL_DEVICE_OPENCL_C_VERSION);
137   const std::string gpu_description =
138       absl::StrCat(device_name, " ", vendor_name, " ", opencl_c_version);
139   GetGpuInfoFromDeviceDescription(gpu_description, GpuApi::kOpenCl, &info);
140   info.opencl_info.cl_version = ParseCLVersion(opencl_c_version);
141   info.opencl_info.extensions =
142       absl::StrSplit(GetDeviceInfo<std::string>(id, CL_DEVICE_EXTENSIONS), ' ');
143   info.opencl_info.supports_fp16 = false;
144   info.opencl_info.supports_image3d_writes = false;
145   for (const auto& ext : info.opencl_info.extensions) {
146     if (ext == "cl_khr_fp16") {
147       info.opencl_info.supports_fp16 = true;
148     }
149     if (ext == "cl_khr_3d_image_writes") {
150       info.opencl_info.supports_image3d_writes = true;
151     }
152   }
153 
154   info.opencl_info.supports_images =
155       GetDeviceInfo<cl_bool>(id, CL_DEVICE_IMAGE_SUPPORT);
156 
157   cl_device_fp_config f32_config =
158       GetDeviceInfo<cl_device_fp_config>(id, CL_DEVICE_SINGLE_FP_CONFIG);
159   info.opencl_info.supports_fp32_rtn = f32_config & CL_FP_ROUND_TO_NEAREST;
160 
161   if (info.opencl_info.supports_fp16) {
162     cl_device_fp_config f16_config;
163     auto status = GetDeviceInfo<cl_device_fp_config>(
164         id, CL_DEVICE_HALF_FP_CONFIG, &f16_config);
165     // AMD supports cl_khr_fp16 but CL_DEVICE_HALF_FP_CONFIG is empty.
166     if (status.ok() && !info.IsAMD()) {
167       info.opencl_info.supports_fp16_rtn = f16_config & CL_FP_ROUND_TO_NEAREST;
168     } else {  // happens on PowerVR
169       f16_config = f32_config;
170       info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
171     }
172   } else {
173     info.opencl_info.supports_fp16_rtn = false;
174   }
175 
176   if (info.IsPowerVR() && !info.opencl_info.supports_fp16) {
177     // PowerVR doesn't have full support of fp16 and so doesn't list this
178     // extension. But it can support fp16 in MADs and as buffers/textures types,
179     // so we will use it.
180     info.opencl_info.supports_fp16 = true;
181     info.opencl_info.supports_fp16_rtn = info.opencl_info.supports_fp32_rtn;
182   }
183 
184   if (!info.opencl_info.supports_image3d_writes &&
185       ((info.IsAdreno() && info.adreno_info.IsAdreno4xx()) ||
186        info.IsNvidia())) {
187     // in local tests Adreno 430 can write in image 3d, at least on small sizes,
188     // but it doesn't have cl_khr_3d_image_writes in list of available
189     // extensions
190     // The same for NVidia
191     info.opencl_info.supports_image3d_writes = true;
192   }
193   info.opencl_info.compute_units_count =
194       GetDeviceInfo<cl_uint>(id, CL_DEVICE_MAX_COMPUTE_UNITS);
195   info.opencl_info.image2d_max_width =
196       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_WIDTH);
197   info.opencl_info.image2d_max_height =
198       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
199   info.opencl_info.buffer_max_size =
200       GetDeviceInfo<cl_ulong>(id, CL_DEVICE_MAX_MEM_ALLOC_SIZE);
201   if (info.opencl_info.cl_version >= OpenClVersion::kCl1_2) {
202     info.opencl_info.image_buffer_max_size =
203         GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE);
204     info.opencl_info.image_array_max_layers =
205         GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE);
206   }
207   info.opencl_info.image3d_max_width =
208       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_WIDTH);
209   info.opencl_info.image3d_max_height =
210       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE2D_MAX_HEIGHT);
211   info.opencl_info.image3d_max_depth =
212       GetDeviceInfo<size_t>(id, CL_DEVICE_IMAGE3D_MAX_DEPTH);
213   int3 max_work_group_sizes;
214   GetDeviceWorkDimsSizes(id, &max_work_group_sizes);
215   info.opencl_info.max_work_group_size_x = max_work_group_sizes.x;
216   info.opencl_info.max_work_group_size_y = max_work_group_sizes.y;
217   info.opencl_info.max_work_group_size_z = max_work_group_sizes.z;
218   info.opencl_info.max_work_group_total_size =
219       GetDeviceInfo<size_t>(id, CL_DEVICE_MAX_WORK_GROUP_SIZE);
220 
221   if (info.IsIntel()) {
222     if (info.SupportsExtension("cl_intel_required_subgroup_size")) {
223       size_t sub_groups_count;
224       cl_int status =
225           clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/, 0,
226                           nullptr, &sub_groups_count);
227       if (status == CL_SUCCESS) {
228         std::vector<size_t> sub_group_sizes(sub_groups_count);
229         status = clGetDeviceInfo(id, 0x4108 /*CL_DEVICE_SUB_GROUP_SIZES_INTEL*/,
230                                  sizeof(size_t) * sub_groups_count,
231                                  sub_group_sizes.data(), nullptr);
232         if (status == CL_SUCCESS) {
233           for (int i = 0; i < sub_groups_count; ++i) {
234             info.supported_subgroup_sizes.push_back(sub_group_sizes[i]);
235           }
236         }
237       }
238     }
239   }
240   return info;
241 }
242 
243 }  // namespace
244 
CLDevice(cl_device_id id,cl_platform_id platform_id)245 CLDevice::CLDevice(cl_device_id id, cl_platform_id platform_id)
246     : info_(GpuInfoFromDeviceID(id)), id_(id), platform_id_(platform_id) {
247   if (info_.IsAdreno() &&
248       info_.adreno_info.adreno_gpu == AdrenoGpu::kAdreno630) {
249     acceleration::AndroidInfo android_info;
250     if (acceleration::RequestAndroidInfo(&android_info).ok()) {
251       info_.adreno_info.compiler_bugs_in_a6xx =
252           android_info.android_sdk_version == "26";
253     }
254   }
255 }
256 
// Copy constructor: duplicates the GpuInfo and both OpenCL handles of
// `device`.
CLDevice::CLDevice(const CLDevice& device)
    : info_(device.info_),
      id_(device.id_),
      platform_id_(device.platform_id_) {}
259 
// Copy assignment: copies the GpuInfo and both OpenCL handles of `device`.
// Assigning an object to itself is a no-op.
CLDevice& CLDevice::operator=(const CLDevice& device) {
  if (this == &device) {
    return *this;
  }
  info_ = device.info_;
  id_ = device.id_;
  platform_id_ = device.platform_id_;
  return *this;
}
268 
CLDevice(CLDevice && device)269 CLDevice::CLDevice(CLDevice&& device)
270     : info_(std::move(device.info_)),
271       id_(device.id_),
272       platform_id_(device.platform_id_) {
273   device.id_ = nullptr;
274   device.platform_id_ = nullptr;
275 }
276 
operator =(CLDevice && device)277 CLDevice& CLDevice::operator=(CLDevice&& device) {
278   if (this != &device) {
279     id_ = nullptr;
280     platform_id_ = nullptr;
281     info_ = std::move(device.info_);
282     std::swap(id_, device.id_);
283     std::swap(platform_id_, device.platform_id_);
284   }
285   return *this;
286 }
287 
GetPlatformVersion() const288 std::string CLDevice::GetPlatformVersion() const {
289   return GetPlatformInfo(platform_id_, CL_PLATFORM_VERSION);
290 }
291 
DisableOneLayerTextureArray()292 void CLDevice::DisableOneLayerTextureArray() {
293   info_.adreno_info.support_one_layer_texture_array = false;
294 }
295 
CreateDefaultGPUDevice(CLDevice * result)296 absl::Status CreateDefaultGPUDevice(CLDevice* result) {
297   cl_uint num_platforms;
298   clGetPlatformIDs(0, nullptr, &num_platforms);
299   if (num_platforms == 0) {
300     return absl::UnknownError("No supported OpenCL platform.");
301   }
302   std::vector<cl_platform_id> platforms(num_platforms);
303   clGetPlatformIDs(num_platforms, platforms.data(), nullptr);
304 
305   cl_platform_id platform_id = platforms[0];
306   cl_uint num_devices;
307   clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, 0, nullptr, &num_devices);
308   if (num_devices == 0) {
309     return absl::UnknownError("No GPU on current platform.");
310   }
311 
312   std::vector<cl_device_id> devices(num_devices);
313   clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_GPU, num_devices, devices.data(),
314                  nullptr);
315 
316   *result = CLDevice(devices[0], platform_id);
317   return absl::OkStatus();
318 }
319 
320 }  // namespace cl
321 }  // namespace gpu
322 }  // namespace tflite
323