1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h"
17 
18 #if defined(_WIN32)
19 #define __WINDOWS__
20 #endif
21 
22 #ifdef __WINDOWS__
23 #include <windows.h>
24 #else
25 #include <dlfcn.h>
26 #endif
27 
28 #include <string>
29 
30 #include "absl/strings/str_cat.h"
31 #include "tensorflow/lite/delegates/gpu/common/status.h"
32 
33 namespace tflite {
34 namespace gpu {
35 namespace cl {
36 
// LoadFunction(function) looks up the OpenCL entry point whose symbol name is
// `function` and stores the result, cast to PFN_<function>, in the variable
// of the same name.
//
// The expansion site must have `libopencl` (the library handle) in scope. The
// Android variant additionally needs `use_wrapper` and `loadOpenCLPointer`:
// when `use_wrapper` is true the lookup goes through `loadOpenCLPointer`,
// otherwise through dlsym().
//
// Every variant expands to exactly one `do { ... } while (0)` statement and
// deliberately omits the trailing semicolon, so the call site supplies it
// (`LoadFunction(clFinish);`) and the macro stays safe inside unbraced
// if/else chains.
#ifdef __ANDROID__
#define LoadFunction(function)                                             \
  do {                                                                     \
    if (use_wrapper) {                                                     \
      function =                                                           \
          reinterpret_cast<PFN_##function>(loadOpenCLPointer(#function));  \
    } else {                                                               \
      function =                                                           \
          reinterpret_cast<PFN_##function>(dlsym(libopencl, #function));   \
    }                                                                      \
  } while (0)
#elif defined(__WINDOWS__)
#define LoadFunction(function)                      \
  do {                                              \
    function = reinterpret_cast<PFN_##function>(    \
        GetProcAddress(libopencl, #function));      \
  } while (0)
#else
#define LoadFunction(function)                                                \
  do {                                                                        \
    function = reinterpret_cast<PFN_##function>(dlsym(libopencl, #function)); \
  } while (0)
#endif

// Forward declaration of the routine that fills in every OpenCL function
// pointer from an already opened library; it is defined further down in this
// file. On Windows the handle is an HMODULE; elsewhere it is the void* handle
// returned by dlopen(), and `use_wrapper` selects the wrapper lookup path on
// Android.
#ifdef __WINDOWS__
void LoadOpenCLFunctions(HMODULE libopencl);
#else
void LoadOpenCLFunctions(void* libopencl, bool use_wrapper);
#endif
58 
LoadOpenCL()59 absl::Status LoadOpenCL() {
60 #ifdef __WINDOWS__
61   HMODULE libopencl = LoadLibraryA("OpenCL.dll");
62   if (libopencl) {
63     LoadOpenCLFunctions(libopencl);
64     return absl::OkStatus();
65   } else {
66     DWORD error_code = GetLastError();
67     return absl::UnknownError(absl::StrCat(
68         "Can not open OpenCL library on this device, error code - ",
69         error_code));
70   }
71 #else
72   void* libopencl = nullptr;
73 #ifdef __ANDROID__
74   // Pixel phone or auto?
75   libopencl = dlopen("libOpenCL-pixel.so", RTLD_NOW | RTLD_LOCAL);
76   if (!libopencl) {
77     libopencl = dlopen("libOpenCL-car.so", RTLD_NOW | RTLD_LOCAL);
78   }
79   if (libopencl) {
80     typedef void (*enableOpenCL_t)();
81     enableOpenCL_t enableOpenCL =
82         reinterpret_cast<enableOpenCL_t>(dlsym(libopencl, "enableOpenCL"));
83     enableOpenCL();
84     LoadOpenCLFunctions(libopencl, true);
85     return absl::OkStatus();
86   }
87 #endif
88 #ifdef __APPLE__
89   static const char* kClLibName =
90       "/System/Library/Frameworks/OpenCL.framework/OpenCL";
91 #else
92   static const char* kClLibName = "libOpenCL.so";
93 #endif
94   libopencl = dlopen(kClLibName, RTLD_NOW | RTLD_LOCAL);
95   if (libopencl) {
96     LoadOpenCLFunctions(libopencl, false);
97     return absl::OkStatus();
98   }
99   // record error
100   std::string error(dlerror());
101   return absl::UnknownError(
102       absl::StrCat("Can not open OpenCL library on this device - ", error));
103 #endif
104 }
105 
106 #ifdef __WINDOWS__
LoadOpenCLFunctions(HMODULE libopencl)107 void LoadOpenCLFunctions(HMODULE libopencl) {
108 #else
109 void LoadOpenCLFunctions(void* libopencl, bool use_wrapper) {
110 #ifdef __ANDROID__
111   typedef void* (*loadOpenCLPointer_t)(const char* name);
112   loadOpenCLPointer_t loadOpenCLPointer;
113   if (use_wrapper) {
114     loadOpenCLPointer = reinterpret_cast<loadOpenCLPointer_t>(
115         dlsym(libopencl, "loadOpenCLPointer"));
116   }
117 #endif
118 #endif
119 
120   LoadFunction(clGetPlatformIDs);
121   LoadFunction(clGetPlatformInfo);
122   LoadFunction(clGetDeviceIDs);
123   LoadFunction(clGetDeviceInfo);
124   LoadFunction(clCreateSubDevices);
125   LoadFunction(clRetainDevice);
126   LoadFunction(clReleaseDevice);
127   LoadFunction(clCreateContext);
128   LoadFunction(clCreateContextFromType);
129   LoadFunction(clRetainContext);
130   LoadFunction(clReleaseContext);
131   LoadFunction(clGetContextInfo);
132   LoadFunction(clCreateCommandQueueWithProperties);
133   LoadFunction(clRetainCommandQueue);
134   LoadFunction(clReleaseCommandQueue);
135   LoadFunction(clGetCommandQueueInfo);
136   LoadFunction(clCreateBuffer);
137   LoadFunction(clCreateSubBuffer);
138   LoadFunction(clCreateImage);
139   LoadFunction(clCreatePipe);
140   LoadFunction(clRetainMemObject);
141   LoadFunction(clReleaseMemObject);
142   LoadFunction(clGetSupportedImageFormats);
143   LoadFunction(clGetMemObjectInfo);
144   LoadFunction(clGetImageInfo);
145   LoadFunction(clGetPipeInfo);
146   LoadFunction(clSetMemObjectDestructorCallback);
147   LoadFunction(clSVMAlloc);
148   LoadFunction(clSVMFree);
149   LoadFunction(clCreateSamplerWithProperties);
150   LoadFunction(clRetainSampler);
151   LoadFunction(clReleaseSampler);
152   LoadFunction(clGetSamplerInfo);
153   LoadFunction(clCreateProgramWithSource);
154   LoadFunction(clCreateProgramWithBinary);
155   LoadFunction(clCreateProgramWithBuiltInKernels);
156   LoadFunction(clRetainProgram);
157   LoadFunction(clReleaseProgram);
158   LoadFunction(clBuildProgram);
159   LoadFunction(clCompileProgram);
160   LoadFunction(clLinkProgram);
161   LoadFunction(clUnloadPlatformCompiler);
162   LoadFunction(clGetProgramInfo);
163   LoadFunction(clGetProgramBuildInfo);
164   LoadFunction(clCreateKernel);
165   LoadFunction(clCreateKernelsInProgram);
166   LoadFunction(clRetainKernel);
167   LoadFunction(clReleaseKernel);
168   LoadFunction(clSetKernelArg);
169   LoadFunction(clSetKernelArgSVMPointer);
170   LoadFunction(clSetKernelExecInfo);
171   LoadFunction(clGetKernelInfo);
172   LoadFunction(clGetKernelArgInfo);
173   LoadFunction(clGetKernelWorkGroupInfo);
174   LoadFunction(clWaitForEvents);
175   LoadFunction(clGetEventInfo);
176   LoadFunction(clCreateUserEvent);
177   LoadFunction(clRetainEvent);
178   LoadFunction(clReleaseEvent);
179   LoadFunction(clSetUserEventStatus);
180   LoadFunction(clSetEventCallback);
181   LoadFunction(clGetEventProfilingInfo);
182   LoadFunction(clFlush);
183   LoadFunction(clFinish);
184   LoadFunction(clEnqueueReadBuffer);
185   LoadFunction(clEnqueueReadBufferRect);
186   LoadFunction(clEnqueueWriteBuffer);
187   LoadFunction(clEnqueueWriteBufferRect);
188   LoadFunction(clEnqueueFillBuffer);
189   LoadFunction(clEnqueueCopyBuffer);
190   LoadFunction(clEnqueueCopyBufferRect);
191   LoadFunction(clEnqueueReadImage);
192   LoadFunction(clEnqueueWriteImage);
193   LoadFunction(clEnqueueFillImage);
194   LoadFunction(clEnqueueCopyImage);
195   LoadFunction(clEnqueueCopyImageToBuffer);
196   LoadFunction(clEnqueueCopyBufferToImage);
197   LoadFunction(clEnqueueMapBuffer);
198   LoadFunction(clEnqueueMapImage);
199   LoadFunction(clEnqueueUnmapMemObject);
200   LoadFunction(clEnqueueMigrateMemObjects);
201   LoadFunction(clEnqueueNDRangeKernel);
202   LoadFunction(clEnqueueNativeKernel);
203   LoadFunction(clEnqueueMarkerWithWaitList);
204   LoadFunction(clEnqueueBarrierWithWaitList);
205   LoadFunction(clEnqueueSVMFree);
206   LoadFunction(clEnqueueSVMMemcpy);
207   LoadFunction(clEnqueueSVMMemFill);
208   LoadFunction(clEnqueueSVMMap);
209   LoadFunction(clEnqueueSVMUnmap);
210   LoadFunction(clGetExtensionFunctionAddressForPlatform);
211   LoadFunction(clCreateImage2D);
212   LoadFunction(clCreateImage3D);
213   LoadFunction(clEnqueueMarker);
214   LoadFunction(clEnqueueWaitForEvents);
215   LoadFunction(clEnqueueBarrier);
216   LoadFunction(clUnloadCompiler);
217   LoadFunction(clGetExtensionFunctionAddress);
218   LoadFunction(clCreateCommandQueue);
219   LoadFunction(clCreateSampler);
220   LoadFunction(clEnqueueTask);
221 
222   // OpenGL sharing
223   LoadFunction(clCreateFromGLBuffer);
224   LoadFunction(clCreateFromGLTexture);
225   LoadFunction(clEnqueueAcquireGLObjects);
226   LoadFunction(clEnqueueReleaseGLObjects);
227 
228   // cl_khr_egl_event extension
229   LoadFunction(clCreateEventFromEGLSyncKHR);
230 
231   // EGL sharing
232   LoadFunction(clCreateFromEGLImageKHR);
233   LoadFunction(clEnqueueAcquireEGLObjectsKHR);
234   LoadFunction(clEnqueueReleaseEGLObjectsKHR);
235 }
236 
237 // No OpenCL support, do not set function addresses
238 PFN_clGetPlatformIDs clGetPlatformIDs;
239 PFN_clGetPlatformInfo clGetPlatformInfo;
240 PFN_clGetDeviceIDs clGetDeviceIDs;
241 PFN_clGetDeviceInfo clGetDeviceInfo;
242 PFN_clCreateSubDevices clCreateSubDevices;
243 PFN_clRetainDevice clRetainDevice;
244 PFN_clReleaseDevice clReleaseDevice;
245 PFN_clCreateContext clCreateContext;
246 PFN_clCreateContextFromType clCreateContextFromType;
247 PFN_clRetainContext clRetainContext;
248 PFN_clReleaseContext clReleaseContext;
249 PFN_clGetContextInfo clGetContextInfo;
250 PFN_clCreateCommandQueueWithProperties clCreateCommandQueueWithProperties;
251 PFN_clRetainCommandQueue clRetainCommandQueue;
252 PFN_clReleaseCommandQueue clReleaseCommandQueue;
253 PFN_clGetCommandQueueInfo clGetCommandQueueInfo;
254 PFN_clCreateBuffer clCreateBuffer;
255 PFN_clCreateSubBuffer clCreateSubBuffer;
256 PFN_clCreateImage clCreateImage;
257 PFN_clCreatePipe clCreatePipe;
258 PFN_clRetainMemObject clRetainMemObject;
259 PFN_clReleaseMemObject clReleaseMemObject;
260 PFN_clGetSupportedImageFormats clGetSupportedImageFormats;
261 PFN_clGetMemObjectInfo clGetMemObjectInfo;
262 PFN_clGetImageInfo clGetImageInfo;
263 PFN_clGetPipeInfo clGetPipeInfo;
264 PFN_clSetMemObjectDestructorCallback clSetMemObjectDestructorCallback;
265 PFN_clSVMAlloc clSVMAlloc;
266 PFN_clSVMFree clSVMFree;
267 PFN_clCreateSamplerWithProperties clCreateSamplerWithProperties;
268 PFN_clRetainSampler clRetainSampler;
269 PFN_clReleaseSampler clReleaseSampler;
270 PFN_clGetSamplerInfo clGetSamplerInfo;
271 PFN_clCreateProgramWithSource clCreateProgramWithSource;
272 PFN_clCreateProgramWithBinary clCreateProgramWithBinary;
273 PFN_clCreateProgramWithBuiltInKernels clCreateProgramWithBuiltInKernels;
274 PFN_clRetainProgram clRetainProgram;
275 PFN_clReleaseProgram clReleaseProgram;
276 PFN_clBuildProgram clBuildProgram;
277 PFN_clCompileProgram clCompileProgram;
278 PFN_clLinkProgram clLinkProgram;
279 PFN_clUnloadPlatformCompiler clUnloadPlatformCompiler;
280 PFN_clGetProgramInfo clGetProgramInfo;
281 PFN_clGetProgramBuildInfo clGetProgramBuildInfo;
282 PFN_clCreateKernel clCreateKernel;
283 PFN_clCreateKernelsInProgram clCreateKernelsInProgram;
284 PFN_clRetainKernel clRetainKernel;
285 PFN_clReleaseKernel clReleaseKernel;
286 PFN_clSetKernelArg clSetKernelArg;
287 PFN_clSetKernelArgSVMPointer clSetKernelArgSVMPointer;
288 PFN_clSetKernelExecInfo clSetKernelExecInfo;
289 PFN_clGetKernelInfo clGetKernelInfo;
290 PFN_clGetKernelArgInfo clGetKernelArgInfo;
291 PFN_clGetKernelWorkGroupInfo clGetKernelWorkGroupInfo;
292 PFN_clWaitForEvents clWaitForEvents;
293 PFN_clGetEventInfo clGetEventInfo;
294 PFN_clCreateUserEvent clCreateUserEvent;
295 PFN_clRetainEvent clRetainEvent;
296 PFN_clReleaseEvent clReleaseEvent;
297 PFN_clSetUserEventStatus clSetUserEventStatus;
298 PFN_clSetEventCallback clSetEventCallback;
299 PFN_clGetEventProfilingInfo clGetEventProfilingInfo;
300 PFN_clFlush clFlush;
301 PFN_clFinish clFinish;
302 PFN_clEnqueueReadBuffer clEnqueueReadBuffer;
303 PFN_clEnqueueReadBufferRect clEnqueueReadBufferRect;
304 PFN_clEnqueueWriteBuffer clEnqueueWriteBuffer;
305 PFN_clEnqueueWriteBufferRect clEnqueueWriteBufferRect;
306 PFN_clEnqueueFillBuffer clEnqueueFillBuffer;
307 PFN_clEnqueueCopyBuffer clEnqueueCopyBuffer;
308 PFN_clEnqueueCopyBufferRect clEnqueueCopyBufferRect;
309 PFN_clEnqueueReadImage clEnqueueReadImage;
310 PFN_clEnqueueWriteImage clEnqueueWriteImage;
311 PFN_clEnqueueFillImage clEnqueueFillImage;
312 PFN_clEnqueueCopyImage clEnqueueCopyImage;
313 PFN_clEnqueueCopyImageToBuffer clEnqueueCopyImageToBuffer;
314 PFN_clEnqueueCopyBufferToImage clEnqueueCopyBufferToImage;
315 PFN_clEnqueueMapBuffer clEnqueueMapBuffer;
316 PFN_clEnqueueMapImage clEnqueueMapImage;
317 PFN_clEnqueueUnmapMemObject clEnqueueUnmapMemObject;
318 PFN_clEnqueueMigrateMemObjects clEnqueueMigrateMemObjects;
319 PFN_clEnqueueNDRangeKernel clEnqueueNDRangeKernel;
320 PFN_clEnqueueNativeKernel clEnqueueNativeKernel;
321 PFN_clEnqueueMarkerWithWaitList clEnqueueMarkerWithWaitList;
322 PFN_clEnqueueBarrierWithWaitList clEnqueueBarrierWithWaitList;
323 PFN_clEnqueueSVMFree clEnqueueSVMFree;
324 PFN_clEnqueueSVMMemcpy clEnqueueSVMMemcpy;
325 PFN_clEnqueueSVMMemFill clEnqueueSVMMemFill;
326 PFN_clEnqueueSVMMap clEnqueueSVMMap;
327 PFN_clEnqueueSVMUnmap clEnqueueSVMUnmap;
328 PFN_clGetExtensionFunctionAddressForPlatform
329     clGetExtensionFunctionAddressForPlatform;
330 PFN_clCreateImage2D clCreateImage2D;
331 PFN_clCreateImage3D clCreateImage3D;
332 PFN_clEnqueueMarker clEnqueueMarker;
333 PFN_clEnqueueWaitForEvents clEnqueueWaitForEvents;
334 PFN_clEnqueueBarrier clEnqueueBarrier;
335 PFN_clUnloadCompiler clUnloadCompiler;
336 PFN_clGetExtensionFunctionAddress clGetExtensionFunctionAddress;
337 PFN_clCreateCommandQueue clCreateCommandQueue;
338 PFN_clCreateSampler clCreateSampler;
339 PFN_clEnqueueTask clEnqueueTask;
340 
341 // OpenGL sharing
342 PFN_clCreateFromGLBuffer clCreateFromGLBuffer;
343 PFN_clCreateFromGLTexture clCreateFromGLTexture;
344 PFN_clEnqueueAcquireGLObjects clEnqueueAcquireGLObjects;
345 PFN_clEnqueueReleaseGLObjects clEnqueueReleaseGLObjects;
346 
347 // cl_khr_egl_event extension
348 PFN_clCreateEventFromEGLSyncKHR clCreateEventFromEGLSyncKHR;
349 
350 // EGL sharing
351 PFN_clCreateFromEGLImageKHR clCreateFromEGLImageKHR;
352 PFN_clEnqueueAcquireEGLObjectsKHR clEnqueueAcquireEGLObjectsKHR;
353 PFN_clEnqueueReleaseEGLObjectsKHR clEnqueueReleaseEGLObjectsKHR;
354 
355 cl_mem CreateImage2DLegacy(cl_context context, cl_mem_flags flags,
356                            const cl_image_format* image_format,
357                            const cl_image_desc* image_desc, void* host_ptr,
358                            cl_int* errcode_ret) {
359   if (clCreateImage) {  // clCreateImage available since OpenCL 1.2
360     return clCreateImage(context, flags, image_format, image_desc, host_ptr,
361                          errcode_ret);
362   } else {
363     return clCreateImage2D(context, flags, image_format,
364                            image_desc->image_width, image_desc->image_height,
365                            image_desc->image_row_pitch, host_ptr, errcode_ret);
366   }
367 }
368 
369 cl_mem CreateImage3DLegacy(cl_context context, cl_mem_flags flags,
370                            const cl_image_format* image_format,
371                            const cl_image_desc* image_desc, void* host_ptr,
372                            cl_int* errcode_ret) {
373   if (clCreateImage) {  // clCreateImage available since OpenCL 1.2
374     return clCreateImage(context, flags, image_format, image_desc, host_ptr,
375                          errcode_ret);
376   } else {
377     return clCreateImage3D(context, flags, image_format,
378                            image_desc->image_width, image_desc->image_height,
379                            image_desc->image_depth, image_desc->image_row_pitch,
380                            image_desc->image_slice_pitch, host_ptr,
381                            errcode_ret);
382   }
383 }
384 }  // namespace cl
385 }  // namespace gpu
386 }  // namespace tflite
387