/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"

#include <algorithm>
#include <vector>

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"

23 namespace tflite {
24 namespace gpu {
25 namespace cl {
26 namespace {
27 
28 #ifndef EGL_VERSION_1_5
29 typedef void* EGLSync;
30 #define EGL_SYNC_CL_EVENT 0x30FE
31 #define EGL_CL_EVENT_HANDLE 0x309C
32 #define EGL_NO_SYNC 0
33 #endif /* EGL_VERSION_1_5 */
34 
35 // TODO(b/131897059): replace with 64 version when EGL 1.5 is available.
36 // it should use KHR_cl_event2 extension. More details are in b/129974818.
37 using PFNEGLCREATESYNCPROC = EGLSync(EGLAPIENTRYP)(
38     EGLDisplay dpy, EGLenum type, const EGLAttrib* attrib_list);
39 
40 PFNEGLCREATESYNCPROC g_eglCreateSync = nullptr;
41 
42 }  // namespace
43 
CreateEglSyncFromClEvent(cl_event event,EGLDisplay display,EglSync * sync)44 absl::Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
45                                       EglSync* sync) {
46   if (!IsEglSyncFromClEventSupported()) {
47     return absl::UnimplementedError(
48         "CreateEglSyncFromClEvent is not supported");
49   }
50   EGLSync egl_sync;
51   const EGLAttrib attributes[] = {EGL_CL_EVENT_HANDLE,
52                                   reinterpret_cast<EGLAttrib>(event), EGL_NONE};
53   RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(g_eglCreateSync, &egl_sync, display,
54                                       EGL_SYNC_CL_EVENT, attributes));
55   if (egl_sync == EGL_NO_SYNC) {
56     return absl::InternalError("Returned empty EGL sync");
57   }
58   *sync = EglSync(display, egl_sync);
59   return absl::OkStatus();
60 }
61 
IsEglSyncFromClEventSupported()62 bool IsEglSyncFromClEventSupported() {
63   // In C++11, static initializers are guaranteed to be evaluated only once.
64   static bool supported = []() -> bool {
65     // This function requires EGL 1.5 to work
66     g_eglCreateSync = reinterpret_cast<PFNEGLCREATESYNCPROC>(
67         eglGetProcAddress("eglCreateSync"));
68     // eglQueryString accepts EGL_NO_DISPLAY only starting EGL 1.5
69     if (!eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS)) {
70       g_eglCreateSync = nullptr;
71     }
72     return (g_eglCreateSync != nullptr);
73   }();
74   return supported;
75 }
76 
CreateClEventFromEglSync(cl_context context,const EglSync & egl_sync,CLEvent * event)77 absl::Status CreateClEventFromEglSync(cl_context context,
78                                       const EglSync& egl_sync, CLEvent* event) {
79   cl_int error_code;
80   cl_event new_event = clCreateEventFromEGLSyncKHR(
81       context, egl_sync.sync(), egl_sync.display(), &error_code);
82   if (error_code != CL_SUCCESS) {
83     return absl::InternalError(
84         absl::StrCat("Unable to create CL sync from EGL sync. ",
85                      CLErrorCodeToString(error_code)));
86   }
87   *event = CLEvent(new_event);
88   return absl::OkStatus();
89 }
90 
IsClEventFromEglSyncSupported(const CLDevice & device)91 bool IsClEventFromEglSyncSupported(const CLDevice& device) {
92   return device.GetInfo().SupportsExtension("cl_khr_egl_event");
93 }
94 
CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,AccessType access_type,CLContext * context,CLMemory * memory)95 absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
96                                         AccessType access_type,
97                                         CLContext* context, CLMemory* memory) {
98   cl_int error_code;
99   auto mem = clCreateFromGLBuffer(context->context(), ToClMemFlags(access_type),
100                                   gl_ssbo_id, &error_code);
101   if (error_code != CL_SUCCESS) {
102     return absl::InternalError(
103         absl::StrCat("Unable to acquire CL buffer from GL buffer. ",
104                      CLErrorCodeToString(error_code)));
105   }
106   *memory = CLMemory(mem, true);
107   return absl::OkStatus();
108 }
109 
CreateClMemoryFromGlTexture(GLenum texture_target,GLuint texture_id,AccessType access_type,CLContext * context,CLMemory * memory)110 absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
111                                          GLuint texture_id,
112                                          AccessType access_type,
113                                          CLContext* context, CLMemory* memory) {
114   cl_int error_code;
115   auto mem =
116       clCreateFromGLTexture(context->context(), ToClMemFlags(access_type),
117                             texture_target, 0, texture_id, &error_code);
118   if (error_code != CL_SUCCESS) {
119     return absl::InternalError(
120         absl::StrCat("Unable to create CL buffer from GL texture. ",
121                      CLErrorCodeToString(error_code)));
122   }
123   *memory = CLMemory(mem, true);
124   return absl::OkStatus();
125 }
126 
IsGlSharingSupported(const CLDevice & device)127 bool IsGlSharingSupported(const CLDevice& device) {
128   return clCreateFromGLBuffer && clCreateFromGLTexture &&
129          device.GetInfo().SupportsExtension("cl_khr_gl_sharing");
130 }
131 
~AcquiredGlObjects()132 AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }
133 
Acquire(const std::vector<cl_mem> & memory,cl_command_queue queue,const std::vector<cl_event> & wait_events,CLEvent * acquire_event,AcquiredGlObjects * objects)134 absl::Status AcquiredGlObjects::Acquire(
135     const std::vector<cl_mem>& memory, cl_command_queue queue,
136     const std::vector<cl_event>& wait_events, CLEvent* acquire_event,
137     AcquiredGlObjects* objects) {
138   if (!memory.empty()) {
139     cl_event new_event;
140     cl_int error_code = clEnqueueAcquireGLObjects(
141         queue, memory.size(), memory.data(), wait_events.size(),
142         wait_events.data(), acquire_event ? &new_event : nullptr);
143     if (error_code != CL_SUCCESS) {
144       return absl::InternalError(absl::StrCat("Unable to acquire GL object. ",
145                                               CLErrorCodeToString(error_code)));
146     }
147     if (acquire_event) {
148       *acquire_event = CLEvent(new_event);
149     }
150     clFlush(queue);
151   }
152   *objects = AcquiredGlObjects(memory, queue);
153   return absl::OkStatus();
154 }
155 
Release(const std::vector<cl_event> & wait_events,CLEvent * release_event)156 absl::Status AcquiredGlObjects::Release(
157     const std::vector<cl_event>& wait_events, CLEvent* release_event) {
158   if (queue_ && !memory_.empty()) {
159     cl_event new_event;
160     cl_int error_code = clEnqueueReleaseGLObjects(
161         queue_, memory_.size(), memory_.data(), wait_events.size(),
162         wait_events.data(), release_event ? &new_event : nullptr);
163     if (error_code != CL_SUCCESS) {
164       return absl::InternalError(absl::StrCat("Unable to release GL object. ",
165                                               CLErrorCodeToString(error_code)));
166     }
167     if (release_event) {
168       *release_event = CLEvent(new_event);
169     }
170     clFlush(queue_);
171     queue_ = nullptr;
172   }
173   return absl::OkStatus();
174 }
175 
GlInteropFabric(EGLDisplay egl_display,Environment * environment)176 GlInteropFabric::GlInteropFabric(EGLDisplay egl_display,
177                                  Environment* environment)
178     : is_egl_sync_supported_(true),
179       is_egl_to_cl_mapping_supported_(
180           IsClEventFromEglSyncSupported(environment->device())),
181       is_cl_to_egl_mapping_supported_(IsEglSyncFromClEventSupported()),
182       egl_display_(egl_display),
183       context_(environment->context().context()),
184       queue_(environment->queue()->queue()) {}
185 
// Adds a GL-shared CL memory object to the set acquired/released around each
// Start()/Finish() cycle. Does not take ownership.
void GlInteropFabric::RegisterMemory(cl_mem memory) {
  memory_.push_back(memory);
}

UnregisterMemory(cl_mem memory)190 void GlInteropFabric::UnregisterMemory(cl_mem memory) {
191   auto it = std::find(memory_.begin(), memory_.end(), memory);
192   if (it != memory_.end()) {
193     memory_.erase(it);
194   }
195 }
196 
Start()197 absl::Status GlInteropFabric::Start() {
198   if (!is_enabled()) {
199     return absl::OkStatus();
200   }
201 
202   // In GL-CL interoperability, we need to make sure GL finished processing of
203   // all commands that might affect GL objects. There are a few ways:
204   //   a) glFinish
205   //      slow, but portable
206   //   b) EglSync + ClientWait
207   //      faster alternative for glFinish, but still slow as it stalls GPU
208   //      pipeline.
209   //   c) EglSync->CLEvent or GlSync->CLEvent mapping
210   //      Fast, as it allows to map sync to CL event and use it as a dependency
211   //      later without stalling GPU pipeline.
212   CLEvent inbound_event;
213   std::vector<cl_event> inbound_events;
214   if (is_egl_sync_supported_) {
215     EglSync sync;
216     RETURN_IF_ERROR(EglSync::NewFence(egl_display_, &sync));
217     if (is_egl_to_cl_mapping_supported_) {
218       // (c) EglSync->CLEvent or GlSync->CLEvent mapping
219       glFlush();
220       RETURN_IF_ERROR(CreateClEventFromEglSync(context_, sync, &inbound_event));
221       inbound_events.push_back(inbound_event.event());
222     } else {
223       // (b) EglSync + ClientWait
224       RETURN_IF_ERROR(sync.ClientWait());
225     }
226   } else {
227     // (a) glFinish / GL fence sync
228     RETURN_IF_ERROR(gl::GlActiveSyncWait());
229   }
230 
231   // Acquire all GL objects needed while processing.
232   return AcquiredGlObjects::Acquire(memory_, queue_, inbound_events, nullptr,
233                                     &gl_objects_);
234 }
235 
Finish()236 absl::Status GlInteropFabric::Finish() {
237   if (!is_enabled()) {
238     return absl::OkStatus();
239   }
240   CLEvent outbound_event;
241   RETURN_IF_ERROR(gl_objects_.Release({}, &outbound_event));
242 
243   // if (is_egl_sync_supported_ && is_cl_to_egl_mapping_supported_) {
244   //   EglSync egl_outbound_sync;
245   //   RETURN_IF_ERROR(CreateEglSyncFromClEvent(outbound_event.event(),
246   //                                            egl_display_,
247   //                                            &egl_outbound_sync));
248   //   // Instruct GL pipeline to wait until corresponding CL event is signaled.
249   //   RETURN_IF_ERROR(egl_outbound_sync.ServerWait());
250   //   glFlush();
251   // } else {
252   //   // Slower option if proper sync is not supported. It is equivalent to
253   //   // clFinish, but, hopefully, faster.
254   //   outbound_event.Wait();
255   // }
256 
257   // This slow sync is the only working solution right now. We have to debug why
258   // above version is not working fast and reliable.
259   outbound_event.Wait();
260   return absl::OkStatus();
261 }
262 
GlClBufferCopier(const TensorObjectDef & input_def,const TensorObjectDef & output_def,Environment * environment)263 GlClBufferCopier::GlClBufferCopier(const TensorObjectDef& input_def,
264                                    const TensorObjectDef& output_def,
265                                    Environment* environment) {
266   queue_ = environment->queue();
267   size_in_bytes_ =
268       NumElements(input_def) * SizeOf(input_def.object_def.data_type);
269 }
270 
Convert(const TensorObject & input_obj,const TensorObject & output_obj)271 absl::Status GlClBufferCopier::Convert(const TensorObject& input_obj,
272                                        const TensorObject& output_obj) {
273   if (absl::holds_alternative<OpenGlBuffer>(input_obj)) {
274     auto ssbo = absl::get_if<OpenGlBuffer>(&input_obj);
275     auto cl_mem = absl::get_if<OpenClBuffer>(&output_obj);
276     RETURN_IF_ERROR(
277         TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
278     void* ptr;
279     RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
280                                        GL_SHADER_STORAGE_BUFFER, 0,
281                                        size_in_bytes_, GL_MAP_READ_BIT));
282     RETURN_IF_ERROR(
283         queue_->EnqueueWriteBuffer(cl_mem->memobj, size_in_bytes_, ptr));
284     RETURN_IF_ERROR(
285         TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
286   } else {
287     auto cl_mem = absl::get_if<OpenClBuffer>(&input_obj);
288     auto ssbo = absl::get_if<OpenGlBuffer>(&output_obj);
289     RETURN_IF_ERROR(
290         TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
291     void* ptr;
292     RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
293                                        GL_SHADER_STORAGE_BUFFER, 0,
294                                        size_in_bytes_, GL_MAP_WRITE_BIT));
295     RETURN_IF_ERROR(
296         queue_->EnqueueReadBuffer(cl_mem->memobj, size_in_bytes_, ptr));
297     RETURN_IF_ERROR(
298         TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
299   }
300   return absl::OkStatus();
301 }
302 
303 }  // namespace cl
304 }  // namespace gpu
305 }  // namespace tflite
306