/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/lite/delegates/gpu/cl/gl_interop.h"

#include "absl/strings/str_cat.h"
#include "tensorflow/lite/delegates/gpu/cl/cl_errors.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_call.h"
#include "tensorflow/lite/delegates/gpu/gl/gl_sync.h"

namespace tflite {
namespace gpu {
namespace cl {
namespace {

#ifndef EGL_VERSION_1_5
typedef void* EGLSync;
#define EGL_SYNC_CL_EVENT 0x30FE
#define EGL_CL_EVENT_HANDLE 0x309C
#define EGL_NO_SYNC 0
#endif /* EGL_VERSION_1_5 */
// TODO(b/131897059): replace with the 64-bit version when EGL 1.5 is
// available. It should use the EGL_KHR_cl_event2 extension; more details are
// in b/129974818.
using PFNEGLCREATESYNCPROC = EGLSync(EGLAPIENTRYP)(
    EGLDisplay dpy, EGLenum type, const EGLAttrib* attrib_list);

PFNEGLCREATESYNCPROC g_eglCreateSync = nullptr;

}  // namespace

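// Creates an EGL sync object that is signaled once the given OpenCL event
// completes. Requires EGL 1.5 (eglCreateSync with EGL_SYNC_CL_EVENT); see
// IsEglSyncFromClEventSupported().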
absl::Status CreateEglSyncFromClEvent(cl_event event, EGLDisplay display,
                                      EglSync* sync) {
  if (!IsEglSyncFromClEventSupported()) {
    return absl::UnimplementedError(
        "CreateEglSyncFromClEvent is not supported");
  }
  EGLSync egl_sync;
  const EGLAttrib attributes[] = {EGL_CL_EVENT_HANDLE,
                                  reinterpret_cast<EGLAttrib>(event), EGL_NONE};
  RETURN_IF_ERROR(TFLITE_GPU_CALL_EGL(g_eglCreateSync, &egl_sync, display,
                                      EGL_SYNC_CL_EVENT, attributes));
  if (egl_sync == EGL_NO_SYNC) {
    return absl::InternalError("Returned empty EGL sync");
  }
  *sync = EglSync(display, egl_sync);
  return absl::OkStatus();
}

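// Returns true if eglCreateSync can be resolved and the EGL implementation
// behaves as EGL 1.5 (eglQueryString accepting EGL_NO_DISPLAY).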
bool IsEglSyncFromClEventSupported() {
  // In C++11, static initializers are guaranteed to be evaluated only once.
  static bool supported = []() -> bool {
    // This function requires EGL 1.5 to work.
    g_eglCreateSync = reinterpret_cast<PFNEGLCREATESYNCPROC>(
        eglGetProcAddress("eglCreateSync"));
    // eglQueryString accepts EGL_NO_DISPLAY only starting with EGL 1.5.
    if (!eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS)) {
      g_eglCreateSync = nullptr;
    }
    return (g_eglCreateSync != nullptr);
  }();
  return supported;
}

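// Wraps the given EGL sync into an OpenCL event via
// clCreateEventFromEGLSyncKHR. Requires the cl_khr_egl_event extension; see
// IsClEventFromEglSyncSupported().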
absl::Status CreateClEventFromEglSync(cl_context context,
                                      const EglSync& egl_sync,
                                      CLEvent* event) {
  cl_int error_code;
  cl_event new_event = clCreateEventFromEGLSyncKHR(
      context, egl_sync.sync(), egl_sync.display(), &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::InternalError(
        absl::StrCat("Unable to create CL sync from EGL sync. ",
                     CLErrorCodeToString(error_code)));
  }
  *event = CLEvent(new_event);
  return absl::OkStatus();
}

bool IsClEventFromEglSyncSupported(const CLDevice& device) {
  return device.GetInfo().SupportsExtension("cl_khr_egl_event");
}

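// Shares a GL shader storage buffer with OpenCL; the resulting CLMemory owns
// the underlying cl_mem.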
absl::Status CreateClMemoryFromGlBuffer(GLuint gl_ssbo_id,
                                        AccessType access_type,
                                        CLContext* context, CLMemory* memory) {
  cl_int error_code;
  auto mem = clCreateFromGLBuffer(context->context(), ToClMemFlags(access_type),
                                  gl_ssbo_id, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::InternalError(
        absl::StrCat("Unable to acquire CL buffer from GL buffer. ",
                     CLErrorCodeToString(error_code)));
  }
  *memory = CLMemory(mem, true);
  return absl::OkStatus();
}

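// Shares mip level 0 of a GL texture with OpenCL; the resulting CLMemory owns
// the underlying cl_mem.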
absl::Status CreateClMemoryFromGlTexture(GLenum texture_target,
                                         GLuint texture_id,
                                         AccessType access_type,
                                         CLContext* context, CLMemory* memory) {
  cl_int error_code;
  auto mem =
      clCreateFromGLTexture(context->context(), ToClMemFlags(access_type),
                            texture_target, 0, texture_id, &error_code);
  if (error_code != CL_SUCCESS) {
    return absl::InternalError(
        absl::StrCat("Unable to create CL texture from GL texture. ",
                     CLErrorCodeToString(error_code)));
  }
  *memory = CLMemory(mem, true);
  return absl::OkStatus();
}

bool IsGlSharingSupported(const CLDevice& device) {
  return clCreateFromGLBuffer && clCreateFromGLTexture &&
         device.GetInfo().SupportsExtension("cl_khr_gl_sharing");
}

AcquiredGlObjects::~AcquiredGlObjects() { Release({}, nullptr).IgnoreError(); }

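// Enqueues acquisition of all given GL-shared objects on the queue, waiting
// on `wait_events` and optionally returning an event for the acquisition
// itself.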
absl::Status AcquiredGlObjects::Acquire(
    const std::vector<cl_mem>& memory, cl_command_queue queue,
    const std::vector<cl_event>& wait_events, CLEvent* acquire_event,
    AcquiredGlObjects* objects) {
  if (!memory.empty()) {
    cl_event new_event;
    cl_int error_code = clEnqueueAcquireGLObjects(
        queue, memory.size(), memory.data(), wait_events.size(),
        wait_events.data(), acquire_event ? &new_event : nullptr);
    if (error_code != CL_SUCCESS) {
      return absl::InternalError(absl::StrCat("Unable to acquire GL object. ",
                                              CLErrorCodeToString(error_code)));
    }
    if (acquire_event) {
      *acquire_event = CLEvent(new_event);
    }
    clFlush(queue);
  }
  *objects = AcquiredGlObjects(memory, queue);
  return absl::OkStatus();
}

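// Enqueues release of the previously acquired objects back to GL; safe to
// call more than once, since the queue is cleared after the first release.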
absl::Status AcquiredGlObjects::Release(
    const std::vector<cl_event>& wait_events, CLEvent* release_event) {
  if (queue_ && !memory_.empty()) {
    cl_event new_event;
    cl_int error_code = clEnqueueReleaseGLObjects(
        queue_, memory_.size(), memory_.data(), wait_events.size(),
        wait_events.data(), release_event ? &new_event : nullptr);
    if (error_code != CL_SUCCESS) {
      return absl::InternalError(absl::StrCat("Unable to release GL object. ",
                                              CLErrorCodeToString(error_code)));
    }
    if (release_event) {
      *release_event = CLEvent(new_event);
    }
    clFlush(queue_);
    queue_ = nullptr;
  }
  return absl::OkStatus();
}

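// A minimal usage sketch (illustrative only; `environment` and `cl_memory`
// are assumed to come from the surrounding delegate setup):
//
//   GlInteropFabric fabric(egl_display, environment);
//   fabric.RegisterMemory(cl_memory);   // cl_mem created from a GL object
//   RETURN_IF_ERROR(fabric.Start());    // sync GL->CL, acquire objects
//   ... enqueue CL kernels that use cl_memory ...
//   RETURN_IF_ERROR(fabric.Finish());   // release objects, sync CL->GL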
GlInteropFabric::GlInteropFabric(EGLDisplay egl_display,
                                 Environment* environment)
    : is_egl_sync_supported_(true),
      is_egl_to_cl_mapping_supported_(
          IsClEventFromEglSyncSupported(environment->device())),
      is_cl_to_egl_mapping_supported_(IsEglSyncFromClEventSupported()),
      egl_display_(egl_display),
      context_(environment->context().context()),
      queue_(environment->queue()->queue()) {}

void GlInteropFabric::RegisterMemory(cl_mem memory) {
  memory_.push_back(memory);
}

void GlInteropFabric::UnregisterMemory(cl_mem memory) {
  auto it = std::find(memory_.begin(), memory_.end(), memory);
  if (it != memory_.end()) {
    memory_.erase(it);
  }
}

absl::Status GlInteropFabric::Start() {
  if (!is_enabled()) {
    return absl::OkStatus();
  }

  // In GL-CL interoperability, we need to make sure that GL has finished
  // processing all commands that might affect GL objects. There are a few
  // ways to do this:
  //   a) glFinish
  //      slow, but portable
  //   b) EglSync + ClientWait
  //      a faster alternative to glFinish, but still slow as it stalls the
  //      GPU pipeline.
  //   c) EglSync->CLEvent or GlSync->CLEvent mapping
  //      fast, as it allows mapping a sync to a CL event and using it as a
  //      dependency later without stalling the GPU pipeline.
  CLEvent inbound_event;
  std::vector<cl_event> inbound_events;
  if (is_egl_sync_supported_) {
    EglSync sync;
    RETURN_IF_ERROR(EglSync::NewFence(egl_display_, &sync));
    if (is_egl_to_cl_mapping_supported_) {
      // (c) EglSync->CLEvent or GlSync->CLEvent mapping
      glFlush();
      RETURN_IF_ERROR(
          CreateClEventFromEglSync(context_, sync, &inbound_event));
      inbound_events.push_back(inbound_event.event());
    } else {
      // (b) EglSync + ClientWait
      RETURN_IF_ERROR(sync.ClientWait());
    }
  } else {
    // (a) glFinish / GL fence sync
    RETURN_IF_ERROR(gl::GlActiveSyncWait());
  }

  // Acquire all GL objects needed while processing.
  return AcquiredGlObjects::Acquire(memory_, queue_, inbound_events, nullptr,
                                    &gl_objects_);
}

absl::Status GlInteropFabric::Finish() {
  if (!is_enabled()) {
    return absl::OkStatus();
  }
  CLEvent outbound_event;
  RETURN_IF_ERROR(gl_objects_.Release({}, &outbound_event));

  // if (is_egl_sync_supported_ && is_cl_to_egl_mapping_supported_) {
  //   EglSync egl_outbound_sync;
  //   RETURN_IF_ERROR(CreateEglSyncFromClEvent(outbound_event.event(),
  //                                            egl_display_,
  //                                            &egl_outbound_sync));
  //   // Instruct the GL pipeline to wait until the corresponding CL event
  //   // is signaled.
  //   RETURN_IF_ERROR(egl_outbound_sync.ServerWait());
  //   glFlush();
  // } else {
  //   // Slower option if proper sync is not supported. It is equivalent to
  //   // clFinish, but, hopefully, faster.
  //   outbound_event.Wait();
  // }

  // This slow sync is the only working solution right now. We still have to
  // debug why the version above is neither fast nor reliable.
  outbound_event.Wait();
  return absl::OkStatus();
}

GlClBufferCopier::GlClBufferCopier(const TensorObjectDef& input_def,
                                   const TensorObjectDef& output_def,
                                   Environment* environment) {
  queue_ = environment->queue();
  size_in_bytes_ =
      NumElements(input_def) * SizeOf(input_def.object_def.data_type);
}

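// Copies between a GL SSBO and a CL buffer by mapping the SSBO into host
// memory and enqueueing a CL read or write; the copy direction is determined
// by which side of the conversion holds the OpenGlBuffer.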
absl::Status GlClBufferCopier::Convert(const TensorObject& input_obj,
                                       const TensorObject& output_obj) {
  if (absl::holds_alternative<OpenGlBuffer>(input_obj)) {
    auto ssbo = absl::get_if<OpenGlBuffer>(&input_obj);
    auto cl_mem = absl::get_if<OpenClBuffer>(&output_obj);
    RETURN_IF_ERROR(
        TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
    void* ptr;
    RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
                                       GL_SHADER_STORAGE_BUFFER, 0,
                                       size_in_bytes_, GL_MAP_READ_BIT));
    RETURN_IF_ERROR(
        queue_->EnqueueWriteBuffer(cl_mem->memobj, size_in_bytes_, ptr));
    RETURN_IF_ERROR(
        TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
  } else {
    auto cl_mem = absl::get_if<OpenClBuffer>(&input_obj);
    auto ssbo = absl::get_if<OpenGlBuffer>(&output_obj);
    RETURN_IF_ERROR(
        TFLITE_GPU_CALL_GL(glBindBuffer, GL_SHADER_STORAGE_BUFFER, ssbo->id));
    void* ptr;
    RETURN_IF_ERROR(TFLITE_GPU_CALL_GL(glMapBufferRange, &ptr,
                                       GL_SHADER_STORAGE_BUFFER, 0,
                                       size_in_bytes_, GL_MAP_WRITE_BIT));
    RETURN_IF_ERROR(
        queue_->EnqueueReadBuffer(cl_mem->memobj, size_in_bytes_, ptr));
    RETURN_IF_ERROR(
        TFLITE_GPU_CALL_GL(glUnmapBuffer, GL_SHADER_STORAGE_BUFFER));
  }
  return absl::OkStatus();
}

}  // namespace cl
}  // namespace gpu
}  // namespace tflite