1 //===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// This file defines the standard OpenCL implementation of the Acxxel API.
10 ///
11 //===----------------------------------------------------------------------===//
12
13 #include "acxxel.h"
14
15 #include "CL/cl.h"
16
17 #include <mutex>
18 #include <sstream>
19 #include <utility>
20 #include <vector>
21
22 namespace acxxel {
23
24 namespace {
25
26 /// An ID containing the platform ID and the device ID within the platform.
27 struct FullDeviceID {
28 cl_platform_id PlatformID;
29 cl_device_id DeviceID;
30
FullDeviceIDacxxel::__anonacf1525c0111::FullDeviceID31 FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
32 : PlatformID(PlatformID), DeviceID(DeviceID) {}
33 };
34
getOpenCLErrorMessage(cl_int Result)35 static std::string getOpenCLErrorMessage(cl_int Result) {
36 if (!Result)
37 return "success";
38 std::ostringstream OutStream;
39 OutStream << "OpenCL error: code = " << Result;
40 return OutStream.str();
41 }
42
getOpenCLError(cl_int Result,const std::string & Message)43 static Status getOpenCLError(cl_int Result, const std::string &Message) {
44 if (!Result)
45 return Status();
46 std::ostringstream OutStream;
47 OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
48 return Status(OutStream.str());
49 }
50
logOpenCLWarning(cl_int Result,const std::string & Message)51 static void logOpenCLWarning(cl_int Result, const std::string &Message) {
52 if (Result) {
53 std::ostringstream OutStream;
54 OutStream << Message << ": " << getOpenCLErrorMessage(Result);
55 logWarning(OutStream.str());
56 }
57 }
58
59 class OpenCLPlatform : public Platform {
60 public:
61 ~OpenCLPlatform() override = default;
62
63 static Expected<OpenCLPlatform> create();
64
65 Expected<int> getDeviceCount() override;
66
67 Expected<Stream> createStream(int DeviceIndex) override;
68
69 Expected<Event> createEvent(int DeviceIndex) override;
70
71 Expected<Program> createProgramFromSource(Span<const char> Source,
72 int DeviceIndex) override;
73
74 protected:
75 Status streamSync(void *Stream) override;
76
77 Status streamWaitOnEvent(void *Stream, void *Event) override;
78
79 Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
80 HandleDestructor getDeviceMemoryHandleDestructor() override;
81 void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
82 size_t ByteOffset) override;
83 void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
84
85 Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
86 int DeviceIndex) override;
87 Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
88 int DeviceIndex) override;
89
90 Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
91 HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
92
93 Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
94 HandleDestructor getFreeHostMemoryHandleDestructor() override;
95
96 Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
97 void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
98 ptrdiff_t ByteCount, void *Stream) override;
99 Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
100 void *HostDst, ptrdiff_t ByteCount,
101 void *Stream) override;
102 Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
103 ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
104 void *Stream) override;
105
106 Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
107 ptrdiff_t ByteCount, char ByteValue,
108 void *Stream) override;
109
110 Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
111
112 Status enqueueEvent(void *Event, void *Stream) override;
113 bool eventIsDone(void *Event) override;
114 Status eventSync(void *Event) override;
115 Expected<float> getSecondsBetweenEvents(void *StartEvent,
116 void *EndEvent) override;
117
118 Expected<void *> rawCreateKernel(void *Program,
119 const std::string &Name) override;
120 HandleDestructor getKernelHandleDestructor() override;
121
122 Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
123 KernelLaunchDimensions LaunchDimensions,
124 Span<void *> Arguments,
125 Span<size_t> ArgumentSizes,
126 size_t SharedMemoryBytes) override;
127
128 private:
OpenCLPlatform(std::vector<FullDeviceID> && FullDeviceIDs,std::vector<cl_context> && Contexts,std::vector<cl_command_queue> && CommandQueues)129 OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
130 std::vector<cl_context> &&Contexts,
131 std::vector<cl_command_queue> &&CommandQueues)
132 : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
133 CommandQueues(std::move(CommandQueues)) {}
134
135 std::vector<FullDeviceID> FullDeviceIDs;
136 std::vector<cl_context> Contexts;
137 std::vector<cl_command_queue> CommandQueues;
138 };
139
create()140 Expected<OpenCLPlatform> OpenCLPlatform::create() {
141 constexpr cl_uint MaxNumEntries = 100;
142 cl_platform_id Platforms[MaxNumEntries];
143 cl_uint NumPlatforms;
144 if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
145 return getOpenCLError(Result, "clGetPlatformIDs");
146
147 std::vector<FullDeviceID> FullDeviceIDs;
148 for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
149 ++PlatformIndex) {
150 cl_uint NumDevices;
151 cl_device_id Devices[MaxNumEntries];
152 if (cl_int Result =
153 clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
154 MaxNumEntries, Devices, &NumDevices))
155 return getOpenCLError(Result, "clGetDeviceIDs");
156 for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
157 FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
158 Devices[DeviceIndex]);
159 }
160
161 if (FullDeviceIDs.empty())
162 return Status("No OpenCL device available on this system.");
163
164 std::vector<cl_context> Contexts(FullDeviceIDs.size());
165 std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
166 for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
167 cl_int CreateContextResult;
168 Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
169 nullptr, nullptr, &CreateContextResult);
170 if (CreateContextResult)
171 return getOpenCLError(CreateContextResult, "clCreateContext");
172
173 cl_int CreateCommandQueueResult;
174 CommandQueues[I] = clCreateCommandQueue(
175 Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
176 &CreateCommandQueueResult);
177 if (CreateCommandQueueResult)
178 return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
179 }
180
181 return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
182 std::move(CommandQueues));
183 }
184
getDeviceCount()185 Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
186
openCLDestroyStream(void * H)187 static void openCLDestroyStream(void *H) {
188 logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
189 "clReleaseCommandQueue");
190 }
191
createStream(int DeviceIndex)192 Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) {
193 cl_int Result;
194 cl_command_queue Queue = clCreateCommandQueue(
195 Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID,
196 CL_QUEUE_PROFILING_ENABLE, &Result);
197 if (Result)
198 return getOpenCLError(Result, "clCreateCommandQueue");
199 return constructStream(this, DeviceIndex, Queue, openCLDestroyStream);
200 }
201
openCLEventDestroy(void * H)202 static void openCLEventDestroy(void *H) {
203 cl_event *CLEvent = static_cast<cl_event *>(H);
204 logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
205 delete CLEvent;
206 }
207
streamSync(void * Stream)208 Status OpenCLPlatform::streamSync(void *Stream) {
209 return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
210 "clFinish");
211 }
212
streamWaitOnEvent(void * Stream,void * Event)213 Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
214 cl_event *CLEvent = static_cast<cl_event *>(Event);
215 return getOpenCLError(
216 clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
217 CLEvent, nullptr),
218 "clEnqueueMarkerWithWaitList");
219 }
220
createEvent(int DeviceIndex)221 Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) {
222 cl_int Result;
223 cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result);
224 if (Result)
225 return getOpenCLError(Result, "clCreateUserEvent");
226 if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
227 return getOpenCLError(Result, "clSetUserEventStatus");
228 return constructEvent(this, DeviceIndex, new cl_event(Event),
229 openCLEventDestroy);
230 }
231
openCLDestroyProgram(void * H)232 static void openCLDestroyProgram(void *H) {
233 logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
234 "clReleaseProgram");
235 }
236
237 Expected<Program>
createProgramFromSource(Span<const char> Source,int DeviceIndex)238 OpenCLPlatform::createProgramFromSource(Span<const char> Source,
239 int DeviceIndex) {
240 cl_int Error;
241 const char *CSource = Source.data();
242 size_t SourceSize = Source.size();
243 cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1,
244 &CSource, &SourceSize, &Error);
245 if (Error)
246 return getOpenCLError(Error, "clCreateProgramWithSource");
247 cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID;
248 if (cl_int Error =
249 clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
250 return getOpenCLError(Error, "clBuildProgram");
251 return constructProgram(this, Program, openCLDestroyProgram);
252 }
253
rawMallocD(ptrdiff_t ByteCount,int DeviceIndex)254 Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount,
255 int DeviceIndex) {
256 cl_int Result;
257 cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE,
258 ByteCount, nullptr, &Result);
259 if (Result)
260 return getOpenCLError(Result, "clCreateBuffer");
261 return reinterpret_cast<void *>(Memory);
262 }
263
openCLDestroyDeviceMemory(void * H)264 static void openCLDestroyDeviceMemory(void *H) {
265 logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
266 "clReleaseMemObject");
267 }
268
getDeviceMemoryHandleDestructor()269 HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
270 return openCLDestroyDeviceMemory;
271 }
272
getDeviceMemorySpanHandle(void * BaseHandle,size_t ByteSize,size_t ByteOffset)273 void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
274 size_t ByteSize,
275 size_t ByteOffset) {
276 cl_int Error;
277 cl_buffer_region Region;
278 Region.origin = ByteOffset;
279 Region.size = ByteSize;
280 cl_mem SubBuffer =
281 clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
282 CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
283 logOpenCLWarning(Error, "clCreateSubBuffer");
284 if (Error)
285 return nullptr;
286 return SubBuffer;
287 }
288
rawDestroyDeviceMemorySpanHandle(void * Handle)289 void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
290 openCLDestroyDeviceMemory(Handle);
291 }
292
293 Expected<void *>
rawGetDeviceSymbolAddress(const void *,int)294 OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
295 int /*DeviceIndex*/) {
296 // This doesn't seem to have any equivalent in OpenCL.
297 return Status("not implemented");
298 }
299
300 Expected<ptrdiff_t>
rawGetDeviceSymbolSize(const void *,int)301 OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
302 int /*DeviceIndex*/) {
303 // This doesn't seem to have any equivalent in OpenCL.
304 return Status("not implemented");
305 }
306
noOpHandleDestructor(void *)307 static void noOpHandleDestructor(void *) {}
308
rawRegisterHostMem(const void *,ptrdiff_t)309 Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
310 ptrdiff_t /*ByteCount*/) {
311 // TODO(jhen): Do we want to do something to pin the memory here?
312 return Status();
313 }
314
getUnregisterHostMemoryHandleDestructor()315 HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
316 // TODO(jhen): Do we want to unpin the memory here?
317 return noOpHandleDestructor;
318 }
319
rawMallocRegisteredH(ptrdiff_t ByteCount)320 Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
321 // TODO(jhen): Do we want to do something to pin the memory here?
322 return std::malloc(ByteCount);
323 }
324
freeMemoryHandleDestructor(void * Memory)325 static void freeMemoryHandleDestructor(void *Memory) {
326 // TODO(jhen): Do we want to unpin the memory here?
327 std::free(Memory);
328 }
329
getFreeHostMemoryHandleDestructor()330 HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
331 return freeMemoryHandleDestructor;
332 }
333
asyncCopyDToD(const void * DeviceSrc,ptrdiff_t DeviceSrcByteOffset,void * DeviceDst,ptrdiff_t DeviceDstByteOffset,ptrdiff_t ByteCount,void * Stream)334 Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
335 ptrdiff_t DeviceSrcByteOffset,
336 void *DeviceDst,
337 ptrdiff_t DeviceDstByteOffset,
338 ptrdiff_t ByteCount, void *Stream) {
339 return getOpenCLError(
340 clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
341 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
342 static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
343 DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
344 "clEnqueueCopyBuffer");
345 }
346
asyncCopyDToH(const void * DeviceSrc,ptrdiff_t DeviceSrcByteOffset,void * HostDst,ptrdiff_t ByteCount,void * Stream)347 Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
348 ptrdiff_t DeviceSrcByteOffset,
349 void *HostDst, ptrdiff_t ByteCount,
350 void *Stream) {
351 return getOpenCLError(
352 clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
353 static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
354 CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
355 nullptr, nullptr),
356 "clEnqueueReadBuffer");
357 }
358
asyncCopyHToD(const void * HostSrc,void * DeviceDst,ptrdiff_t DeviceDstByteOffset,ptrdiff_t ByteCount,void * Stream)359 Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
360 ptrdiff_t DeviceDstByteOffset,
361 ptrdiff_t ByteCount, void *Stream) {
362 return getOpenCLError(
363 clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
364 static_cast<cl_mem>(DeviceDst), CL_TRUE,
365 DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
366 nullptr),
367 "clEnqueueWriteBuffer");
368 }
369
asyncMemsetD(void * DeviceDst,ptrdiff_t ByteOffset,ptrdiff_t ByteCount,char ByteValue,void * Stream)370 Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
371 ptrdiff_t ByteCount, char ByteValue,
372 void *Stream) {
373 return getOpenCLError(
374 clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
375 static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
376 ByteOffset, ByteCount, 0, nullptr, nullptr),
377 "clEnqueueFillBuffer");
378 }
379
380 struct StreamCallbackUserData {
StreamCallbackUserDataacxxel::__anonacf1525c0111::StreamCallbackUserData381 StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
382 cl_event EndEvent)
383 : TheStream(TheStream), TheFunction(std::move(Function)),
384 EndEvent(EndEvent) {}
385
386 Stream &TheStream;
387 StreamCallback TheFunction;
388 cl_event EndEvent;
389 };
390
391 // A function with the right signature to pass to clSetEventCallback.
openCLStreamCallbackShim(cl_event,cl_int EventCommandExecStatus,void * UserData)392 void CL_CALLBACK openCLStreamCallbackShim(cl_event,
393 cl_int EventCommandExecStatus,
394 void *UserData) {
395 std::unique_ptr<StreamCallbackUserData> Data(
396 static_cast<StreamCallbackUserData *>(UserData));
397 Data->TheFunction(
398 Data->TheStream,
399 getOpenCLError(EventCommandExecStatus, "stream callback error state"));
400 if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
401 logOpenCLWarning(Result, "clSetUserEventStatus");
402 if (cl_int Result = clReleaseEvent(Data->EndEvent))
403 logOpenCLWarning(Result, "clReleaseEvent");
404 }
405
addStreamCallback(Stream & TheStream,StreamCallback Callback)406 Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
407 StreamCallback Callback) {
408 cl_int Result;
409 cl_event StartEvent =
410 clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
411 if (Result)
412 return getOpenCLError(Result, "clCreateUserEvent");
413 cl_event EndEvent =
414 clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
415 if (Result)
416 return getOpenCLError(Result, "clCreateUserEvent");
417 cl_event StartBarrierEvent;
418 if (cl_int Result = clEnqueueBarrierWithWaitList(
419 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
420 &StartEvent, &StartBarrierEvent))
421 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
422
423 if (cl_int Result = clEnqueueBarrierWithWaitList(
424 static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
425 &EndEvent, nullptr))
426 return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
427
428 std::unique_ptr<StreamCallbackUserData> UserData(
429 new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
430 if (cl_int Result =
431 clSetEventCallback(StartBarrierEvent, CL_RUNNING,
432 openCLStreamCallbackShim, UserData.release()))
433 return getOpenCLError(Result, "clSetEventCallback");
434
435 if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
436 return getOpenCLError(Result, "clSetUserEventStatus");
437
438 if (cl_int Result = clReleaseEvent(StartBarrierEvent))
439 return getOpenCLError(Result, "clReleaseEvent");
440
441 return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
442 }
443
enqueueEvent(void * Event,void * Stream)444 Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
445 cl_event *CLEvent = static_cast<cl_event *>(Event);
446 cl_event OldEvent = *CLEvent;
447 cl_event NewEvent;
448 if (cl_int Result = clEnqueueMarkerWithWaitList(
449 static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
450 return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
451 *CLEvent = NewEvent;
452 return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
453 }
454
eventIsDone(void * Event)455 bool OpenCLPlatform::eventIsDone(void *Event) {
456 cl_event *CLEvent = static_cast<cl_event *>(Event);
457 cl_int EventStatus;
458 logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
459 sizeof(EventStatus), &EventStatus, nullptr),
460 "clGetEventInfo");
461 return EventStatus == CL_COMPLETE || EventStatus < 0;
462 }
463
eventSync(void * Event)464 Status OpenCLPlatform::eventSync(void *Event) {
465 cl_event *CLEvent = static_cast<cl_event *>(Event);
466 return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
467 }
468
getSecondsBetweenEvents(void * StartEvent,void * EndEvent)469 Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
470 void *EndEvent) {
471 cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
472 cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
473
474 cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
475 cl_ulong StartNanoseconds;
476 cl_ulong EndNanoseconds;
477 if (cl_int Result =
478 clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
479 &StartNanoseconds, nullptr))
480 return getOpenCLError(Result, "clGetEventProfilingInfo");
481 if (cl_int Result = clGetEventProfilingInfo(
482 *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
483 return getOpenCLError(Result, "clGetEventProfilingInfo");
484 return (EndNanoseconds - StartNanoseconds) * 1e-12;
485 }
486
rawCreateKernel(void * Program,const std::string & Name)487 Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
488 const std::string &Name) {
489
490 cl_int Error;
491 cl_kernel Kernel =
492 clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
493 if (Error)
494 return getOpenCLError(Error, "clCreateKernel");
495 return Kernel;
496 }
497
openCLDestroyKernel(void * H)498 static void openCLDestroyKernel(void *H) {
499 logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
500 "clReleaseKernel");
501 }
502
getKernelHandleDestructor()503 HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
504 return openCLDestroyKernel;
505 }
506
rawEnqueueKernelLaunch(void * Stream,void * Kernel,KernelLaunchDimensions LaunchDimensions,Span<void * > Arguments,Span<size_t> ArgumentSizes,size_t SharedMemoryBytes)507 Status OpenCLPlatform::rawEnqueueKernelLaunch(
508 void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
509 Span<void *> Arguments, Span<size_t> ArgumentSizes,
510 size_t SharedMemoryBytes) {
511 if (SharedMemoryBytes != 0)
512 return Status("OpenCL kernel launches only accept zero for the shared "
513 "memory byte size");
514 cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
515 for (int I = 0; I < Arguments.size(); ++I)
516 if (cl_int Error =
517 clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
518 return getOpenCLError(Error, "clSetKernelArg");
519 size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
520 LaunchDimensions.BlockZ};
521 size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
522 LaunchDimensions.BlockY * LaunchDimensions.GridY,
523 LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
524 return getOpenCLError(
525 clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
526 3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
527 nullptr, nullptr),
528 "clEnqueueNDRangeKernel");
529 }
530
531 } // namespace
532
533 namespace opencl {
534
535 /// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
536 /// Platform.
getPlatform()537 Expected<Platform *> getPlatform() {
538 static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
539 Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
540 if (CreationResult.isError())
541 return CreationResult.getError();
542 else
543 return new OpenCLPlatform(CreationResult.takeValue());
544 }();
545 return MaybePlatform;
546 }
547
548 } // namespace opencl
549
550 } // namespace acxxel
551