1 //===--- opencl_acxxel.cpp - OpenCL implementation of the Acxxel API ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// This file defines the standard OpenCL implementation of the Acxxel API.
10 ///
11 //===----------------------------------------------------------------------===//
12 
13 #include "acxxel.h"
14 
15 #include "CL/cl.h"
16 
17 #include <mutex>
18 #include <sstream>
19 #include <utility>
20 #include <vector>
21 
22 namespace acxxel {
23 
24 namespace {
25 
26 /// An ID containing the platform ID and the device ID within the platform.
27 struct FullDeviceID {
28   cl_platform_id PlatformID;
29   cl_device_id DeviceID;
30 
FullDeviceIDacxxel::__anonacf1525c0111::FullDeviceID31   FullDeviceID(cl_platform_id PlatformID, cl_device_id DeviceID)
32       : PlatformID(PlatformID), DeviceID(DeviceID) {}
33 };
34 
getOpenCLErrorMessage(cl_int Result)35 static std::string getOpenCLErrorMessage(cl_int Result) {
36   if (!Result)
37     return "success";
38   std::ostringstream OutStream;
39   OutStream << "OpenCL error: code = " << Result;
40   return OutStream.str();
41 }
42 
getOpenCLError(cl_int Result,const std::string & Message)43 static Status getOpenCLError(cl_int Result, const std::string &Message) {
44   if (!Result)
45     return Status();
46   std::ostringstream OutStream;
47   OutStream << getOpenCLErrorMessage(Result) << ", message = " << Message;
48   return Status(OutStream.str());
49 }
50 
logOpenCLWarning(cl_int Result,const std::string & Message)51 static void logOpenCLWarning(cl_int Result, const std::string &Message) {
52   if (Result) {
53     std::ostringstream OutStream;
54     OutStream << Message << ": " << getOpenCLErrorMessage(Result);
55     logWarning(OutStream.str());
56   }
57 }
58 
59 class OpenCLPlatform : public Platform {
60 public:
61   ~OpenCLPlatform() override = default;
62 
63   static Expected<OpenCLPlatform> create();
64 
65   Expected<int> getDeviceCount() override;
66 
67   Expected<Stream> createStream(int DeviceIndex) override;
68 
69   Expected<Event> createEvent(int DeviceIndex) override;
70 
71   Expected<Program> createProgramFromSource(Span<const char> Source,
72                                             int DeviceIndex) override;
73 
74 protected:
75   Status streamSync(void *Stream) override;
76 
77   Status streamWaitOnEvent(void *Stream, void *Event) override;
78 
79   Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
80   HandleDestructor getDeviceMemoryHandleDestructor() override;
81   void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
82                                   size_t ByteOffset) override;
83   void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
84 
85   Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
86                                              int DeviceIndex) override;
87   Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
88                                              int DeviceIndex) override;
89 
90   Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
91   HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
92 
93   Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
94   HandleDestructor getFreeHostMemoryHandleDestructor() override;
95 
96   Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
97                        void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
98                        ptrdiff_t ByteCount, void *Stream) override;
99   Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
100                        void *HostDst, ptrdiff_t ByteCount,
101                        void *Stream) override;
102   Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
103                        ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
104                        void *Stream) override;
105 
106   Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
107                       ptrdiff_t ByteCount, char ByteValue,
108                       void *Stream) override;
109 
110   Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
111 
112   Status enqueueEvent(void *Event, void *Stream) override;
113   bool eventIsDone(void *Event) override;
114   Status eventSync(void *Event) override;
115   Expected<float> getSecondsBetweenEvents(void *StartEvent,
116                                           void *EndEvent) override;
117 
118   Expected<void *> rawCreateKernel(void *Program,
119                                    const std::string &Name) override;
120   HandleDestructor getKernelHandleDestructor() override;
121 
122   Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
123                                 KernelLaunchDimensions LaunchDimensions,
124                                 Span<void *> Arguments,
125                                 Span<size_t> ArgumentSizes,
126                                 size_t SharedMemoryBytes) override;
127 
128 private:
OpenCLPlatform(std::vector<FullDeviceID> && FullDeviceIDs,std::vector<cl_context> && Contexts,std::vector<cl_command_queue> && CommandQueues)129   OpenCLPlatform(std::vector<FullDeviceID> &&FullDeviceIDs,
130                  std::vector<cl_context> &&Contexts,
131                  std::vector<cl_command_queue> &&CommandQueues)
132       : FullDeviceIDs(std::move(FullDeviceIDs)), Contexts(std::move(Contexts)),
133         CommandQueues(std::move(CommandQueues)) {}
134 
135   std::vector<FullDeviceID> FullDeviceIDs;
136   std::vector<cl_context> Contexts;
137   std::vector<cl_command_queue> CommandQueues;
138 };
139 
create()140 Expected<OpenCLPlatform> OpenCLPlatform::create() {
141   constexpr cl_uint MaxNumEntries = 100;
142   cl_platform_id Platforms[MaxNumEntries];
143   cl_uint NumPlatforms;
144   if (cl_int Result = clGetPlatformIDs(MaxNumEntries, Platforms, &NumPlatforms))
145     return getOpenCLError(Result, "clGetPlatformIDs");
146 
147   std::vector<FullDeviceID> FullDeviceIDs;
148   for (cl_uint PlatformIndex = 0; PlatformIndex < NumPlatforms;
149        ++PlatformIndex) {
150     cl_uint NumDevices;
151     cl_device_id Devices[MaxNumEntries];
152     if (cl_int Result =
153             clGetDeviceIDs(Platforms[PlatformIndex], CL_DEVICE_TYPE_ALL,
154                            MaxNumEntries, Devices, &NumDevices))
155       return getOpenCLError(Result, "clGetDeviceIDs");
156     for (cl_uint DeviceIndex = 0; DeviceIndex < NumDevices; ++DeviceIndex)
157       FullDeviceIDs.emplace_back(Platforms[PlatformIndex],
158                                  Devices[DeviceIndex]);
159   }
160 
161   if (FullDeviceIDs.empty())
162     return Status("No OpenCL device available on this system.");
163 
164   std::vector<cl_context> Contexts(FullDeviceIDs.size());
165   std::vector<cl_command_queue> CommandQueues(FullDeviceIDs.size());
166   for (size_t I = 0; I < FullDeviceIDs.size(); ++I) {
167     cl_int CreateContextResult;
168     Contexts[I] = clCreateContext(nullptr, 1, &FullDeviceIDs[I].DeviceID,
169                                   nullptr, nullptr, &CreateContextResult);
170     if (CreateContextResult)
171       return getOpenCLError(CreateContextResult, "clCreateContext");
172 
173     cl_int CreateCommandQueueResult;
174     CommandQueues[I] = clCreateCommandQueue(
175         Contexts[I], FullDeviceIDs[I].DeviceID, CL_QUEUE_PROFILING_ENABLE,
176         &CreateCommandQueueResult);
177     if (CreateCommandQueueResult)
178       return getOpenCLError(CreateCommandQueueResult, "clCreateCommandQueue");
179   }
180 
181   return OpenCLPlatform(std::move(FullDeviceIDs), std::move(Contexts),
182                         std::move(CommandQueues));
183 }
184 
getDeviceCount()185 Expected<int> OpenCLPlatform::getDeviceCount() { return FullDeviceIDs.size(); }
186 
openCLDestroyStream(void * H)187 static void openCLDestroyStream(void *H) {
188   logOpenCLWarning(clReleaseCommandQueue(static_cast<cl_command_queue>(H)),
189                    "clReleaseCommandQueue");
190 }
191 
createStream(int DeviceIndex)192 Expected<Stream> OpenCLPlatform::createStream(int DeviceIndex) {
193   cl_int Result;
194   cl_command_queue Queue = clCreateCommandQueue(
195       Contexts[DeviceIndex], FullDeviceIDs[DeviceIndex].DeviceID,
196       CL_QUEUE_PROFILING_ENABLE, &Result);
197   if (Result)
198     return getOpenCLError(Result, "clCreateCommandQueue");
199   return constructStream(this, DeviceIndex, Queue, openCLDestroyStream);
200 }
201 
openCLEventDestroy(void * H)202 static void openCLEventDestroy(void *H) {
203   cl_event *CLEvent = static_cast<cl_event *>(H);
204   logOpenCLWarning(clReleaseEvent(*CLEvent), "clReleaseEvent");
205   delete CLEvent;
206 }
207 
streamSync(void * Stream)208 Status OpenCLPlatform::streamSync(void *Stream) {
209   return getOpenCLError(clFinish(static_cast<cl_command_queue>(Stream)),
210                         "clFinish");
211 }
212 
streamWaitOnEvent(void * Stream,void * Event)213 Status OpenCLPlatform::streamWaitOnEvent(void *Stream, void *Event) {
214   cl_event *CLEvent = static_cast<cl_event *>(Event);
215   return getOpenCLError(
216       clEnqueueBarrierWithWaitList(static_cast<cl_command_queue>(Stream), 1,
217                                    CLEvent, nullptr),
218       "clEnqueueMarkerWithWaitList");
219 }
220 
createEvent(int DeviceIndex)221 Expected<Event> OpenCLPlatform::createEvent(int DeviceIndex) {
222   cl_int Result;
223   cl_event Event = clCreateUserEvent(Contexts[DeviceIndex], &Result);
224   if (Result)
225     return getOpenCLError(Result, "clCreateUserEvent");
226   if (cl_int Result = clSetUserEventStatus(Event, CL_COMPLETE))
227     return getOpenCLError(Result, "clSetUserEventStatus");
228   return constructEvent(this, DeviceIndex, new cl_event(Event),
229                         openCLEventDestroy);
230 }
231 
openCLDestroyProgram(void * H)232 static void openCLDestroyProgram(void *H) {
233   logOpenCLWarning(clReleaseProgram(static_cast<cl_program>(H)),
234                    "clReleaseProgram");
235 }
236 
237 Expected<Program>
createProgramFromSource(Span<const char> Source,int DeviceIndex)238 OpenCLPlatform::createProgramFromSource(Span<const char> Source,
239                                         int DeviceIndex) {
240   cl_int Error;
241   const char *CSource = Source.data();
242   size_t SourceSize = Source.size();
243   cl_program Program = clCreateProgramWithSource(Contexts[DeviceIndex], 1,
244                                                  &CSource, &SourceSize, &Error);
245   if (Error)
246     return getOpenCLError(Error, "clCreateProgramWithSource");
247   cl_device_id DeviceID = FullDeviceIDs[DeviceIndex].DeviceID;
248   if (cl_int Error =
249           clBuildProgram(Program, 1, &DeviceID, nullptr, nullptr, nullptr))
250     return getOpenCLError(Error, "clBuildProgram");
251   return constructProgram(this, Program, openCLDestroyProgram);
252 }
253 
rawMallocD(ptrdiff_t ByteCount,int DeviceIndex)254 Expected<void *> OpenCLPlatform::rawMallocD(ptrdiff_t ByteCount,
255                                             int DeviceIndex) {
256   cl_int Result;
257   cl_mem Memory = clCreateBuffer(Contexts[DeviceIndex], CL_MEM_READ_WRITE,
258                                  ByteCount, nullptr, &Result);
259   if (Result)
260     return getOpenCLError(Result, "clCreateBuffer");
261   return reinterpret_cast<void *>(Memory);
262 }
263 
openCLDestroyDeviceMemory(void * H)264 static void openCLDestroyDeviceMemory(void *H) {
265   logOpenCLWarning(clReleaseMemObject(static_cast<cl_mem>(H)),
266                    "clReleaseMemObject");
267 }
268 
getDeviceMemoryHandleDestructor()269 HandleDestructor OpenCLPlatform::getDeviceMemoryHandleDestructor() {
270   return openCLDestroyDeviceMemory;
271 }
272 
getDeviceMemorySpanHandle(void * BaseHandle,size_t ByteSize,size_t ByteOffset)273 void *OpenCLPlatform::getDeviceMemorySpanHandle(void *BaseHandle,
274                                                 size_t ByteSize,
275                                                 size_t ByteOffset) {
276   cl_int Error;
277   cl_buffer_region Region;
278   Region.origin = ByteOffset;
279   Region.size = ByteSize;
280   cl_mem SubBuffer =
281       clCreateSubBuffer(static_cast<cl_mem>(BaseHandle), 0,
282                         CL_BUFFER_CREATE_TYPE_REGION, &Region, &Error);
283   logOpenCLWarning(Error, "clCreateSubBuffer");
284   if (Error)
285     return nullptr;
286   return SubBuffer;
287 }
288 
rawDestroyDeviceMemorySpanHandle(void * Handle)289 void OpenCLPlatform::rawDestroyDeviceMemorySpanHandle(void *Handle) {
290   openCLDestroyDeviceMemory(Handle);
291 }
292 
293 Expected<void *>
rawGetDeviceSymbolAddress(const void *,int)294 OpenCLPlatform::rawGetDeviceSymbolAddress(const void * /*Symbol*/,
295                                           int /*DeviceIndex*/) {
296   // This doesn't seem to have any equivalent in OpenCL.
297   return Status("not implemented");
298 }
299 
300 Expected<ptrdiff_t>
rawGetDeviceSymbolSize(const void *,int)301 OpenCLPlatform::rawGetDeviceSymbolSize(const void * /*Symbol*/,
302                                        int /*DeviceIndex*/) {
303   // This doesn't seem to have any equivalent in OpenCL.
304   return Status("not implemented");
305 }
306 
noOpHandleDestructor(void *)307 static void noOpHandleDestructor(void *) {}
308 
rawRegisterHostMem(const void *,ptrdiff_t)309 Status OpenCLPlatform::rawRegisterHostMem(const void * /*Memory*/,
310                                           ptrdiff_t /*ByteCount*/) {
311   // TODO(jhen): Do we want to do something to pin the memory here?
312   return Status();
313 }
314 
getUnregisterHostMemoryHandleDestructor()315 HandleDestructor OpenCLPlatform::getUnregisterHostMemoryHandleDestructor() {
316   // TODO(jhen): Do we want to unpin the memory here?
317   return noOpHandleDestructor;
318 }
319 
rawMallocRegisteredH(ptrdiff_t ByteCount)320 Expected<void *> OpenCLPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
321   // TODO(jhen): Do we want to do something to pin the memory here?
322   return std::malloc(ByteCount);
323 }
324 
freeMemoryHandleDestructor(void * Memory)325 static void freeMemoryHandleDestructor(void *Memory) {
326   // TODO(jhen): Do we want to unpin the memory here?
327   std::free(Memory);
328 }
329 
getFreeHostMemoryHandleDestructor()330 HandleDestructor OpenCLPlatform::getFreeHostMemoryHandleDestructor() {
331   return freeMemoryHandleDestructor;
332 }
333 
asyncCopyDToD(const void * DeviceSrc,ptrdiff_t DeviceSrcByteOffset,void * DeviceDst,ptrdiff_t DeviceDstByteOffset,ptrdiff_t ByteCount,void * Stream)334 Status OpenCLPlatform::asyncCopyDToD(const void *DeviceSrc,
335                                      ptrdiff_t DeviceSrcByteOffset,
336                                      void *DeviceDst,
337                                      ptrdiff_t DeviceDstByteOffset,
338                                      ptrdiff_t ByteCount, void *Stream) {
339   return getOpenCLError(
340       clEnqueueCopyBuffer(static_cast<cl_command_queue>(Stream),
341                           static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
342                           static_cast<cl_mem>(DeviceDst), DeviceSrcByteOffset,
343                           DeviceDstByteOffset, ByteCount, 0, nullptr, nullptr),
344       "clEnqueueCopyBuffer");
345 }
346 
asyncCopyDToH(const void * DeviceSrc,ptrdiff_t DeviceSrcByteOffset,void * HostDst,ptrdiff_t ByteCount,void * Stream)347 Status OpenCLPlatform::asyncCopyDToH(const void *DeviceSrc,
348                                      ptrdiff_t DeviceSrcByteOffset,
349                                      void *HostDst, ptrdiff_t ByteCount,
350                                      void *Stream) {
351   return getOpenCLError(
352       clEnqueueReadBuffer(static_cast<cl_command_queue>(Stream),
353                           static_cast<cl_mem>(const_cast<void *>(DeviceSrc)),
354                           CL_TRUE, DeviceSrcByteOffset, ByteCount, HostDst, 0,
355                           nullptr, nullptr),
356       "clEnqueueReadBuffer");
357 }
358 
asyncCopyHToD(const void * HostSrc,void * DeviceDst,ptrdiff_t DeviceDstByteOffset,ptrdiff_t ByteCount,void * Stream)359 Status OpenCLPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
360                                      ptrdiff_t DeviceDstByteOffset,
361                                      ptrdiff_t ByteCount, void *Stream) {
362   return getOpenCLError(
363       clEnqueueWriteBuffer(static_cast<cl_command_queue>(Stream),
364                            static_cast<cl_mem>(DeviceDst), CL_TRUE,
365                            DeviceDstByteOffset, ByteCount, HostSrc, 0, nullptr,
366                            nullptr),
367       "clEnqueueWriteBuffer");
368 }
369 
asyncMemsetD(void * DeviceDst,ptrdiff_t ByteOffset,ptrdiff_t ByteCount,char ByteValue,void * Stream)370 Status OpenCLPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
371                                     ptrdiff_t ByteCount, char ByteValue,
372                                     void *Stream) {
373   return getOpenCLError(
374       clEnqueueFillBuffer(static_cast<cl_command_queue>(Stream),
375                           static_cast<cl_mem>(DeviceDst), &ByteValue, 1,
376                           ByteOffset, ByteCount, 0, nullptr, nullptr),
377       "clEnqueueFillBuffer");
378 }
379 
380 struct StreamCallbackUserData {
StreamCallbackUserDataacxxel::__anonacf1525c0111::StreamCallbackUserData381   StreamCallbackUserData(Stream &TheStream, StreamCallback Function,
382                          cl_event EndEvent)
383       : TheStream(TheStream), TheFunction(std::move(Function)),
384         EndEvent(EndEvent) {}
385 
386   Stream &TheStream;
387   StreamCallback TheFunction;
388   cl_event EndEvent;
389 };
390 
391 // A function with the right signature to pass to clSetEventCallback.
openCLStreamCallbackShim(cl_event,cl_int EventCommandExecStatus,void * UserData)392 void CL_CALLBACK openCLStreamCallbackShim(cl_event,
393                                           cl_int EventCommandExecStatus,
394                                           void *UserData) {
395   std::unique_ptr<StreamCallbackUserData> Data(
396       static_cast<StreamCallbackUserData *>(UserData));
397   Data->TheFunction(
398       Data->TheStream,
399       getOpenCLError(EventCommandExecStatus, "stream callback error state"));
400   if (cl_int Result = clSetUserEventStatus(Data->EndEvent, CL_COMPLETE))
401     logOpenCLWarning(Result, "clSetUserEventStatus");
402   if (cl_int Result = clReleaseEvent(Data->EndEvent))
403     logOpenCLWarning(Result, "clReleaseEvent");
404 }
405 
addStreamCallback(Stream & TheStream,StreamCallback Callback)406 Status OpenCLPlatform::addStreamCallback(Stream &TheStream,
407                                          StreamCallback Callback) {
408   cl_int Result;
409   cl_event StartEvent =
410       clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
411   if (Result)
412     return getOpenCLError(Result, "clCreateUserEvent");
413   cl_event EndEvent =
414       clCreateUserEvent(Contexts[TheStream.getDeviceIndex()], &Result);
415   if (Result)
416     return getOpenCLError(Result, "clCreateUserEvent");
417   cl_event StartBarrierEvent;
418   if (cl_int Result = clEnqueueBarrierWithWaitList(
419           static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
420           &StartEvent, &StartBarrierEvent))
421     return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
422 
423   if (cl_int Result = clEnqueueBarrierWithWaitList(
424           static_cast<cl_command_queue>(getStreamHandle(TheStream)), 1,
425           &EndEvent, nullptr))
426     return getOpenCLError(Result, "clEnqueueBarrierWithWaitList");
427 
428   std::unique_ptr<StreamCallbackUserData> UserData(
429       new StreamCallbackUserData(TheStream, std::move(Callback), EndEvent));
430   if (cl_int Result =
431           clSetEventCallback(StartBarrierEvent, CL_RUNNING,
432                              openCLStreamCallbackShim, UserData.release()))
433     return getOpenCLError(Result, "clSetEventCallback");
434 
435   if (cl_int Result = clSetUserEventStatus(StartEvent, CL_COMPLETE))
436     return getOpenCLError(Result, "clSetUserEventStatus");
437 
438   if (cl_int Result = clReleaseEvent(StartBarrierEvent))
439     return getOpenCLError(Result, "clReleaseEvent");
440 
441   return getOpenCLError(clReleaseEvent(StartEvent), "clReleaseEvent");
442 }
443 
enqueueEvent(void * Event,void * Stream)444 Status OpenCLPlatform::enqueueEvent(void *Event, void *Stream) {
445   cl_event *CLEvent = static_cast<cl_event *>(Event);
446   cl_event OldEvent = *CLEvent;
447   cl_event NewEvent;
448   if (cl_int Result = clEnqueueMarkerWithWaitList(
449           static_cast<cl_command_queue>(Stream), 0, nullptr, &NewEvent))
450     return getOpenCLError(Result, "clEnqueueMarkerWithWaitList");
451   *CLEvent = NewEvent;
452   return getOpenCLError(clReleaseEvent(OldEvent), "clReleaseEvent");
453 }
454 
eventIsDone(void * Event)455 bool OpenCLPlatform::eventIsDone(void *Event) {
456   cl_event *CLEvent = static_cast<cl_event *>(Event);
457   cl_int EventStatus;
458   logOpenCLWarning(clGetEventInfo(*CLEvent, CL_EVENT_COMMAND_EXECUTION_STATUS,
459                                   sizeof(EventStatus), &EventStatus, nullptr),
460                    "clGetEventInfo");
461   return EventStatus == CL_COMPLETE || EventStatus < 0;
462 }
463 
eventSync(void * Event)464 Status OpenCLPlatform::eventSync(void *Event) {
465   cl_event *CLEvent = static_cast<cl_event *>(Event);
466   return getOpenCLError(clWaitForEvents(1, CLEvent), "clWaitForEvents");
467 }
468 
getSecondsBetweenEvents(void * StartEvent,void * EndEvent)469 Expected<float> OpenCLPlatform::getSecondsBetweenEvents(void *StartEvent,
470                                                         void *EndEvent) {
471   cl_event *CLStartEvent = static_cast<cl_event *>(StartEvent);
472   cl_event *CLEndEvent = static_cast<cl_event *>(EndEvent);
473 
474   cl_profiling_info ParamName = CL_PROFILING_COMMAND_END;
475   cl_ulong StartNanoseconds;
476   cl_ulong EndNanoseconds;
477   if (cl_int Result =
478           clGetEventProfilingInfo(*CLStartEvent, ParamName, sizeof(cl_ulong),
479                                   &StartNanoseconds, nullptr))
480     return getOpenCLError(Result, "clGetEventProfilingInfo");
481   if (cl_int Result = clGetEventProfilingInfo(
482           *CLEndEvent, ParamName, sizeof(cl_ulong), &EndNanoseconds, nullptr))
483     return getOpenCLError(Result, "clGetEventProfilingInfo");
484   return (EndNanoseconds - StartNanoseconds) * 1e-12;
485 }
486 
rawCreateKernel(void * Program,const std::string & Name)487 Expected<void *> OpenCLPlatform::rawCreateKernel(void *Program,
488                                                  const std::string &Name) {
489 
490   cl_int Error;
491   cl_kernel Kernel =
492       clCreateKernel(static_cast<cl_program>(Program), Name.c_str(), &Error);
493   if (Error)
494     return getOpenCLError(Error, "clCreateKernel");
495   return Kernel;
496 }
497 
openCLDestroyKernel(void * H)498 static void openCLDestroyKernel(void *H) {
499   logOpenCLWarning(clReleaseKernel(static_cast<cl_kernel>(H)),
500                    "clReleaseKernel");
501 }
502 
getKernelHandleDestructor()503 HandleDestructor OpenCLPlatform::getKernelHandleDestructor() {
504   return openCLDestroyKernel;
505 }
506 
rawEnqueueKernelLaunch(void * Stream,void * Kernel,KernelLaunchDimensions LaunchDimensions,Span<void * > Arguments,Span<size_t> ArgumentSizes,size_t SharedMemoryBytes)507 Status OpenCLPlatform::rawEnqueueKernelLaunch(
508     void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
509     Span<void *> Arguments, Span<size_t> ArgumentSizes,
510     size_t SharedMemoryBytes) {
511   if (SharedMemoryBytes != 0)
512     return Status("OpenCL kernel launches only accept zero for the shared "
513                   "memory byte size");
514   cl_kernel TheKernel = static_cast<cl_kernel>(Kernel);
515   for (int I = 0; I < Arguments.size(); ++I)
516     if (cl_int Error =
517             clSetKernelArg(TheKernel, I, ArgumentSizes[I], Arguments[I]))
518       return getOpenCLError(Error, "clSetKernelArg");
519   size_t LocalWorkSize[] = {LaunchDimensions.BlockX, LaunchDimensions.BlockY,
520                             LaunchDimensions.BlockZ};
521   size_t GlobalWorkSize[] = {LaunchDimensions.BlockX * LaunchDimensions.GridX,
522                              LaunchDimensions.BlockY * LaunchDimensions.GridY,
523                              LaunchDimensions.BlockZ * LaunchDimensions.GridZ};
524   return getOpenCLError(
525       clEnqueueNDRangeKernel(static_cast<cl_command_queue>(Stream), TheKernel,
526                              3, nullptr, GlobalWorkSize, LocalWorkSize, 0,
527                              nullptr, nullptr),
528       "clEnqueueNDRangeKernel");
529 }
530 
531 } // namespace
532 
533 namespace opencl {
534 
535 /// Gets an OpenCLPlatform instance and returns it as an unowned pointer to a
536 /// Platform.
getPlatform()537 Expected<Platform *> getPlatform() {
538   static auto MaybePlatform = []() -> Expected<OpenCLPlatform *> {
539     Expected<OpenCLPlatform> CreationResult = OpenCLPlatform::create();
540     if (CreationResult.isError())
541       return CreationResult.getError();
542     else
543       return new OpenCLPlatform(CreationResult.takeValue());
544   }();
545   return MaybePlatform;
546 }
547 
548 } // namespace opencl
549 
550 } // namespace acxxel
551