1 //===--- cuda_acxxel.cpp - CUDA implementation of the Acxxel API ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// This file defines the standard CUDA implementation of the Acxxel API.
10 ///
11 //===----------------------------------------------------------------------===//
12 
13 #include "acxxel.h"
14 
15 #include "cuda.h"
16 #include "cuda_runtime.h"
17 
18 #include <array>
19 #include <cassert>
20 #include <sstream>
21 #include <vector>
22 
23 namespace acxxel {
24 
25 namespace {
26 
getCUErrorMessage(CUresult Result)27 static std::string getCUErrorMessage(CUresult Result) {
28   if (!Result)
29     return "success";
30   const char *ErrorName = "UNKNOWN_ERROR_NAME";
31   const char *ErrorDescription = "UNKNOWN_ERROR_DESCRIPTION";
32   cuGetErrorName(Result, &ErrorName);
33   cuGetErrorString(Result, &ErrorDescription);
34   std::ostringstream OutStream;
35   OutStream << "CUDA driver error: code = " << Result
36             << ", name = " << ErrorName
37             << ", description = " << ErrorDescription;
38   return OutStream.str();
39 }
40 
getCUError(CUresult Result,const std::string & Message)41 static Status getCUError(CUresult Result, const std::string &Message) {
42   if (!Result)
43     return Status();
44   std::ostringstream OutStream;
45   OutStream << getCUErrorMessage(Result) << ", message = " << Message;
46   return Status(OutStream.str());
47 }
48 
getCUDAErrorMessage(cudaError_t E)49 static std::string getCUDAErrorMessage(cudaError_t E) {
50   if (!E)
51     return "success";
52   std::ostringstream OutStream;
53   OutStream << "CUDA runtime error: code = " << E
54             << ", name = " << cudaGetErrorName(E)
55             << ", description = " << cudaGetErrorString(E);
56   return OutStream.str();
57 }
58 
getCUDAError(cudaError_t E,const std::string & Message)59 static Status getCUDAError(cudaError_t E, const std::string &Message) {
60   if (!E)
61     return Status();
62   std::ostringstream OutStream;
63   OutStream << getCUDAErrorMessage(E) << ", message = " << Message;
64   return Status(OutStream.str());
65 }
66 
logCUWarning(CUresult Result,const std::string & Message)67 static void logCUWarning(CUresult Result, const std::string &Message) {
68   if (Result) {
69     std::ostringstream OutStream;
70     OutStream << Message << ": " << getCUErrorMessage(Result);
71     logWarning(OutStream.str());
72   }
73 }
74 
75 /// A CUDA Platform implementation.
76 class CUDAPlatform : public Platform {
77 public:
78   ~CUDAPlatform() override = default;
79 
80   static Expected<CUDAPlatform> create();
81 
82   Expected<int> getDeviceCount() override;
83 
84   Expected<Stream> createStream(int DeviceIndex) override;
85 
86   Status streamSync(void *Stream) override;
87 
88   Status streamWaitOnEvent(void *Stream, void *Event) override;
89 
90   Expected<Event> createEvent(int DeviceIndex) override;
91 
92 protected:
93   Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
94   HandleDestructor getDeviceMemoryHandleDestructor() override;
95   void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
96                                   size_t ByteOffset) override;
97   virtual void rawDestroyDeviceMemorySpanHandle(void *Handle) override;
98 
99   Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
100                                              int DeviceIndex) override;
101   Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
102                                              int DeviceIndex) override;
103 
104   Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
105   HandleDestructor getUnregisterHostMemoryHandleDestructor() override;
106 
107   Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
108   HandleDestructor getFreeHostMemoryHandleDestructor() override;
109 
110   Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
111                        void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
112                        ptrdiff_t ByteCount, void *Stream) override;
113   Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
114                        void *HostDst, ptrdiff_t ByteCount,
115                        void *Stream) override;
116   Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
117                        ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
118                        void *Stream) override;
119 
120   Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
121                       ptrdiff_t ByteCount, char ByteValue,
122                       void *Stream) override;
123 
124   Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;
125 
126   Expected<Program> createProgramFromSource(Span<const char> Source,
127                                             int DeviceIndex) override;
128 
129   Status enqueueEvent(void *Event, void *Stream) override;
130   bool eventIsDone(void *Event) override;
131   Status eventSync(void *Event) override;
132   Expected<float> getSecondsBetweenEvents(void *StartEvent,
133                                           void *EndEvent) override;
134 
135   Expected<void *> rawCreateKernel(void *Program,
136                                    const std::string &Name) override;
137   HandleDestructor getKernelHandleDestructor() override;
138 
139   Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
140                                 KernelLaunchDimensions LaunchDimensions,
141                                 Span<void *> Arguments,
142                                 Span<size_t> ArgumentSizes,
143                                 size_t SharedMemoryBytes) override;
144 
145 private:
CUDAPlatform(const std::vector<CUcontext> & Contexts)146   explicit CUDAPlatform(const std::vector<CUcontext> &Contexts)
147       : TheContexts(Contexts) {}
148 
setContext(int DeviceIndex)149   Status setContext(int DeviceIndex) {
150     if (DeviceIndex < 0 ||
151         static_cast<size_t>(DeviceIndex) >= TheContexts.size())
152       return Status("invalid deivce index " + std::to_string(DeviceIndex));
153     return getCUError(cuCtxSetCurrent(TheContexts[DeviceIndex]),
154                       "cuCtxSetCurrent");
155   }
156 
157   // Vector of contexts for each device.
158   std::vector<CUcontext> TheContexts;
159 };
160 
create()161 Expected<CUDAPlatform> CUDAPlatform::create() {
162   std::vector<CUcontext> Contexts;
163   if (CUresult Result = cuInit(0))
164     return getCUError(Result, "cuInit");
165 
166   int DeviceCount = 0;
167   if (CUresult Result = cuDeviceGetCount(&DeviceCount))
168     return getCUError(Result, "cuDeviceGetCount");
169 
170   for (int I = 0; I < DeviceCount; ++I) {
171     CUdevice Device;
172     if (CUresult Result = cuDeviceGet(&Device, I))
173       return getCUError(Result, "cuDeviceGet");
174     CUcontext Context;
175     if (CUresult Result = cuDevicePrimaryCtxRetain(&Context, Device))
176       return getCUError(Result, "cuDevicePrimaryCtxRetain");
177     if (CUresult Result = cuCtxSetCurrent(Context))
178       return getCUError(Result, "cuCtxSetCurrent");
179     Contexts.emplace_back(Context);
180   }
181 
182   return CUDAPlatform(Contexts);
183 }
184 
getDeviceCount()185 Expected<int> CUDAPlatform::getDeviceCount() {
186   int Count = 0;
187   if (CUresult Result = cuDeviceGetCount(&Count))
188     return getCUError(Result, "cuDeviceGetCount");
189   return Count;
190 }
191 
cudaDestroyStream(void * H)192 static void cudaDestroyStream(void *H) {
193   logCUWarning(cuStreamDestroy(static_cast<CUstream_st *>(H)),
194                "cuStreamDestroy");
195 }
196 
createStream(int DeviceIndex)197 Expected<Stream> CUDAPlatform::createStream(int DeviceIndex) {
198   Status S = setContext(DeviceIndex);
199   if (S.isError())
200     return S;
201   unsigned int Flags = CU_STREAM_DEFAULT;
202   CUstream Handle;
203   if (CUresult Result = cuStreamCreate(&Handle, Flags))
204     return getCUError(Result, "cuStreamCreate");
205   return constructStream(this, DeviceIndex, Handle, cudaDestroyStream);
206 }
207 
streamSync(void * Stream)208 Status CUDAPlatform::streamSync(void *Stream) {
209   return getCUError(cuStreamSynchronize(static_cast<CUstream_st *>(Stream)),
210                     "cuStreamSynchronize");
211 }
212 
streamWaitOnEvent(void * Stream,void * Event)213 Status CUDAPlatform::streamWaitOnEvent(void *Stream, void *Event) {
214   // CUDA docs says flags must be 0.
215   unsigned int Flags = 0u;
216   return getCUError(cuStreamWaitEvent(static_cast<CUstream_st *>(Stream),
217                                       static_cast<CUevent_st *>(Event), Flags),
218                     "cuStreamWaitEvent");
219 }
220 
cudaDestroyEvent(void * H)221 static void cudaDestroyEvent(void *H) {
222   logCUWarning(cuEventDestroy(static_cast<CUevent_st *>(H)), "cuEventDestroy");
223 }
224 
createEvent(int DeviceIndex)225 Expected<Event> CUDAPlatform::createEvent(int DeviceIndex) {
226   Status S = setContext(DeviceIndex);
227   if (S.isError())
228     return S;
229   unsigned int Flags = CU_EVENT_DEFAULT;
230   CUevent Handle;
231   if (CUresult Result = cuEventCreate(&Handle, Flags))
232     return getCUError(Result, "cuEventCreate");
233   return constructEvent(this, DeviceIndex, Handle, cudaDestroyEvent);
234 }
235 
enqueueEvent(void * Event,void * Stream)236 Status CUDAPlatform::enqueueEvent(void *Event, void *Stream) {
237   return getCUError(cuEventRecord(static_cast<CUevent_st *>(Event),
238                                   static_cast<CUstream_st *>(Stream)),
239                     "cuEventRecord");
240 }
241 
eventIsDone(void * Event)242 bool CUDAPlatform::eventIsDone(void *Event) {
243   return cuEventQuery(static_cast<CUevent_st *>(Event)) != CUDA_ERROR_NOT_READY;
244 }
245 
eventSync(void * Event)246 Status CUDAPlatform::eventSync(void *Event) {
247   return getCUError(cuEventSynchronize(static_cast<CUevent_st *>(Event)),
248                     "cuEventSynchronize");
249 }
250 
getSecondsBetweenEvents(void * StartEvent,void * EndEvent)251 Expected<float> CUDAPlatform::getSecondsBetweenEvents(void *StartEvent,
252                                                       void *EndEvent) {
253   float Milliseconds;
254   if (CUresult Result = cuEventElapsedTime(
255           &Milliseconds, static_cast<CUevent_st *>(StartEvent),
256           static_cast<CUevent_st *>(EndEvent)))
257     return getCUError(Result, "cuEventElapsedTime");
258   return Milliseconds * 1e-6;
259 }
260 
rawMallocD(ptrdiff_t ByteCount,int DeviceIndex)261 Expected<void *> CUDAPlatform::rawMallocD(ptrdiff_t ByteCount,
262                                           int DeviceIndex) {
263   Status S = setContext(DeviceIndex);
264   if (S.isError())
265     return S;
266   if (!ByteCount)
267     return nullptr;
268   CUdeviceptr Pointer;
269   if (CUresult Result = cuMemAlloc(&Pointer, ByteCount))
270     return getCUError(Result, "cuMemAlloc");
271   return reinterpret_cast<void *>(Pointer);
272 }
273 
cudaDestroyDeviceMemory(void * H)274 static void cudaDestroyDeviceMemory(void *H) {
275   logCUWarning(cuMemFree(reinterpret_cast<CUdeviceptr>(H)), "cuMemFree");
276 }
277 
getDeviceMemoryHandleDestructor()278 HandleDestructor CUDAPlatform::getDeviceMemoryHandleDestructor() {
279   return cudaDestroyDeviceMemory;
280 }
281 
getDeviceMemorySpanHandle(void * BaseHandle,size_t,size_t ByteOffset)282 void *CUDAPlatform::getDeviceMemorySpanHandle(void *BaseHandle, size_t,
283                                               size_t ByteOffset) {
284   return static_cast<char *>(BaseHandle) + ByteOffset;
285 }
286 
rawDestroyDeviceMemorySpanHandle(void *)287 void CUDAPlatform::rawDestroyDeviceMemorySpanHandle(void *) {
288   // Do nothing for this platform.
289 }
290 
rawGetDeviceSymbolAddress(const void * Symbol,int DeviceIndex)291 Expected<void *> CUDAPlatform::rawGetDeviceSymbolAddress(const void *Symbol,
292                                                          int DeviceIndex) {
293   Status S = setContext(DeviceIndex);
294   if (S.isError())
295     return S;
296   void *Address;
297   if (cudaError_t Status = cudaGetSymbolAddress(&Address, Symbol))
298     return getCUDAError(Status, "cudaGetSymbolAddress");
299   return Address;
300 }
301 
rawGetDeviceSymbolSize(const void * Symbol,int DeviceIndex)302 Expected<ptrdiff_t> CUDAPlatform::rawGetDeviceSymbolSize(const void *Symbol,
303                                                          int DeviceIndex) {
304   Status S = setContext(DeviceIndex);
305   if (S.isError())
306     return S;
307   size_t Size;
308   if (cudaError_t Status = cudaGetSymbolSize(&Size, Symbol))
309     return getCUDAError(Status, "cudaGetSymbolSize");
310   return Size;
311 }
312 
/// Returns \p Ptr advanced by \p ByteOffset bytes (const overload).
static const void *offsetVoidPtr(const void *Ptr, ptrdiff_t ByteOffset) {
  const char *Bytes = static_cast<const char *>(Ptr);
  const char *Shifted = Bytes + ByteOffset;
  return Shifted;
}
316 
/// Returns \p Ptr advanced by \p ByteOffset bytes (mutable overload).
static void *offsetVoidPtr(void *Ptr, ptrdiff_t ByteOffset) {
  char *Bytes = static_cast<char *>(Ptr);
  char *Shifted = Bytes + ByteOffset;
  return Shifted;
}
320 
rawRegisterHostMem(const void * Memory,ptrdiff_t ByteCount)321 Status CUDAPlatform::rawRegisterHostMem(const void *Memory,
322                                         ptrdiff_t ByteCount) {
323   unsigned int Flags = 0;
324   return getCUError(
325       cuMemHostRegister(const_cast<void *>(Memory), ByteCount, Flags),
326       "cuMemHostRegiser");
327 }
328 
cudaUnregisterHostMemoryHandleDestructor(void * H)329 static void cudaUnregisterHostMemoryHandleDestructor(void *H) {
330   logCUWarning(cuMemHostUnregister(H), "cuMemHostUnregister");
331 }
332 
getUnregisterHostMemoryHandleDestructor()333 HandleDestructor CUDAPlatform::getUnregisterHostMemoryHandleDestructor() {
334   return cudaUnregisterHostMemoryHandleDestructor;
335 }
336 
rawMallocRegisteredH(ptrdiff_t ByteCount)337 Expected<void *> CUDAPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
338   unsigned int Flags = 0;
339   void *Memory;
340   if (CUresult Result = cuMemHostAlloc(&Memory, ByteCount, Flags))
341     return getCUError(Result, "cuMemHostAlloc");
342   return Memory;
343 }
344 
cudaFreeHostMemoryHandleDestructor(void * H)345 static void cudaFreeHostMemoryHandleDestructor(void *H) {
346   logCUWarning(cuMemFreeHost(H), "cuMemFreeHost");
347 }
348 
getFreeHostMemoryHandleDestructor()349 HandleDestructor CUDAPlatform::getFreeHostMemoryHandleDestructor() {
350   return cudaFreeHostMemoryHandleDestructor;
351 }
352 
asyncCopyDToD(const void * DeviceSrc,ptrdiff_t DeviceSrcByteOffset,void * DeviceDst,ptrdiff_t DeviceDstByteOffset,ptrdiff_t ByteCount,void * Stream)353 Status CUDAPlatform::asyncCopyDToD(const void *DeviceSrc,
354                                    ptrdiff_t DeviceSrcByteOffset,
355                                    void *DeviceDst,
356                                    ptrdiff_t DeviceDstByteOffset,
357                                    ptrdiff_t ByteCount, void *Stream) {
358   return getCUError(
359       cuMemcpyDtoDAsync(reinterpret_cast<CUdeviceptr>(
360                             offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
361                         reinterpret_cast<CUdeviceptr>(
362                             offsetVoidPtr(DeviceSrc, DeviceSrcByteOffset)),
363                         ByteCount, static_cast<CUstream_st *>(Stream)),
364       "cuMemcpyDtoDAsync");
365 }
366 
asyncCopyDToH(const void * DeviceSrc,ptrdiff_t DeviceSrcByteOffset,void * HostDst,ptrdiff_t ByteCount,void * Stream)367 Status CUDAPlatform::asyncCopyDToH(const void *DeviceSrc,
368                                    ptrdiff_t DeviceSrcByteOffset, void *HostDst,
369                                    ptrdiff_t ByteCount, void *Stream) {
370   return getCUError(
371       cuMemcpyDtoHAsync(HostDst, reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
372                                      DeviceSrc, DeviceSrcByteOffset)),
373                         ByteCount, static_cast<CUstream_st *>(Stream)),
374       "cuMemcpyDtoHAsync");
375 }
376 
asyncCopyHToD(const void * HostSrc,void * DeviceDst,ptrdiff_t DeviceDstByteOffset,ptrdiff_t ByteCount,void * Stream)377 Status CUDAPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
378                                    ptrdiff_t DeviceDstByteOffset,
379                                    ptrdiff_t ByteCount, void *Stream) {
380   return getCUError(
381       cuMemcpyHtoDAsync(reinterpret_cast<CUdeviceptr>(
382                             offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
383                         HostSrc, ByteCount, static_cast<CUstream_st *>(Stream)),
384       "cuMemcpyHtoDAsync");
385 }
386 
asyncMemsetD(void * DeviceDst,ptrdiff_t ByteOffset,ptrdiff_t ByteCount,char ByteValue,void * Stream)387 Status CUDAPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
388                                   ptrdiff_t ByteCount, char ByteValue,
389                                   void *Stream) {
390   return getCUError(
391       cuMemsetD8Async(
392           reinterpret_cast<CUdeviceptr>(offsetVoidPtr(DeviceDst, ByteOffset)),
393           ByteValue, ByteCount, static_cast<CUstream_st *>(Stream)),
394       "cuMemsetD8Async");
395 }
396 
397 struct StreamCallbackUserData {
StreamCallbackUserDataacxxel::__anon21f1c6380111::StreamCallbackUserData398   StreamCallbackUserData(Stream &Stream, StreamCallback Function)
399       : TheStream(Stream), TheFunction(std::move(Function)) {}
400 
401   Stream &TheStream;
402   StreamCallback TheFunction;
403 };
404 
cuStreamCallbackShim(CUstream HStream,CUresult Status,void * UserData)405 static void CUDA_CB cuStreamCallbackShim(CUstream HStream, CUresult Status,
406                                          void *UserData) {
407   std::unique_ptr<StreamCallbackUserData> Data(
408       static_cast<StreamCallbackUserData *>(UserData));
409   Stream &TheStream = Data->TheStream;
410   assert(static_cast<CUstream_st *>(TheStream) == HStream);
411   Data->TheFunction(TheStream,
412                     getCUError(Status, "stream callback error state"));
413 }
414 
addStreamCallback(Stream & Stream,StreamCallback Callback)415 Status CUDAPlatform::addStreamCallback(Stream &Stream,
416                                        StreamCallback Callback) {
417   // CUDA docs say flags must always be 0 here.
418   unsigned int Flags = 0u;
419   std::unique_ptr<StreamCallbackUserData> UserData(
420       new StreamCallbackUserData(Stream, std::move(Callback)));
421   return getCUError(cuStreamAddCallback(Stream, cuStreamCallbackShim,
422                                         UserData.release(), Flags),
423                     "cuStreamAddCallback");
424 }
425 
cudaDestroyProgram(void * H)426 static void cudaDestroyProgram(void *H) {
427   logCUWarning(cuModuleUnload(static_cast<CUmod_st *>(H)), "cuModuleUnload");
428 }
429 
createProgramFromSource(Span<const char> Source,int DeviceIndex)430 Expected<Program> CUDAPlatform::createProgramFromSource(Span<const char> Source,
431                                                         int DeviceIndex) {
432   Status S = setContext(DeviceIndex);
433   if (S.isError())
434     return S;
435   CUmodule Module;
436   constexpr int LogBufferSizeBytes = 1024;
437   char InfoLogBuffer[LogBufferSizeBytes];
438   char ErrorLogBuffer[LogBufferSizeBytes];
439   constexpr size_t OptionsCount = 4;
440   std::array<CUjit_option, OptionsCount> OptionNames = {
441       {CU_JIT_INFO_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
442        CU_JIT_ERROR_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}};
443   std::array<void *, OptionsCount> OptionValues = {
444       {InfoLogBuffer, const_cast<int *>(&LogBufferSizeBytes), ErrorLogBuffer,
445        const_cast<int *>(&LogBufferSizeBytes)}};
446   if (CUresult Result =
447           cuModuleLoadDataEx(&Module, Source.data(), OptionsCount,
448                              OptionNames.data(), OptionValues.data())) {
449     InfoLogBuffer[LogBufferSizeBytes - 1] = '\0';
450     ErrorLogBuffer[LogBufferSizeBytes - 1] = '\0';
451     std::ostringstream OutStream;
452     OutStream << "Error creating program from source: "
453               << getCUErrorMessage(Result)
454               << "\nINFO MESSAGES\n================\n"
455               << InfoLogBuffer << "\nERROR MESSAGES\n==================\n"
456               << ErrorLogBuffer;
457     return Status(OutStream.str());
458   }
459   return constructProgram(this, Module, cudaDestroyProgram);
460 }
461 
rawCreateKernel(void * Program,const std::string & Name)462 Expected<void *> CUDAPlatform::rawCreateKernel(void *Program,
463                                                const std::string &Name) {
464   CUmodule Module = static_cast<CUmodule>(Program);
465   CUfunction Kernel;
466   if (CUresult Result = cuModuleGetFunction(&Kernel, Module, Name.c_str()))
467     return getCUError(Result, "cuModuleGetFunction");
468   return Kernel;
469 }
470 
/// Handle destructor for kernels. Intentionally a no-op.
static void cudaDestroyKernel(void *) {}
474 
getKernelHandleDestructor()475 HandleDestructor CUDAPlatform::getKernelHandleDestructor() {
476   return cudaDestroyKernel;
477 }
478 
rawEnqueueKernelLaunch(void * Stream,void * Kernel,KernelLaunchDimensions LaunchDimensions,Span<void * > Arguments,Span<size_t>,size_t SharedMemoryBytes)479 Status CUDAPlatform::rawEnqueueKernelLaunch(
480     void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
481     Span<void *> Arguments, Span<size_t>, size_t SharedMemoryBytes) {
482   return getCUError(
483       cuLaunchKernel(static_cast<CUfunction>(Kernel), LaunchDimensions.GridX,
484                      LaunchDimensions.GridY, LaunchDimensions.GridZ,
485                      LaunchDimensions.BlockX, LaunchDimensions.BlockY,
486                      LaunchDimensions.BlockZ, SharedMemoryBytes,
487                      static_cast<CUstream>(Stream), Arguments.data(), nullptr),
488       "cuLaunchKernel");
489 }
490 
491 } // namespace
492 
493 namespace cuda {
494 
495 /// Gets the CUDAPlatform instance and returns it as an unowned pointer to a
496 /// Platform.
getPlatform()497 Expected<Platform *> getPlatform() {
498   static auto MaybePlatform = []() -> Expected<CUDAPlatform *> {
499     Expected<CUDAPlatform> CreationResult = CUDAPlatform::create();
500     if (CreationResult.isError())
501       return CreationResult.getError();
502     else
503       return new CUDAPlatform(CreationResult.takeValue());
504   }();
505   return MaybePlatform;
506 }
507 
508 } // namespace cuda
509 
510 } // namespace acxxel
511