//===--- cuda_acxxel.cpp - CUDA implementation of the Acxxel API ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// This file defines the standard CUDA implementation of the Acxxel API.
///
//===----------------------------------------------------------------------===//

#include "acxxel.h"

#include "cuda.h"
#include "cuda_runtime.h"

#include <array>
#include <cassert>
#include <memory>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

namespace acxxel {

namespace {

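/// Builds a human-readable message for a CUDA driver API error code, including
/// the numeric code, its name, and its description.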
static std::string getCUErrorMessage(CUresult Result) {
  if (!Result)
    return "success";
  const char *ErrorName = "UNKNOWN_ERROR_NAME";
  const char *ErrorDescription = "UNKNOWN_ERROR_DESCRIPTION";
  cuGetErrorName(Result, &ErrorName);
  cuGetErrorString(Result, &ErrorDescription);
  std::ostringstream OutStream;
  OutStream << "CUDA driver error: code = " << Result
            << ", name = " << ErrorName
            << ", description = " << ErrorDescription;
  return OutStream.str();
}

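/// Converts a CUDA driver API result into a Status, attaching the caller's
/// context message. A zero result maps to a success Status.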
static Status getCUError(CUresult Result, const std::string &Message) {
  if (!Result)
    return Status();
  std::ostringstream OutStream;
  OutStream << getCUErrorMessage(Result) << ", message = " << Message;
  return Status(OutStream.str());
}

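/// Builds a human-readable message for a CUDA runtime API error code.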
static std::string getCUDAErrorMessage(cudaError_t E) {
  if (!E)
    return "success";
  std::ostringstream OutStream;
  OutStream << "CUDA runtime error: code = " << E
            << ", name = " << cudaGetErrorName(E)
            << ", description = " << cudaGetErrorString(E);
  return OutStream.str();
}

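/// Converts a CUDA runtime API error into a Status, attaching the caller's
/// context message. cudaSuccess maps to a success Status.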
static Status getCUDAError(cudaError_t E, const std::string &Message) {
  if (!E)
    return Status();
  std::ostringstream OutStream;
  OutStream << getCUDAErrorMessage(E) << ", message = " << Message;
  return Status(OutStream.str());
}

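/// Logs a warning if the given driver API result is an error. Used in handle
/// destructors, where there is no way to return a Status to the caller.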
static void logCUWarning(CUresult Result, const std::string &Message) {
  if (Result) {
    std::ostringstream OutStream;
    OutStream << Message << ": " << getCUErrorMessage(Result);
    logWarning(OutStream.str());
  }
}

/// A CUDA Platform implementation.
class CUDAPlatform : public Platform {
public:
  ~CUDAPlatform() override = default;

  static Expected<CUDAPlatform> create();

  Expected<int> getDeviceCount() override;

  Expected<Stream> createStream(int DeviceIndex) override;

  Status streamSync(void *Stream) override;

  Status streamWaitOnEvent(void *Stream, void *Event) override;

  Expected<Event> createEvent(int DeviceIndex) override;

protected:
  Expected<void *> rawMallocD(ptrdiff_t ByteCount, int DeviceIndex) override;
  HandleDestructor getDeviceMemoryHandleDestructor() override;
  void *getDeviceMemorySpanHandle(void *BaseHandle, size_t ByteSize,
                                  size_t ByteOffset) override;
  void rawDestroyDeviceMemorySpanHandle(void *Handle) override;

  Expected<void *> rawGetDeviceSymbolAddress(const void *Symbol,
                                             int DeviceIndex) override;
  Expected<ptrdiff_t> rawGetDeviceSymbolSize(const void *Symbol,
                                             int DeviceIndex) override;

  Status rawRegisterHostMem(const void *Memory, ptrdiff_t ByteCount) override;
  HandleDestructor getUnregisterHostMemoryHandleDestructor() override;

  Expected<void *> rawMallocRegisteredH(ptrdiff_t ByteCount) override;
  HandleDestructor getFreeHostMemoryHandleDestructor() override;

  Status asyncCopyDToD(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
                       void *DeviceDst, ptrdiff_t DeviceDstByteOffset,
                       ptrdiff_t ByteCount, void *Stream) override;
  Status asyncCopyDToH(const void *DeviceSrc, ptrdiff_t DeviceSrcByteOffset,
                       void *HostDst, ptrdiff_t ByteCount,
                       void *Stream) override;
  Status asyncCopyHToD(const void *HostSrc, void *DeviceDst,
                       ptrdiff_t DeviceDstByteOffset, ptrdiff_t ByteCount,
                       void *Stream) override;

  Status asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
                      ptrdiff_t ByteCount, char ByteValue,
                      void *Stream) override;

  Status addStreamCallback(Stream &Stream, StreamCallback Callback) override;

  Expected<Program> createProgramFromSource(Span<const char> Source,
                                            int DeviceIndex) override;

  Status enqueueEvent(void *Event, void *Stream) override;
  bool eventIsDone(void *Event) override;
  Status eventSync(void *Event) override;
  Expected<float> getSecondsBetweenEvents(void *StartEvent,
                                          void *EndEvent) override;

  Expected<void *> rawCreateKernel(void *Program,
                                   const std::string &Name) override;
  HandleDestructor getKernelHandleDestructor() override;

  Status rawEnqueueKernelLaunch(void *Stream, void *Kernel,
                                KernelLaunchDimensions LaunchDimensions,
                                Span<void *> Arguments,
                                Span<size_t> ArgumentSizes,
                                size_t SharedMemoryBytes) override;

private:
  explicit CUDAPlatform(const std::vector<CUcontext> &Contexts)
      : TheContexts(Contexts) {}

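  /// Makes the context associated with the given device current on the
  /// calling thread, so that subsequent driver API calls target that device.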
  Status setContext(int DeviceIndex) {
    if (DeviceIndex < 0 ||
        static_cast<size_t>(DeviceIndex) >= TheContexts.size())
      return Status("invalid device index " + std::to_string(DeviceIndex));
    return getCUError(cuCtxSetCurrent(TheContexts[DeviceIndex]),
                      "cuCtxSetCurrent");
  }

  // One retained primary context per device, indexed by device index.
  std::vector<CUcontext> TheContexts;
};

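/// Initializes the CUDA driver, then retains and initializes the primary
/// context of each device present on the system.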
Expected<CUDAPlatform> CUDAPlatform::create() {
  std::vector<CUcontext> Contexts;
  if (CUresult Result = cuInit(0))
    return getCUError(Result, "cuInit");

  int DeviceCount = 0;
  if (CUresult Result = cuDeviceGetCount(&DeviceCount))
    return getCUError(Result, "cuDeviceGetCount");

  for (int I = 0; I < DeviceCount; ++I) {
    CUdevice Device;
    if (CUresult Result = cuDeviceGet(&Device, I))
      return getCUError(Result, "cuDeviceGet");
    CUcontext Context;
    if (CUresult Result = cuDevicePrimaryCtxRetain(&Context, Device))
      return getCUError(Result, "cuDevicePrimaryCtxRetain");
    if (CUresult Result = cuCtxSetCurrent(Context))
      return getCUError(Result, "cuCtxSetCurrent");
    Contexts.emplace_back(Context);
  }

  return CUDAPlatform(Contexts);
}

Expected<int> CUDAPlatform::getDeviceCount() {
  int Count = 0;
  if (CUresult Result = cuDeviceGetCount(&Count))
    return getCUError(Result, "cuDeviceGetCount");
  return Count;
}

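// Handle destructor for streams created by createStream; logs a warning if
// the underlying cuStreamDestroy call fails.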
static void cudaDestroyStream(void *H) {
  logCUWarning(cuStreamDestroy(static_cast<CUstream_st *>(H)),
               "cuStreamDestroy");
}

Expected<Stream> CUDAPlatform::createStream(int DeviceIndex) {
  Status S = setContext(DeviceIndex);
  if (S.isError())
    return S;
  unsigned int Flags = CU_STREAM_DEFAULT;
  CUstream Handle;
  if (CUresult Result = cuStreamCreate(&Handle, Flags))
    return getCUError(Result, "cuStreamCreate");
  return constructStream(this, DeviceIndex, Handle, cudaDestroyStream);
}

Status CUDAPlatform::streamSync(void *Stream) {
  return getCUError(cuStreamSynchronize(static_cast<CUstream_st *>(Stream)),
                    "cuStreamSynchronize");
}

Status CUDAPlatform::streamWaitOnEvent(void *Stream, void *Event) {
  // The CUDA docs say the flags must be 0.
  unsigned int Flags = 0u;
  return getCUError(cuStreamWaitEvent(static_cast<CUstream_st *>(Stream),
                                      static_cast<CUevent_st *>(Event), Flags),
                    "cuStreamWaitEvent");
}

static void cudaDestroyEvent(void *H) {
  logCUWarning(cuEventDestroy(static_cast<CUevent_st *>(H)), "cuEventDestroy");
}

Expected<Event> CUDAPlatform::createEvent(int DeviceIndex) {
  Status S = setContext(DeviceIndex);
  if (S.isError())
    return S;
  unsigned int Flags = CU_EVENT_DEFAULT;
  CUevent Handle;
  if (CUresult Result = cuEventCreate(&Handle, Flags))
    return getCUError(Result, "cuEventCreate");
  return constructEvent(this, DeviceIndex, Handle, cudaDestroyEvent);
}

Status CUDAPlatform::enqueueEvent(void *Event, void *Stream) {
  return getCUError(cuEventRecord(static_cast<CUevent_st *>(Event),
                                  static_cast<CUstream_st *>(Stream)),
                    "cuEventRecord");
}

bool CUDAPlatform::eventIsDone(void *Event) {
  return cuEventQuery(static_cast<CUevent_st *>(Event)) != CUDA_ERROR_NOT_READY;
}

Status CUDAPlatform::eventSync(void *Event) {
  return getCUError(cuEventSynchronize(static_cast<CUevent_st *>(Event)),
                    "cuEventSynchronize");
}

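/// Returns the elapsed time between two recorded events, in seconds.
/// cuEventElapsedTime reports the time in milliseconds, so the result is
/// scaled by 1e-3.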
Expected<float> CUDAPlatform::getSecondsBetweenEvents(void *StartEvent,
                                                      void *EndEvent) {
  float Milliseconds;
  if (CUresult Result = cuEventElapsedTime(
          &Milliseconds, static_cast<CUevent_st *>(StartEvent),
          static_cast<CUevent_st *>(EndEvent)))
    return getCUError(Result, "cuEventElapsedTime");
  return Milliseconds * 1e-3;
}

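/// Allocates ByteCount bytes of device memory on the given device. A request
/// for zero bytes returns a null pointer without calling into the driver.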
Expected<void *> CUDAPlatform::rawMallocD(ptrdiff_t ByteCount,
                                          int DeviceIndex) {
  Status S = setContext(DeviceIndex);
  if (S.isError())
    return S;
  if (!ByteCount)
    return nullptr;
  CUdeviceptr Pointer;
  if (CUresult Result = cuMemAlloc(&Pointer, ByteCount))
    return getCUError(Result, "cuMemAlloc");
  return reinterpret_cast<void *>(Pointer);
}

static void cudaDestroyDeviceMemory(void *H) {
  logCUWarning(cuMemFree(reinterpret_cast<CUdeviceptr>(H)), "cuMemFree");
}

HandleDestructor CUDAPlatform::getDeviceMemoryHandleDestructor() {
  return cudaDestroyDeviceMemory;
}

void *CUDAPlatform::getDeviceMemorySpanHandle(void *BaseHandle, size_t,
                                              size_t ByteOffset) {
  return static_cast<char *>(BaseHandle) + ByteOffset;
}

void CUDAPlatform::rawDestroyDeviceMemorySpanHandle(void *) {
  // Do nothing for this platform.
}

Expected<void *> CUDAPlatform::rawGetDeviceSymbolAddress(const void *Symbol,
                                                         int DeviceIndex) {
  Status S = setContext(DeviceIndex);
  if (S.isError())
    return S;
  void *Address;
  if (cudaError_t Status = cudaGetSymbolAddress(&Address, Symbol))
    return getCUDAError(Status, "cudaGetSymbolAddress");
  return Address;
}

Expected<ptrdiff_t> CUDAPlatform::rawGetDeviceSymbolSize(const void *Symbol,
                                                         int DeviceIndex) {
  Status S = setContext(DeviceIndex);
  if (S.isError())
    return S;
  size_t Size;
  if (cudaError_t Status = cudaGetSymbolSize(&Size, Symbol))
    return getCUDAError(Status, "cudaGetSymbolSize");
  return Size;
}

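// Helpers to apply a byte offset to an untyped pointer without losing
// const-qualification.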
static const void *offsetVoidPtr(const void *Ptr, ptrdiff_t ByteOffset) {
  return static_cast<const void *>(static_cast<const char *>(Ptr) + ByteOffset);
}

static void *offsetVoidPtr(void *Ptr, ptrdiff_t ByteOffset) {
  return static_cast<void *>(static_cast<char *>(Ptr) + ByteOffset);
}

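/// Page-locks an existing host allocation with cuMemHostRegister so that it
/// can be used for asynchronous copies.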
Status CUDAPlatform::rawRegisterHostMem(const void *Memory,
                                        ptrdiff_t ByteCount) {
  unsigned int Flags = 0;
  return getCUError(
      cuMemHostRegister(const_cast<void *>(Memory), ByteCount, Flags),
      "cuMemHostRegister");
}

static void cudaUnregisterHostMemoryHandleDestructor(void *H) {
  logCUWarning(cuMemHostUnregister(H), "cuMemHostUnregister");
}

HandleDestructor CUDAPlatform::getUnregisterHostMemoryHandleDestructor() {
  return cudaUnregisterHostMemoryHandleDestructor;
}

Expected<void *> CUDAPlatform::rawMallocRegisteredH(ptrdiff_t ByteCount) {
  unsigned int Flags = 0;
  void *Memory;
  if (CUresult Result = cuMemHostAlloc(&Memory, ByteCount, Flags))
    return getCUError(Result, "cuMemHostAlloc");
  return Memory;
}

static void cudaFreeHostMemoryHandleDestructor(void *H) {
  logCUWarning(cuMemFreeHost(H), "cuMemFreeHost");
}

HandleDestructor CUDAPlatform::getFreeHostMemoryHandleDestructor() {
  return cudaFreeHostMemoryHandleDestructor;
}

Status CUDAPlatform::asyncCopyDToD(const void *DeviceSrc,
                                   ptrdiff_t DeviceSrcByteOffset,
                                   void *DeviceDst,
                                   ptrdiff_t DeviceDstByteOffset,
                                   ptrdiff_t ByteCount, void *Stream) {
  return getCUError(
      cuMemcpyDtoDAsync(reinterpret_cast<CUdeviceptr>(
                            offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
                        reinterpret_cast<CUdeviceptr>(
                            offsetVoidPtr(DeviceSrc, DeviceSrcByteOffset)),
                        ByteCount, static_cast<CUstream_st *>(Stream)),
      "cuMemcpyDtoDAsync");
}

Status CUDAPlatform::asyncCopyDToH(const void *DeviceSrc,
                                   ptrdiff_t DeviceSrcByteOffset, void *HostDst,
                                   ptrdiff_t ByteCount, void *Stream) {
  return getCUError(
      cuMemcpyDtoHAsync(HostDst, reinterpret_cast<CUdeviceptr>(offsetVoidPtr(
                                     DeviceSrc, DeviceSrcByteOffset)),
                        ByteCount, static_cast<CUstream_st *>(Stream)),
      "cuMemcpyDtoHAsync");
}

Status CUDAPlatform::asyncCopyHToD(const void *HostSrc, void *DeviceDst,
                                   ptrdiff_t DeviceDstByteOffset,
                                   ptrdiff_t ByteCount, void *Stream) {
  return getCUError(
      cuMemcpyHtoDAsync(reinterpret_cast<CUdeviceptr>(
                            offsetVoidPtr(DeviceDst, DeviceDstByteOffset)),
                        HostSrc, ByteCount, static_cast<CUstream_st *>(Stream)),
      "cuMemcpyHtoDAsync");
}

Status CUDAPlatform::asyncMemsetD(void *DeviceDst, ptrdiff_t ByteOffset,
                                  ptrdiff_t ByteCount, char ByteValue,
                                  void *Stream) {
  return getCUError(
      cuMemsetD8Async(
          reinterpret_cast<CUdeviceptr>(offsetVoidPtr(DeviceDst, ByteOffset)),
          ByteValue, ByteCount, static_cast<CUstream_st *>(Stream)),
      "cuMemsetD8Async");
}

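// Heap-allocated payload passed to cuStreamAddCallback. The shim below takes
// back ownership of the payload and invokes the user's callback with the
// stream and the callback's error status.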
struct StreamCallbackUserData {
  StreamCallbackUserData(Stream &Stream, StreamCallback Function)
      : TheStream(Stream), TheFunction(std::move(Function)) {}

  Stream &TheStream;
  StreamCallback TheFunction;
};

static void CUDA_CB cuStreamCallbackShim(CUstream HStream, CUresult Status,
                                         void *UserData) {
  std::unique_ptr<StreamCallbackUserData> Data(
      static_cast<StreamCallbackUserData *>(UserData));
  Stream &TheStream = Data->TheStream;
  assert(static_cast<CUstream_st *>(TheStream) == HStream);
  Data->TheFunction(TheStream,
                    getCUError(Status, "stream callback error state"));
}

Status CUDAPlatform::addStreamCallback(Stream &Stream,
                                       StreamCallback Callback) {
  // CUDA docs say flags must always be 0 here.
  unsigned int Flags = 0u;
  std::unique_ptr<StreamCallbackUserData> UserData(
      new StreamCallbackUserData(Stream, std::move(Callback)));
  return getCUError(cuStreamAddCallback(Stream, cuStreamCallbackShim,
                                        UserData.release(), Flags),
                    "cuStreamAddCallback");
}

static void cudaDestroyProgram(void *H) {
  logCUWarning(cuModuleUnload(static_cast<CUmod_st *>(H)), "cuModuleUnload");
}

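/// Loads a device module image (such as PTX) into a CUmodule via
/// cuModuleLoadDataEx, capturing the JIT info and error logs so they can be
/// included in the error message if loading fails.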
Expected<Program> CUDAPlatform::createProgramFromSource(Span<const char> Source,
                                                        int DeviceIndex) {
  Status S = setContext(DeviceIndex);
  if (S.isError())
    return S;
  CUmodule Module;
  constexpr int LogBufferSizeBytes = 1024;
  char InfoLogBuffer[LogBufferSizeBytes];
  char ErrorLogBuffer[LogBufferSizeBytes];
  constexpr size_t OptionsCount = 4;
  std::array<CUjit_option, OptionsCount> OptionNames = {
      {CU_JIT_INFO_LOG_BUFFER, CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES,
       CU_JIT_ERROR_LOG_BUFFER, CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES}};
  std::array<void *, OptionsCount> OptionValues = {
      {InfoLogBuffer, const_cast<int *>(&LogBufferSizeBytes), ErrorLogBuffer,
       const_cast<int *>(&LogBufferSizeBytes)}};
  if (CUresult Result =
          cuModuleLoadDataEx(&Module, Source.data(), OptionsCount,
                             OptionNames.data(), OptionValues.data())) {
    InfoLogBuffer[LogBufferSizeBytes - 1] = '\0';
    ErrorLogBuffer[LogBufferSizeBytes - 1] = '\0';
    std::ostringstream OutStream;
    OutStream << "Error creating program from source: "
              << getCUErrorMessage(Result)
              << "\nINFO MESSAGES\n================\n"
              << InfoLogBuffer << "\nERROR MESSAGES\n==================\n"
              << ErrorLogBuffer;
    return Status(OutStream.str());
  }
  return constructProgram(this, Module, cudaDestroyProgram);
}

Expected<void *> CUDAPlatform::rawCreateKernel(void *Program,
                                               const std::string &Name) {
  CUmodule Module = static_cast<CUmodule>(Program);
  CUfunction Kernel;
  if (CUresult Result = cuModuleGetFunction(&Kernel, Module, Name.c_str()))
    return getCUError(Result, "cuModuleGetFunction");
  return Kernel;
}

static void cudaDestroyKernel(void *) {
  // Do nothing.
}

HandleDestructor CUDAPlatform::getKernelHandleDestructor() {
  return cudaDestroyKernel;
}

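/// Launches a kernel on the given stream. The ArgumentSizes span is unused
/// here because cuLaunchKernel takes an array of pointers to the argument
/// values and determines their sizes from the kernel's signature.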
Status CUDAPlatform::rawEnqueueKernelLaunch(
    void *Stream, void *Kernel, KernelLaunchDimensions LaunchDimensions,
    Span<void *> Arguments, Span<size_t>, size_t SharedMemoryBytes) {
  return getCUError(
      cuLaunchKernel(static_cast<CUfunction>(Kernel), LaunchDimensions.GridX,
                     LaunchDimensions.GridY, LaunchDimensions.GridZ,
                     LaunchDimensions.BlockX, LaunchDimensions.BlockY,
                     LaunchDimensions.BlockZ, SharedMemoryBytes,
                     static_cast<CUstream>(Stream), Arguments.data(), nullptr),
      "cuLaunchKernel");
}

} // namespace

namespace cuda {

/// Gets the CUDAPlatform instance and returns it as an unowned pointer to a
/// Platform.
Expected<Platform *> getPlatform() {
  static auto MaybePlatform = []() -> Expected<CUDAPlatform *> {
    Expected<CUDAPlatform> CreationResult = CUDAPlatform::create();
    if (CreationResult.isError())
      return CreationResult.getError();
    else
      return new CUDAPlatform(CreationResult.takeValue());
  }();
  return MaybePlatform;
}
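
// Example usage (a sketch; exact error handling depends on the Expected and
// Status types declared in acxxel.h, and handleError is hypothetical):
//
//   acxxel::Expected<acxxel::Platform *> MaybePlatform =
//       acxxel::cuda::getPlatform();
//   if (MaybePlatform.isError())
//     handleError(MaybePlatform.getError());
//   acxxel::Platform *ThePlatform = MaybePlatform.takeValue();
//   acxxel::Expected<int> DeviceCount = ThePlatform->getDeviceCount();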

} // namespace cuda

} // namespace acxxel