1 /* 2 * Copyright 1993-2015 NVIDIA Corporation. All rights reserved. 3 * 4 * Please refer to the NVIDIA end user license agreement (EULA) associated 5 * with this source code for terms and conditions that govern your use of 6 * this software. Any use, reproduction, disclosure, or distribution of 7 * this software and related documentation outside the terms of the EULA 8 * is strictly prohibited. 9 * 10 */ 11 12 #ifndef __cuda_cuda_h__ 13 #define __cuda_cuda_h__ 14 15 #include <stdlib.h> 16 17 #ifndef __CUDA_API_VERSION 18 #define __CUDA_API_VERSION 4000 19 #endif 20 21 /** 22 * \defgroup CUDA_DRIVER CUDA Driver API 23 * 24 * This section describes the low-level CUDA driver application programming 25 * interface. 26 * 27 * @{ 28 */ 29 30 /** 31 * \defgroup CUDA_TYPES Data types used by CUDA driver 32 * @{ 33 */ 34 35 /** 36 * CUDA API version number 37 */ 38 #define CUDA_VERSION 4000 /* 4.0 */ 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 /** 45 * CUDA device pointer 46 */ 47 #if __CUDA_API_VERSION >= 3020 48 49 #if defined(__x86_64) || defined(AMD64) || defined(_M_AMD64) || defined(__aarch64__) 50 typedef unsigned long long CUdeviceptr; 51 #else 52 typedef unsigned int CUdeviceptr; 53 #endif 54 55 #endif /* __CUDA_API_VERSION >= 3020 */ 56 57 typedef int CUdevice; /**< CUDA device */ 58 typedef struct CUctx_st *CUcontext; /**< CUDA context */ 59 typedef struct CUmod_st *CUmodule; /**< CUDA module */ 60 typedef struct CUfunc_st *CUfunction; /**< CUDA function */ 61 typedef struct CUarray_st *CUarray; /**< CUDA array */ 62 typedef struct CUtexref_st *CUtexref; /**< CUDA texture reference */ 63 typedef struct CUsurfref_st *CUsurfref; /**< CUDA surface reference */ 64 typedef struct CUevent_st *CUevent; /**< CUDA event */ 65 typedef struct CUstream_st *CUstream; /**< CUDA stream */ 66 typedef struct CUgraphicsResource_st *CUgraphicsResource; /**< CUDA graphics interop resource */ 67 68 typedef struct CUuuid_st /**< CUDA definition of UUID */ 69 { 70 char 
bytes[16]; 71 } CUuuid; 72 73 /** 74 * Context creation flags 75 */ 76 typedef enum CUctx_flags_enum 77 { 78 CU_CTX_SCHED_AUTO = 0x00, /**< Automatic scheduling */ 79 CU_CTX_SCHED_SPIN = 0x01, /**< Set spin as default scheduling */ 80 CU_CTX_SCHED_YIELD = 0x02, /**< Set yield as default scheduling */ 81 CU_CTX_SCHED_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling */ 82 CU_CTX_BLOCKING_SYNC = 0x04, /**< Set blocking synchronization as default scheduling \deprecated */ 83 CU_CTX_MAP_HOST = 0x08, /**< Support mapped pinned allocations */ 84 CU_CTX_LMEM_RESIZE_TO_MAX = 0x10, /**< Keep local memory allocation after launch */ 85 #if __CUDA_API_VERSION < 4000 86 CU_CTX_SCHED_MASK = 0x03, 87 CU_CTX_FLAGS_MASK = 0x1f 88 #else 89 CU_CTX_SCHED_MASK = 0x07, 90 CU_CTX_PRIMARY = 0x20, /**< Initialize and return the primary context */ 91 CU_CTX_FLAGS_MASK = 0x3f 92 #endif 93 } CUctx_flags; 94 95 /** 96 * Event creation flags 97 */ 98 typedef enum CUevent_flags_enum 99 { 100 CU_EVENT_DEFAULT = 0, /**< Default event flag */ 101 CU_EVENT_BLOCKING_SYNC = 1, /**< Event uses blocking synchronization */ 102 CU_EVENT_DISABLE_TIMING = 2 /**< Event will not record timing data */ 103 } CUevent_flags; 104 105 /** 106 * Array formats 107 */ 108 typedef enum CUarray_format_enum 109 { 110 CU_AD_FORMAT_UNSIGNED_INT8 = 0x01, /**< Unsigned 8-bit integers */ 111 CU_AD_FORMAT_UNSIGNED_INT16 = 0x02, /**< Unsigned 16-bit integers */ 112 CU_AD_FORMAT_UNSIGNED_INT32 = 0x03, /**< Unsigned 32-bit integers */ 113 CU_AD_FORMAT_SIGNED_INT8 = 0x08, /**< Signed 8-bit integers */ 114 CU_AD_FORMAT_SIGNED_INT16 = 0x09, /**< Signed 16-bit integers */ 115 CU_AD_FORMAT_SIGNED_INT32 = 0x0a, /**< Signed 32-bit integers */ 116 CU_AD_FORMAT_HALF = 0x10, /**< 16-bit floating point */ 117 CU_AD_FORMAT_FLOAT = 0x20 /**< 32-bit floating point */ 118 } CUarray_format; 119 120 /** 121 * Texture reference addressing modes 122 */ 123 typedef enum CUaddress_mode_enum 124 { 125 
CU_TR_ADDRESS_MODE_WRAP = 0, /**< Wrapping address mode */ 126 CU_TR_ADDRESS_MODE_CLAMP = 1, /**< Clamp to edge address mode */ 127 CU_TR_ADDRESS_MODE_MIRROR = 2, /**< Mirror address mode */ 128 CU_TR_ADDRESS_MODE_BORDER = 3 /**< Border address mode */ 129 } CUaddress_mode; 130 131 /** 132 * Texture reference filtering modes 133 */ 134 typedef enum CUfilter_mode_enum 135 { 136 CU_TR_FILTER_MODE_POINT = 0, /**< Point filter mode */ 137 CU_TR_FILTER_MODE_LINEAR = 1 /**< Linear filter mode */ 138 } CUfilter_mode; 139 140 /** 141 * Device properties 142 */ 143 typedef enum CUdevice_attribute_enum 144 { 145 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1, /**< Maximum number of threads per block */ 146 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2, /**< Maximum block dimension X */ 147 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3, /**< Maximum block dimension Y */ 148 CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4, /**< Maximum block dimension Z */ 149 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5, /**< Maximum grid dimension X */ 150 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6, /**< Maximum grid dimension Y */ 151 CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7, /**< Maximum grid dimension Z */ 152 CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8, /**< Maximum shared memory available per block in bytes */ 153 CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */ 154 CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */ 155 CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10, /**< Warp size in threads */ 156 CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11, /**< Maximum pitch in bytes allowed by memory copies */ 157 CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12, /**< Maximum number of 32-bit registers available per block */ 158 CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12, /**< Deprecated, use CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */ 159 CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13, /**< 
Peak clock frequency in kilohertz */ 160 CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14, /**< Alignment requirement for textures */ 161 CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15, /**< Device can possibly copy memory and execute a kernel concurrently */ 162 CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16, /**< Number of multiprocessors on device */ 163 CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17, /**< Specifies whether there is a run time limit on kernels */ 164 CU_DEVICE_ATTRIBUTE_INTEGRATED = 18, /**< Device is integrated with host memory */ 165 CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19, /**< Device can map host memory into CUDA address space */ 166 CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20, /**< Compute mode (See ::CUcomputemode for details) */ 167 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21, /**< Maximum 1D texture width */ 168 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22, /**< Maximum 2D texture width */ 169 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23, /**< Maximum 2D texture height */ 170 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24, /**< Maximum 3D texture width */ 171 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25, /**< Maximum 3D texture height */ 172 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26, /**< Maximum 3D texture depth */ 173 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27, /**< Maximum texture array width */ 174 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28, /**< Maximum texture array height */ 175 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29, /**< Maximum slices in a texture array */ 176 CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30, /**< Alignment requirement for surfaces */ 177 CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31, /**< Device can possibly execute multiple kernels concurrently */ 178 CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32, /**< Device has ECC support enabled */ 179 CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33, /**< PCI bus ID of the device */ 180 CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34, /**< PCI device ID of 
the device */ 181 CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35 /**< Device is using TCC driver model */ 182 #if __CUDA_API_VERSION >= 4000 183 , CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36, /**< Peak memory clock frequency in kilohertz */ 184 CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37, /**< Global memory bus width in bits */ 185 CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38, /**< Size of L2 cache in bytes */ 186 CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39, /**< Maximum resident threads per multiprocessor */ 187 CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40, /**< Number of asynchronous engines */ 188 CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41, /**< Device uses shares a unified address space with the host */ 189 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42, /**< Maximum 1D layered texture width */ 190 CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43 /**< Maximum layers in a 1D layered texture */ 191 #endif 192 } CUdevice_attribute; 193 194 /** 195 * Legacy device properties 196 */ 197 typedef struct CUdevprop_st 198 { 199 int maxThreadsPerBlock; /**< Maximum number of threads per block */ 200 int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */ 201 int maxGridSize[3]; /**< Maximum size of each dimension of a grid */ 202 int sharedMemPerBlock; /**< Shared memory available per block in bytes */ 203 int totalConstantMemory; /**< Constant memory available on device in bytes */ 204 int SIMDWidth; /**< Warp size in threads */ 205 int memPitch; /**< Maximum pitch in bytes allowed by memory copies */ 206 int regsPerBlock; /**< 32-bit registers available per block */ 207 int clockRate; /**< Clock frequency in kilohertz */ 208 int textureAlign; /**< Alignment requirement for textures */ 209 } CUdevprop; 210 211 /** 212 * Function properties 213 */ 214 typedef enum CUfunction_attribute_enum 215 { 216 /** 217 * The maximum number of threads per block, beyond which a launch of the 218 * function would fail. 
This number depends on both the function and the 219 * device on which the function is currently loaded. 220 */ 221 CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 0, 222 223 /** 224 * The size in bytes of statically-allocated shared memory required by 225 * this function. This does not include dynamically-allocated shared 226 * memory requested by the user at runtime. 227 */ 228 CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES = 1, 229 230 /** 231 * The size in bytes of user-allocated constant memory required by this 232 * function. 233 */ 234 CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES = 2, 235 236 /** 237 * The size in bytes of local memory used by each thread of this function. 238 */ 239 CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES = 3, 240 241 /** 242 * The number of registers used by each thread of this function. 243 */ 244 CU_FUNC_ATTRIBUTE_NUM_REGS = 4, 245 246 /** 247 * The PTX virtual architecture version for which the function was 248 * compiled. This value is the major PTX version * 10 + the minor PTX 249 * version, so a PTX version 1.3 function would return the value 13. 250 * Note that this may return the undefined value of 0 for cubins 251 * compiled prior to CUDA 3.0. 252 */ 253 CU_FUNC_ATTRIBUTE_PTX_VERSION = 5, 254 255 /** 256 * The binary architecture version for which the function was compiled. 257 * This value is the major binary version * 10 + the minor binary version, 258 * so a binary version 1.3 function would return the value 13. Note that 259 * this will return a value of 10 for legacy cubins that do not have a 260 * properly-encoded binary architecture version. 
261 */ 262 CU_FUNC_ATTRIBUTE_BINARY_VERSION = 6, 263 264 CU_FUNC_ATTRIBUTE_MAX 265 } CUfunction_attribute; 266 267 /** 268 * Function cache configurations 269 */ 270 typedef enum CUfunc_cache_enum 271 { 272 CU_FUNC_CACHE_PREFER_NONE = 0x00, /**< no preference for shared memory or L1 (default) */ 273 CU_FUNC_CACHE_PREFER_SHARED = 0x01, /**< prefer larger shared memory and smaller L1 cache */ 274 CU_FUNC_CACHE_PREFER_L1 = 0x02 /**< prefer larger L1 cache and smaller shared memory */ 275 } CUfunc_cache; 276 277 /** 278 * Memory types 279 */ 280 typedef enum CUmemorytype_enum 281 { 282 CU_MEMORYTYPE_HOST = 0x01, /**< Host memory */ 283 CU_MEMORYTYPE_DEVICE = 0x02, /**< Device memory */ 284 CU_MEMORYTYPE_ARRAY = 0x03 /**< Array memory */ 285 #if __CUDA_API_VERSION >= 4000 286 , CU_MEMORYTYPE_UNIFIED = 0x04 /**< Unified device or host memory */ 287 #endif 288 } CUmemorytype; 289 290 /** 291 * Compute Modes 292 */ 293 typedef enum CUcomputemode_enum 294 { 295 CU_COMPUTEMODE_DEFAULT = 0, /**< Default compute mode (Multiple contexts allowed per device) */ 296 CU_COMPUTEMODE_EXCLUSIVE = 1, /**< Compute-exclusive-thread mode (Only one context used by a single thread can be present on this device at a time) */ 297 CU_COMPUTEMODE_PROHIBITED = 2 /**< Compute-prohibited mode (No contexts can be created on this device at this time) */ 298 #if __CUDA_API_VERSION >= 4000 299 , CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3 /**< Compute-exclusive-process mode (Only one context used by a single process can be present on this device at a time) */ 300 #endif 301 } CUcomputemode; 302 303 /** 304 * Online compiler options 305 */ 306 typedef enum CUjit_option_enum 307 { 308 /** 309 * Max number of registers that a thread may use.\n 310 * Option type: unsigned int 311 */ 312 CU_JIT_MAX_REGISTERS = 0, 313 314 /** 315 * IN: Specifies minimum number of threads per block to target compilation 316 * for\n 317 * OUT: Returns the number of threads the compiler actually targeted. 
318 * This restricts the resource utilization fo the compiler (e.g. max 319 * registers) such that a block with the given number of threads should be 320 * able to launch based on register limitations. Note, this option does not 321 * currently take into account any other resource limitations, such as 322 * shared memory utilization.\n 323 * Option type: unsigned int 324 */ 325 CU_JIT_THREADS_PER_BLOCK, 326 327 /** 328 * Returns a float value in the option of the wall clock time, in 329 * milliseconds, spent creating the cubin\n 330 * Option type: float 331 */ 332 CU_JIT_WALL_TIME, 333 334 /** 335 * Pointer to a buffer in which to print any log messsages from PTXAS 336 * that are informational in nature (the buffer size is specified via 337 * option ::CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES) \n 338 * Option type: char* 339 */ 340 CU_JIT_INFO_LOG_BUFFER, 341 342 /** 343 * IN: Log buffer size in bytes. Log messages will be capped at this size 344 * (including null terminator)\n 345 * OUT: Amount of log buffer filled with messages\n 346 * Option type: unsigned int 347 */ 348 CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES, 349 350 /** 351 * Pointer to a buffer in which to print any log messages from PTXAS that 352 * reflect errors (the buffer size is specified via option 353 * ::CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES)\n 354 * Option type: char* 355 */ 356 CU_JIT_ERROR_LOG_BUFFER, 357 358 /** 359 * IN: Log buffer size in bytes. Log messages will be capped at this size 360 * (including null terminator)\n 361 * OUT: Amount of log buffer filled with messages\n 362 * Option type: unsigned int 363 */ 364 CU_JIT_ERROR_LOG_BUFFER_SIZE_BYTES, 365 366 /** 367 * Level of optimizations to apply to generated code (0 - 4), with 4 368 * being the default and highest level of optimizations.\n 369 * Option type: unsigned int 370 */ 371 CU_JIT_OPTIMIZATION_LEVEL, 372 373 /** 374 * No option value required. 
Determines the target based on the current 375 * attached context (default)\n 376 * Option type: No option value needed 377 */ 378 CU_JIT_TARGET_FROM_CUCONTEXT, 379 380 /** 381 * Target is chosen based on supplied ::CUjit_target_enum.\n 382 * Option type: unsigned int for enumerated type ::CUjit_target_enum 383 */ 384 CU_JIT_TARGET, 385 386 /** 387 * Specifies choice of fallback strategy if matching cubin is not found. 388 * Choice is based on supplied ::CUjit_fallback_enum.\n 389 * Option type: unsigned int for enumerated type ::CUjit_fallback_enum 390 */ 391 CU_JIT_FALLBACK_STRATEGY 392 393 } CUjit_option; 394 395 /** 396 * Online compilation targets 397 */ 398 typedef enum CUjit_target_enum 399 { 400 CU_TARGET_COMPUTE_10 = 0, /**< Compute device class 1.0 */ 401 CU_TARGET_COMPUTE_11, /**< Compute device class 1.1 */ 402 CU_TARGET_COMPUTE_12, /**< Compute device class 1.2 */ 403 CU_TARGET_COMPUTE_13, /**< Compute device class 1.3 */ 404 CU_TARGET_COMPUTE_20, /**< Compute device class 2.0 */ 405 CU_TARGET_COMPUTE_21 /**< Compute device class 2.1 */ 406 } CUjit_target; 407 408 /** 409 * Cubin matching fallback strategies 410 */ 411 typedef enum CUjit_fallback_enum 412 { 413 CU_PREFER_PTX = 0, /**< Prefer to compile ptx */ 414 415 CU_PREFER_BINARY /**< Prefer to fall back to compatible binary code */ 416 417 } CUjit_fallback; 418 419 /** 420 * Flags to register a graphics resource 421 */ 422 typedef enum CUgraphicsRegisterFlags_enum 423 { 424 CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00, 425 CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01, 426 CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02, 427 CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST = 0x04 428 } CUgraphicsRegisterFlags; 429 430 /** 431 * Flags for mapping and unmapping interop resources 432 */ 433 typedef enum CUgraphicsMapResourceFlags_enum 434 { 435 CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00, 436 CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, 437 CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02 438 } 
CUgraphicsMapResourceFlags; 439 440 /** 441 * Array indices for cube faces 442 */ 443 typedef enum CUarray_cubemap_face_enum 444 { 445 CU_CUBEMAP_FACE_POSITIVE_X = 0x00, /**< Positive X face of cubemap */ 446 CU_CUBEMAP_FACE_NEGATIVE_X = 0x01, /**< Negative X face of cubemap */ 447 CU_CUBEMAP_FACE_POSITIVE_Y = 0x02, /**< Positive Y face of cubemap */ 448 CU_CUBEMAP_FACE_NEGATIVE_Y = 0x03, /**< Negative Y face of cubemap */ 449 CU_CUBEMAP_FACE_POSITIVE_Z = 0x04, /**< Positive Z face of cubemap */ 450 CU_CUBEMAP_FACE_NEGATIVE_Z = 0x05 /**< Negative Z face of cubemap */ 451 } CUarray_cubemap_face; 452 453 /** 454 * Limits 455 */ 456 typedef enum CUlimit_enum 457 { 458 CU_LIMIT_STACK_SIZE = 0x00, /**< GPU thread stack size */ 459 CU_LIMIT_PRINTF_FIFO_SIZE = 0x01, /**< GPU printf FIFO size */ 460 CU_LIMIT_MALLOC_HEAP_SIZE = 0x02 /**< GPU malloc heap size */ 461 } CUlimit; 462 463 /** 464 * Error codes 465 */ 466 typedef enum cudaError_enum 467 { 468 /** 469 * The API call returned with no errors. In the case of query calls, this 470 * can also mean that the operation being queried is complete (see 471 * ::cuEventQuery() and ::cuStreamQuery()). 472 */ 473 CUDA_SUCCESS = 0, 474 475 /** 476 * This indicates that one or more of the parameters passed to the API call 477 * is not within an acceptable range of values. 478 */ 479 CUDA_ERROR_INVALID_VALUE = 1, 480 481 /** 482 * The API call failed because it was unable to allocate enough memory to 483 * perform the requested operation. 484 */ 485 CUDA_ERROR_OUT_OF_MEMORY = 2, 486 487 /** 488 * This indicates that the CUDA driver has not been initialized with 489 * ::cuInit() or that initialization has failed. 490 */ 491 CUDA_ERROR_NOT_INITIALIZED = 3, 492 493 /** 494 * This indicates that the CUDA driver is in the process of shutting down. 495 */ 496 CUDA_ERROR_DEINITIALIZED = 4, 497 498 /** 499 * This indicates profiling APIs are called while application is running 500 * in visual profiler mode. 
501 */ 502 CUDA_ERROR_PROFILER_DISABLED = 5, 503 /** 504 * This indicates profiling has not been initialized for this context. 505 * Call cuProfilerInitialize() to resolve this. 506 */ 507 CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6, 508 /** 509 * This indicates profiler has already been started and probably 510 * cuProfilerStart() is incorrectly called. 511 */ 512 CUDA_ERROR_PROFILER_ALREADY_STARTED = 7, 513 /** 514 * This indicates profiler has already been stopped and probably 515 * cuProfilerStop() is incorrectly called. 516 */ 517 CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8, 518 /** 519 * This indicates that no CUDA-capable devices were detected by the installed 520 * CUDA driver. 521 */ 522 CUDA_ERROR_NO_DEVICE = 100, 523 524 /** 525 * This indicates that the device ordinal supplied by the user does not 526 * correspond to a valid CUDA device. 527 */ 528 CUDA_ERROR_INVALID_DEVICE = 101, 529 530 531 /** 532 * This indicates that the device kernel image is invalid. This can also 533 * indicate an invalid CUDA module. 534 */ 535 CUDA_ERROR_INVALID_IMAGE = 200, 536 537 /** 538 * This most frequently indicates that there is no context bound to the 539 * current thread. This can also be returned if the context passed to an 540 * API call is not a valid handle (such as a context that has had 541 * ::cuCtxDestroy() invoked on it). This can also be returned if a user 542 * mixes different API versions (i.e. 3010 context with 3020 API calls). 543 * See ::cuCtxGetApiVersion() for more details. 544 */ 545 CUDA_ERROR_INVALID_CONTEXT = 201, 546 547 /** 548 * This indicated that the context being supplied as a parameter to the 549 * API call was already the active context. 550 * \deprecated 551 * This error return is deprecated as of CUDA 3.2. It is no longer an 552 * error to attempt to push the active context via ::cuCtxPushCurrent(). 553 */ 554 CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202, 555 556 /** 557 * This indicates that a map or register operation has failed. 
558 */ 559 CUDA_ERROR_MAP_FAILED = 205, 560 561 /** 562 * This indicates that an unmap or unregister operation has failed. 563 */ 564 CUDA_ERROR_UNMAP_FAILED = 206, 565 566 /** 567 * This indicates that the specified array is currently mapped and thus 568 * cannot be destroyed. 569 */ 570 CUDA_ERROR_ARRAY_IS_MAPPED = 207, 571 572 /** 573 * This indicates that the resource is already mapped. 574 */ 575 CUDA_ERROR_ALREADY_MAPPED = 208, 576 577 /** 578 * This indicates that there is no kernel image available that is suitable 579 * for the device. This can occur when a user specifies code generation 580 * options for a particular CUDA source file that do not include the 581 * corresponding device configuration. 582 */ 583 CUDA_ERROR_NO_BINARY_FOR_GPU = 209, 584 585 /** 586 * This indicates that a resource has already been acquired. 587 */ 588 CUDA_ERROR_ALREADY_ACQUIRED = 210, 589 590 /** 591 * This indicates that a resource is not mapped. 592 */ 593 CUDA_ERROR_NOT_MAPPED = 211, 594 595 /** 596 * This indicates that a mapped resource is not available for access as an 597 * array. 598 */ 599 CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212, 600 601 /** 602 * This indicates that a mapped resource is not available for access as a 603 * pointer. 604 */ 605 CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213, 606 607 /** 608 * This indicates that an uncorrectable ECC error was detected during 609 * execution. 610 */ 611 CUDA_ERROR_ECC_UNCORRECTABLE = 214, 612 613 /** 614 * This indicates that the ::CUlimit passed to the API call is not 615 * supported by the active device. 616 */ 617 CUDA_ERROR_UNSUPPORTED_LIMIT = 215, 618 619 /** 620 * This indicates that the ::CUcontext passed to the API call can 621 * only be bound to a single CPU thread at a time but is already 622 * bound to a CPU thread. 623 */ 624 CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216, 625 626 /** 627 * This indicates that the device kernel source is invalid. 
628 */ 629 CUDA_ERROR_INVALID_SOURCE = 300, 630 631 /** 632 * This indicates that the file specified was not found. 633 */ 634 CUDA_ERROR_FILE_NOT_FOUND = 301, 635 636 /** 637 * This indicates that a link to a shared object failed to resolve. 638 */ 639 CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302, 640 641 /** 642 * This indicates that initialization of a shared object failed. 643 */ 644 CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303, 645 646 /** 647 * This indicates that an OS call failed. 648 */ 649 CUDA_ERROR_OPERATING_SYSTEM = 304, 650 651 652 /** 653 * This indicates that a resource handle passed to the API call was not 654 * valid. Resource handles are opaque types like ::CUstream and ::CUevent. 655 */ 656 CUDA_ERROR_INVALID_HANDLE = 400, 657 658 659 /** 660 * This indicates that a named symbol was not found. Examples of symbols 661 * are global/constant variable names, texture names, and surface names. 662 */ 663 CUDA_ERROR_NOT_FOUND = 500, 664 665 666 /** 667 * This indicates that asynchronous operations issued previously have not 668 * completed yet. This result is not actually an error, but must be indicated 669 * differently than ::CUDA_SUCCESS (which indicates completion). Calls that 670 * may return this value include ::cuEventQuery() and ::cuStreamQuery(). 671 */ 672 CUDA_ERROR_NOT_READY = 600, 673 674 675 /** 676 * An exception occurred on the device while executing a kernel. Common 677 * causes include dereferencing an invalid device pointer and accessing 678 * out of bounds shared memory. The context cannot be used, so it must 679 * be destroyed (and a new one should be created). All existing device 680 * memory allocations from this context are invalid and must be 681 * reconstructed if the program is to continue using CUDA. 682 */ 683 CUDA_ERROR_LAUNCH_FAILED = 700, 684 685 /** 686 * This indicates that a launch did not occur because it did not have 687 * appropriate resources. 
This error usually indicates that the user has 688 * attempted to pass too many arguments to the device kernel, or the 689 * kernel launch specifies too many threads for the kernel's register 690 * count. Passing arguments of the wrong size (i.e. a 64-bit pointer 691 * when a 32-bit int is expected) is equivalent to passing too many 692 * arguments and can also result in this error. 693 */ 694 CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701, 695 696 /** 697 * This indicates that the device kernel took too long to execute. This can 698 * only occur if timeouts are enabled - see the device attribute 699 * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT for more information. The 700 * context cannot be used (and must be destroyed similar to 701 * ::CUDA_ERROR_LAUNCH_FAILED). All existing device memory allocations from 702 * this context are invalid and must be reconstructed if the program is to 703 * continue using CUDA. 704 */ 705 CUDA_ERROR_LAUNCH_TIMEOUT = 702, 706 707 /** 708 * This error indicates a kernel launch that uses an incompatible texturing 709 * mode. 710 */ 711 CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703, 712 713 /** 714 * This error indicates that a call to ::cuCtxEnablePeerAccess() is 715 * trying to re-enable peer access to a context which has already 716 * had peer access to it enabled. 717 */ 718 CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704, 719 720 /** 721 * This error indicates that a call to ::cuMemPeerRegister is trying to 722 * register memory from a context which has not had peer access 723 * enabled yet via ::cuCtxEnablePeerAccess(), or that 724 * ::cuCtxDisablePeerAccess() is trying to disable peer access 725 * which has not been enabled yet. 726 */ 727 CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705, 728 729 /** 730 * This error indicates that a call to ::cuMemPeerRegister is trying to 731 * register already-registered memory. 
732 */ 733 CUDA_ERROR_PEER_MEMORY_ALREADY_REGISTERED = 706, 734 735 /** 736 * This error indicates that a call to ::cuMemPeerUnregister is trying to 737 * unregister memory that has not been registered. 738 */ 739 CUDA_ERROR_PEER_MEMORY_NOT_REGISTERED = 707, 740 741 /** 742 * This error indicates that ::cuCtxCreate was called with the flag 743 * ::CU_CTX_PRIMARY on a device which already has initialized its 744 * primary context. 745 */ 746 CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708, 747 748 /** 749 * This error indicates that the context current to the calling thread 750 * has been destroyed using ::cuCtxDestroy, or is a primary context which 751 * has not yet been initialized. 752 */ 753 CUDA_ERROR_CONTEXT_IS_DESTROYED = 709, 754 755 /** 756 * This indicates that an unknown internal error has occurred. 757 */ 758 CUDA_ERROR_UNKNOWN = 999 759 } CUresult; 760 761 #if __CUDA_API_VERSION >= 4000 762 /** 763 * If set, host memory is portable between CUDA contexts. 764 * Flag for ::cuMemHostAlloc() 765 */ 766 #define CU_MEMHOSTALLOC_PORTABLE 0x01 767 768 /** 769 * If set, host memory is mapped into CUDA address space and 770 * ::cuMemHostGetDevicePointer() may be called on the host pointer. 771 * Flag for ::cuMemHostAlloc() 772 */ 773 #define CU_MEMHOSTALLOC_DEVICEMAP 0x02 774 775 /** 776 * If set, host memory is allocated as write-combined - fast to write, 777 * faster to DMA, slow to read except via SSE4 streaming load instruction 778 * (MOVNTDQA). 779 * Flag for ::cuMemHostAlloc() 780 */ 781 #define CU_MEMHOSTALLOC_WRITECOMBINED 0x04 782 783 /** 784 * If set, host memory is portable between CUDA contexts. 785 * Flag for ::cuMemHostRegister() 786 */ 787 #define CU_MEMHOSTREGISTER_PORTABLE 0x01 788 789 /** 790 * If set, host memory is mapped into CUDA address space and 791 * ::cuMemHostGetDevicePointer() may be called on the host pointer. 
 * Flag for ::cuMemHostRegister()
 */
#define CU_MEMHOSTREGISTER_DEVICEMAP 0x02

/**
 * If set, peer memory is mapped into CUDA address space and
 * ::cuMemPeerGetDevicePointer() may be called on the host pointer.
 * Flag for ::cuMemPeerRegister()
 */
#define CU_MEMPEERREGISTER_DEVICEMAP 0x02
#endif /* NOTE(review): closes a version guard opened earlier in this header — confirm */

#if __CUDA_API_VERSION >= 3020
/**
 * 2D memory copy parameters
 */
typedef struct CUDA_MEMCPY2D_st
{
    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */

    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */

    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */

    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */

    size_t WidthInBytes;        /**< Width of 2D memory copy in bytes */
    size_t Height;              /**< Height of 2D memory copy */
} CUDA_MEMCPY2D;

/**
 * 3D memory copy parameters
 */
typedef struct CUDA_MEMCPY3D_st
{
    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    size_t srcZ;                /**< Source Z */
    size_t srcLOD;              /**< Source LOD */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    void *reserved0;            /**< Must be NULL */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
    size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */

    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    size_t dstZ;                /**< Destination Z */
    size_t dstLOD;              /**< Destination LOD */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    void *reserved1;            /**< Must be NULL */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
    size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */

    size_t WidthInBytes;        /**< Width of 3D memory copy in bytes */
    size_t Height;              /**< Height of 3D memory copy */
    size_t Depth;               /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D;

/**
 * 3D memory cross-context copy parameters
 */
typedef struct CUDA_MEMCPY3D_PEER_st
{
    size_t srcXInBytes;         /**< Source X in bytes */
    size_t srcY;                /**< Source Y */
    size_t srcZ;                /**< Source Z */
    size_t srcLOD;              /**< Source LOD */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    CUcontext srcContext;       /**< Source context (ignored when srcMemoryType is ::CU_MEMORYTYPE_ARRAY) */
    size_t srcPitch;            /**< Source pitch (ignored when src is array) */
    size_t srcHeight;           /**< Source height (ignored when src is array; may be 0 if Depth==1) */

    size_t dstXInBytes;         /**< Destination X in bytes */
    size_t dstY;                /**< Destination Y */
    size_t dstZ;                /**< Destination Z */
    size_t dstLOD;              /**< Destination LOD */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    CUcontext dstContext;       /**< Destination context (ignored when dstMemoryType is ::CU_MEMORYTYPE_ARRAY) */
    size_t dstPitch;            /**< Destination pitch (ignored when dst is array) */
    size_t dstHeight;           /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */

    size_t WidthInBytes;        /**< Width of 3D memory copy in bytes */
    size_t Height;              /**< Height of 3D memory copy */
    size_t Depth;               /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D_PEER;

/**
 * Array descriptor
 */
typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
    size_t Width;               /**< Width of array */
    size_t Height;              /**< Height of array */

    CUarray_format Format;      /**< Array format */
    unsigned int NumChannels;   /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;

/**
 * 3D array descriptor
 */
typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
    size_t Width;               /**< Width of 3D array */
    size_t Height;              /**< Height of 3D array */
    size_t Depth;               /**< Depth of 3D array */

    CUarray_format Format;      /**< Array format */
    unsigned int NumChannels;   /**< Channels per array element */
    unsigned int Flags;         /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;

#endif /* __CUDA_API_VERSION >= 3020 */

/**
 * If set, the CUDA array is a collection of layers, where each layer is either a 1D
 * or a 2D array and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies the number
 * of layers, not the depth of a 3D array.
 */
#define CUDA_ARRAY3D_LAYERED 0x01

/**
 * Deprecated, use CUDA_ARRAY3D_LAYERED
 */
#define CUDA_ARRAY3D_2DARRAY 0x01

/**
 * This flag must be set in order to bind a surface reference
 * to the CUDA array
 */
#define CUDA_ARRAY3D_SURFACE_LDST 0x02

/**
 * Override the texref format with a format inferred from the array.
 * Flag for ::cuTexRefSetArray()
 */
#define CU_TRSA_OVERRIDE_FORMAT 0x01

/**
 * Read the texture as integers rather than promoting the values to floats
 * in the range [0,1].
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_READ_AS_INTEGER 0x01

/**
 * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_NORMALIZED_COORDINATES 0x02

/**
 * Perform sRGB->linear conversion during texture read.
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_SRGB 0x10

/**
 * End of array terminator for the \p extra parameter to
 * ::cuLaunchKernel
 */
#define CU_LAUNCH_PARAM_END ((void*)0x00)

/**
 * Indicator that the next value in the \p extra parameter to
 * ::cuLaunchKernel will be a pointer to a buffer containing all kernel
 * parameters used for launching kernel \p f. This buffer needs to
 * honor all alignment/padding requirements of the individual parameters.
 * If ::CU_LAUNCH_PARAM_BUFFER_SIZE is not also specified in the
 * \p extra array, then ::CU_LAUNCH_PARAM_BUFFER_POINTER will have no
 * effect.
 */
#define CU_LAUNCH_PARAM_BUFFER_POINTER ((void*)0x01)

/**
 * Indicator that the next value in the \p extra parameter to
 * ::cuLaunchKernel will be a pointer to a size_t which contains the
 * size of the buffer specified with ::CU_LAUNCH_PARAM_BUFFER_POINTER.
 * It is required that ::CU_LAUNCH_PARAM_BUFFER_POINTER also be specified
 * in the \p extra array if the value associated with
 * ::CU_LAUNCH_PARAM_BUFFER_SIZE is not zero.
 */
#define CU_LAUNCH_PARAM_BUFFER_SIZE ((void*)0x02)

/**
 * For texture references loaded into the module, use default texunit from
 * texture reference.
 */
#define CU_PARAM_TR_DEFAULT -1

/**
 * CUDA API made obsolete at API version 3020
 */
#if defined(__CUDA_API_VERSION_INTERNAL)
#define CUdeviceptr                  CUdeviceptr_v1
#define CUDA_MEMCPY2D_st             CUDA_MEMCPY2D_v1_st
#define CUDA_MEMCPY2D                CUDA_MEMCPY2D_v1
#define CUDA_MEMCPY3D_st             CUDA_MEMCPY3D_v1_st
#define CUDA_MEMCPY3D                CUDA_MEMCPY3D_v1
#define CUDA_ARRAY_DESCRIPTOR_st     CUDA_ARRAY_DESCRIPTOR_v1_st
#define CUDA_ARRAY_DESCRIPTOR        CUDA_ARRAY_DESCRIPTOR_v1
#define CUDA_ARRAY3D_DESCRIPTOR_st   CUDA_ARRAY3D_DESCRIPTOR_v1_st
#define CUDA_ARRAY3D_DESCRIPTOR      CUDA_ARRAY3D_DESCRIPTOR_v1
#endif /* __CUDA_API_VERSION_INTERNAL */

/* Legacy (pre-3020) 32-bit definitions of the types above. */
#if defined(__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020
typedef unsigned int CUdeviceptr;

typedef struct CUDA_MEMCPY2D_st
{
    unsigned int srcXInBytes;   /**< Source X in bytes */
    unsigned int srcY;          /**< Source Y */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    unsigned int srcPitch;      /**< Source pitch (ignored when src is array) */

    unsigned int dstXInBytes;   /**< Destination X in bytes */
    unsigned int dstY;          /**< Destination Y */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**<
 Destination array reference */
    unsigned int dstPitch;      /**< Destination pitch (ignored when dst is array) */

    unsigned int WidthInBytes;  /**< Width of 2D memory copy in bytes */
    unsigned int Height;        /**< Height of 2D memory copy */
} CUDA_MEMCPY2D;

typedef struct CUDA_MEMCPY3D_st
{
    unsigned int srcXInBytes;   /**< Source X in bytes */
    unsigned int srcY;          /**< Source Y */
    unsigned int srcZ;          /**< Source Z */
    unsigned int srcLOD;        /**< Source LOD */
    CUmemorytype srcMemoryType; /**< Source memory type (host, device, array) */
    const void *srcHost;        /**< Source host pointer */
    CUdeviceptr srcDevice;      /**< Source device pointer */
    CUarray srcArray;           /**< Source array reference */
    void *reserved0;            /**< Must be NULL */
    unsigned int srcPitch;      /**< Source pitch (ignored when src is array) */
    unsigned int srcHeight;     /**< Source height (ignored when src is array; may be 0 if Depth==1) */

    unsigned int dstXInBytes;   /**< Destination X in bytes */
    unsigned int dstY;          /**< Destination Y */
    unsigned int dstZ;          /**< Destination Z */
    unsigned int dstLOD;        /**< Destination LOD */
    CUmemorytype dstMemoryType; /**< Destination memory type (host, device, array) */
    void *dstHost;              /**< Destination host pointer */
    CUdeviceptr dstDevice;      /**< Destination device pointer */
    CUarray dstArray;           /**< Destination array reference */
    void *reserved1;            /**< Must be NULL */
    unsigned int dstPitch;      /**< Destination pitch (ignored when dst is array) */
    unsigned int dstHeight;     /**< Destination height (ignored when dst is array; may be 0 if Depth==1) */

    unsigned int WidthInBytes;  /**< Width of 3D memory copy in bytes */
    unsigned int Height;        /**< Height of 3D memory copy */
    unsigned int Depth;         /**< Depth of 3D memory copy */
} CUDA_MEMCPY3D;

typedef struct CUDA_ARRAY_DESCRIPTOR_st
{
    unsigned int Width;         /**< Width of array */
    unsigned int Height;        /**< Height of array */

    CUarray_format Format;      /**< Array format */
    unsigned int NumChannels;   /**< Channels per array element */
} CUDA_ARRAY_DESCRIPTOR;

typedef struct CUDA_ARRAY3D_DESCRIPTOR_st
{
    unsigned int Width;         /**< Width of 3D array */
    unsigned int Height;        /**< Height of 3D array */
    unsigned int Depth;         /**< Depth of 3D array */

    CUarray_format Format;      /**< Array format */
    unsigned int NumChannels;   /**< Channels per array element */
    unsigned int Flags;         /**< Flags */
} CUDA_ARRAY3D_DESCRIPTOR;

#endif /* (__CUDA_API_VERSION_INTERNAL) || __CUDA_API_VERSION < 3020 */

/*
 * If set, the CUDA array contains an array of 2D slices
 * and the Depth member of CUDA_ARRAY3D_DESCRIPTOR specifies
 * the number of slices, not the depth of a 3D array.
 * NOTE(review): this macro and the ones below duplicate definitions made
 * earlier in this header; the redefinitions are benign because the
 * replacement values are identical.
 */
#define CUDA_ARRAY3D_2DARRAY 0x01

/**
 * This flag must be set in order to bind a surface reference
 * to the CUDA array
 */
#define CUDA_ARRAY3D_SURFACE_LDST 0x02

/**
 * Override the texref format with a format inferred from the array.
 * Flag for ::cuTexRefSetArray()
 */
#define CU_TRSA_OVERRIDE_FORMAT 0x01

/**
 * Read the texture as integers rather than promoting the values to floats
 * in the range [0,1].
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_READ_AS_INTEGER 0x01

/**
 * Use normalized texture coordinates in the range [0,1) instead of [0,dim).
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_NORMALIZED_COORDINATES 0x02

/**
 * Perform sRGB->linear conversion during texture read.
 * Flag for ::cuTexRefSetFlags()
 */
#define CU_TRSF_SRGB 0x10

/**
 * For texture references loaded into the module, use default texunit from
 * texture reference.
 */
#define CU_PARAM_TR_DEFAULT -1

/** @} */ /* END CUDA_TYPES */

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif

/**
 * \defgroup CUDA_INITIALIZE Initialization
 *
 * This section describes the initialization functions of the low-level CUDA
 * driver application programming interface.
 *
 * @{
 */

/*********************************
 ** Initialization
 *********************************/
typedef CUresult CUDAAPI tcuInit(unsigned int Flags);

/*********************************
 ** Driver Version Query
 *********************************/
typedef CUresult CUDAAPI tcuDriverGetVersion(int *driverVersion);

/************************************
 **
 **    Device management
 **
 ***********************************/

typedef CUresult CUDAAPI tcuDeviceGet(CUdevice *device, int ordinal);
typedef CUresult CUDAAPI tcuDeviceGetCount(int *count);
typedef CUresult CUDAAPI tcuDeviceGetName(char *name, int len, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceComputeCapability(int *major, int *minor, CUdevice dev);
#if __CUDA_API_VERSION >= 3020
typedef CUresult CUDAAPI tcuDeviceTotalMem(size_t *bytes, CUdevice dev);
#else
typedef CUresult CUDAAPI tcuDeviceTotalMem(unsigned int *bytes, CUdevice dev);
#endif

typedef CUresult CUDAAPI tcuDeviceGetProperties(CUdevprop *prop, CUdevice dev);
typedef CUresult CUDAAPI tcuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);

/************************************
 **
 **    Context management
 **
 ***********************************/
typedef CUresult CUDAAPI tcuCtxCreate(CUcontext *pctx, unsigned int flags, CUdevice dev);
typedef CUresult CUDAAPI tcuCtxDestroy(CUcontext ctx);
typedef CUresult
CUDAAPI tcuCtxAttach(CUcontext *pctx, unsigned int flags); 1195 typedef CUresult CUDAAPI tcuCtxDetach(CUcontext ctx); 1196 typedef CUresult CUDAAPI tcuCtxPushCurrent(CUcontext ctx); 1197 typedef CUresult CUDAAPI tcuCtxPopCurrent(CUcontext *pctx); 1198 1199 typedef CUresult CUDAAPI tcuCtxSetCurrent(CUcontext ctx); 1200 typedef CUresult CUDAAPI tcuCtxGetCurrent(CUcontext *pctx); 1201 1202 typedef CUresult CUDAAPI tcuCtxGetDevice(CUdevice *device); 1203 typedef CUresult CUDAAPI tcuCtxSynchronize(void); 1204 1205 1206 /************************************ 1207 ** 1208 ** Module management 1209 ** 1210 ***********************************/ 1211 typedef CUresult CUDAAPI tcuModuleLoad(CUmodule *module, const char *fname); 1212 typedef CUresult CUDAAPI tcuModuleLoadData(CUmodule *module, const void *image); 1213 typedef CUresult CUDAAPI tcuModuleLoadDataEx(CUmodule *module, const void *image, unsigned int numOptions, CUjit_option *options, void **optionValues); 1214 typedef CUresult CUDAAPI tcuModuleLoadFatBinary(CUmodule *module, const void *fatCubin); 1215 typedef CUresult CUDAAPI tcuModuleUnload(CUmodule hmod); 1216 typedef CUresult CUDAAPI tcuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, const char *name); 1217 1218 #if __CUDA_API_VERSION >= 3020 1219 typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, CUmodule hmod, const char *name); 1220 #else 1221 typedef CUresult CUDAAPI tcuModuleGetGlobal(CUdeviceptr *dptr, unsigned int *bytes, CUmodule hmod, const char *name); 1222 #endif 1223 1224 typedef CUresult CUDAAPI tcuModuleGetTexRef(CUtexref *pTexRef, CUmodule hmod, const char *name); 1225 typedef CUresult CUDAAPI tcuModuleGetSurfRef(CUsurfref *pSurfRef, CUmodule hmod, const char *name); 1226 1227 /************************************ 1228 ** 1229 ** Memory management 1230 ** 1231 ***********************************/ 1232 #if __CUDA_API_VERSION >= 3020 1233 typedef CUresult CUDAAPI tcuMemGetInfo(size_t *free, size_t *total); 1234 typedef 
CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, size_t bytesize); 1235 typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, CUdeviceptr dptr); 1236 typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, 1237 size_t *pPitch, 1238 size_t WidthInBytes, 1239 size_t Height, 1240 // size of biggest r/w to be performed by kernels on this memory 1241 // 4, 8 or 16 bytes 1242 unsigned int ElementSizeBytes 1243 ); 1244 #else 1245 typedef CUresult CUDAAPI tcuMemGetInfo(unsigned int *free, unsigned int *total); 1246 typedef CUresult CUDAAPI tcuMemAlloc(CUdeviceptr *dptr, unsigned int bytesize); 1247 typedef CUresult CUDAAPI tcuMemGetAddressRange(CUdeviceptr *pbase, unsigned int *psize, CUdeviceptr dptr); 1248 typedef CUresult CUDAAPI tcuMemAllocPitch(CUdeviceptr *dptr, 1249 unsigned int *pPitch, 1250 unsigned int WidthInBytes, 1251 unsigned int Height, 1252 // size of biggest r/w to be performed by kernels on this memory 1253 // 4, 8 or 16 bytes 1254 unsigned int ElementSizeBytes 1255 ); 1256 #endif 1257 1258 typedef CUresult CUDAAPI tcuMemFree(CUdeviceptr dptr); 1259 1260 #if __CUDA_API_VERSION >= 3020 1261 typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, size_t bytesize); 1262 #else 1263 typedef CUresult CUDAAPI tcuMemAllocHost(void **pp, unsigned int bytesize); 1264 #endif 1265 1266 typedef CUresult CUDAAPI tcuMemFreeHost(void *p); 1267 typedef CUresult CUDAAPI tcuMemHostAlloc(void **pp, size_t bytesize, unsigned int Flags); 1268 1269 typedef CUresult CUDAAPI tcuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, unsigned int Flags); 1270 typedef CUresult CUDAAPI tcuMemHostGetFlags(unsigned int *pFlags, void *p); 1271 1272 typedef CUresult CUDAAPI tcuMemHostRegister(void *p, size_t bytesize, unsigned int Flags); 1273 typedef CUresult CUDAAPI tcuMemHostUnregister(void *p);; 1274 typedef CUresult CUDAAPI tcuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount); 1275 typedef CUresult CUDAAPI tcuMemcpyPeer(CUdeviceptr dstDevice, 
CUcontext dstContext, CUdeviceptr srcDevice, CUcontext srcContext, size_t ByteCount); 1276 1277 /************************************ 1278 ** 1279 ** Synchronous Memcpy 1280 ** 1281 ** Intra-device memcpy's done with these functions may execute in parallel with the CPU, 1282 ** but if host memory is involved, they wait until the copy is done before returning. 1283 ** 1284 ***********************************/ 1285 // 1D functions 1286 #if __CUDA_API_VERSION >= 3020 1287 // system <-> device memory 1288 typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, size_t ByteCount); 1289 typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, size_t ByteCount); 1290 1291 // device <-> device memory 1292 typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, size_t ByteCount); 1293 1294 // device <-> array memory 1295 typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, size_t dstOffset, CUdeviceptr srcDevice, size_t ByteCount); 1296 typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, size_t srcOffset, size_t ByteCount); 1297 1298 // system <-> array memory 1299 typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, size_t dstOffset, const void *srcHost, size_t ByteCount); 1300 typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, size_t ByteCount); 1301 1302 // array <-> array memory 1303 typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, size_t dstOffset, CUarray srcArray, size_t srcOffset, size_t ByteCount); 1304 #else 1305 // system <-> device memory 1306 typedef CUresult CUDAAPI tcuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, unsigned int ByteCount); 1307 typedef CUresult CUDAAPI tcuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, unsigned int ByteCount); 1308 1309 // device <-> device memory 1310 typedef CUresult CUDAAPI tcuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, unsigned int ByteCount); 
// device <-> array memory
typedef CUresult CUDAAPI tcuMemcpyDtoA(CUarray dstArray, unsigned int dstOffset, CUdeviceptr srcDevice, unsigned int ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);

// system <-> array memory
typedef CUresult CUDAAPI tcuMemcpyHtoA(CUarray dstArray, unsigned int dstOffset, const void *srcHost, unsigned int ByteCount);
typedef CUresult CUDAAPI tcuMemcpyAtoH(void *dstHost, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);

// array <-> array memory
typedef CUresult CUDAAPI tcuMemcpyAtoA(CUarray dstArray, unsigned int dstOffset, CUarray srcArray, unsigned int srcOffset, unsigned int ByteCount);
#endif

// 2D memcpy
typedef CUresult CUDAAPI tcuMemcpy2D(const CUDA_MEMCPY2D *pCopy);
typedef CUresult CUDAAPI tcuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy);

// 3D memcpy
typedef CUresult CUDAAPI tcuMemcpy3D(const CUDA_MEMCPY3D *pCopy);

/************************************
 **
 **    Asynchronous Memcpy
 **
 ** Any host memory involved must be DMA'able (e.g., allocated with cuMemAllocHost).
 ** memcpy's done with these functions execute in parallel with the CPU and, if
 ** the hardware is available, may execute in parallel with the GPU.
 ** Asynchronous memcpy must be accompanied by appropriate stream synchronization.
 **
 ***********************************/

// 1D functions
#if __CUDA_API_VERSION >= 3020
// system <-> device memory
typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice,
                                            const void *srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost,
                                            CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);

// device <-> device memory
typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice,
                                            CUdeviceptr srcDevice, size_t ByteCount, CUstream hStream);

// system <-> array memory
typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset,
                                            const void *srcHost, size_t ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, size_t srcOffset,
                                            size_t ByteCount, CUstream hStream);
#else
// system <-> device memory
typedef CUresult CUDAAPI tcuMemcpyHtoDAsync(CUdeviceptr dstDevice,
                                            const void *srcHost, unsigned int ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyDtoHAsync(void *dstHost,
                                            CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);

// device <-> device memory
typedef CUresult CUDAAPI tcuMemcpyDtoDAsync(CUdeviceptr dstDevice,
                                            CUdeviceptr srcDevice, unsigned int ByteCount, CUstream hStream);

// system <-> array memory
typedef CUresult CUDAAPI tcuMemcpyHtoAAsync(CUarray dstArray, unsigned int dstOffset,
                                            const void *srcHost, unsigned int ByteCount, CUstream hStream);
typedef CUresult CUDAAPI tcuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, unsigned int srcOffset,
                                            unsigned int ByteCount, CUstream hStream);
#endif

// 2D memcpy
typedef CUresult CUDAAPI tcuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream);

// 3D memcpy
typedef CUresult CUDAAPI tcuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream);

/************************************
 **
 **    Memset
 **
 ***********************************/
#if __CUDA_API_VERSION >= 3020
typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N);
typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, size_t N);
typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N);
typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, size_t Width, size_t Height);
typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, size_t Width, size_t Height);
#else
typedef CUresult CUDAAPI tcuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, unsigned int N);
typedef CUresult CUDAAPI tcuMemsetD16(CUdeviceptr dstDevice, unsigned short us, unsigned int N);
typedef CUresult CUDAAPI tcuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, unsigned int N);
typedef CUresult CUDAAPI tcuMemsetD2D8(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned char uc, unsigned int Width, unsigned int Height);
typedef CUresult CUDAAPI tcuMemsetD2D16(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned short us, unsigned int Width, unsigned int Height);
typedef CUresult CUDAAPI tcuMemsetD2D32(CUdeviceptr dstDevice, unsigned int dstPitch, unsigned int ui, unsigned int Width, unsigned int Height);
#endif

/************************************
 **
 **    Function management
 **
 ***********************************/


typedef CUresult CUDAAPI tcuFuncSetBlockShape(CUfunction hfunc, int x, int y, int z);
typedef CUresult CUDAAPI tcuFuncSetSharedSize(CUfunction hfunc, unsigned int bytes);
typedef CUresult CUDAAPI
tcuFuncGetAttribute(int *pi, CUfunction_attribute attrib, CUfunction hfunc);
typedef CUresult CUDAAPI tcuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config);
typedef CUresult CUDAAPI tcuLaunchKernel(CUfunction f,
                                         unsigned int gridDimX, unsigned int gridDimY, unsigned int gridDimZ,
                                         unsigned int blockDimX, unsigned int blockDimY, unsigned int blockDimZ,
                                         unsigned int sharedMemBytes,
                                         CUstream hStream, void **kernelParams, void **extra);

/************************************
 **
 **    Array management
 **
 ***********************************/

typedef CUresult CUDAAPI tcuArrayCreate(CUarray *pHandle, const CUDA_ARRAY_DESCRIPTOR *pAllocateArray);
typedef CUresult CUDAAPI tcuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, CUarray hArray);
typedef CUresult CUDAAPI tcuArrayDestroy(CUarray hArray);

typedef CUresult CUDAAPI tcuArray3DCreate(CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray);
typedef CUresult CUDAAPI tcuArray3DGetDescriptor(CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray);


/************************************
 **
 **    Texture reference management
 **
 ***********************************/
typedef CUresult CUDAAPI tcuTexRefCreate(CUtexref *pTexRef);
typedef CUresult CUDAAPI tcuTexRefDestroy(CUtexref hTexRef);

typedef CUresult CUDAAPI tcuTexRefSetArray(CUtexref hTexRef, CUarray hArray, unsigned int Flags);

#if __CUDA_API_VERSION >= 3020
typedef CUresult CUDAAPI tcuTexRefSetAddress(size_t *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, size_t bytes);
typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, size_t Pitch);
#else
typedef CUresult CUDAAPI tcuTexRefSetAddress(unsigned int *ByteOffset, CUtexref hTexRef, CUdeviceptr dptr, unsigned int bytes);
typedef CUresult CUDAAPI tcuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, CUdeviceptr dptr, unsigned int Pitch);
#endif

typedef CUresult CUDAAPI tcuTexRefSetFormat(CUtexref hTexRef, CUarray_format fmt, int NumPackedComponents);
typedef CUresult CUDAAPI tcuTexRefSetAddressMode(CUtexref hTexRef, int dim, CUaddress_mode am);
typedef CUresult CUDAAPI tcuTexRefSetFilterMode(CUtexref hTexRef, CUfilter_mode fm);
typedef CUresult CUDAAPI tcuTexRefSetFlags(CUtexref hTexRef, unsigned int Flags);

typedef CUresult CUDAAPI tcuTexRefGetAddress(CUdeviceptr *pdptr, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetArray(CUarray *phArray, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetAddressMode(CUaddress_mode *pam, CUtexref hTexRef, int dim);
typedef CUresult CUDAAPI tcuTexRefGetFilterMode(CUfilter_mode *pfm, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFormat(CUarray_format *pFormat, int *pNumChannels, CUtexref hTexRef);
typedef CUresult CUDAAPI tcuTexRefGetFlags(unsigned int *pFlags, CUtexref hTexRef);

/************************************
 **
 **    Surface reference management
 **
 ***********************************/
typedef CUresult CUDAAPI tcuSurfRefSetArray(CUsurfref hSurfRef, CUarray hArray, unsigned int Flags);
typedef CUresult CUDAAPI tcuSurfRefGetArray(CUarray *phArray, CUsurfref hSurfRef);

/************************************
 **
 **    Parameter management
 **
 ***********************************/

typedef CUresult CUDAAPI tcuParamSetSize(CUfunction hfunc, unsigned int numbytes);
typedef CUresult CUDAAPI tcuParamSeti(CUfunction hfunc, int offset, unsigned int value);
typedef CUresult CUDAAPI tcuParamSetf(CUfunction hfunc, int offset, float value);
typedef CUresult CUDAAPI tcuParamSetv(CUfunction hfunc, int offset, void *ptr, unsigned int numbytes);
typedef CUresult CUDAAPI tcuParamSetTexRef(CUfunction hfunc, int texunit, CUtexref hTexRef);


/************************************
 **
 **    Launch functions
 **
 ***********************************/

typedef CUresult CUDAAPI tcuLaunch(CUfunction f);
typedef CUresult CUDAAPI tcuLaunchGrid(CUfunction f, int grid_width, int grid_height);
typedef CUresult CUDAAPI tcuLaunchGridAsync(CUfunction f, int grid_width, int grid_height, CUstream hStream);

/************************************
 **
 **    Events
 **
 ***********************************/
typedef CUresult CUDAAPI tcuEventCreate(CUevent *phEvent, unsigned int Flags);
typedef CUresult CUDAAPI tcuEventRecord(CUevent hEvent, CUstream hStream);
typedef CUresult CUDAAPI tcuEventQuery(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventSynchronize(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventDestroy(CUevent hEvent);
typedef CUresult CUDAAPI tcuEventElapsedTime(float *pMilliseconds, CUevent hStart, CUevent hEnd);

/************************************
 **
 **    Streams
 **
 ***********************************/
typedef CUresult CUDAAPI tcuStreamCreate(CUstream *phStream, unsigned int Flags);
typedef CUresult CUDAAPI tcuStreamQuery(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamSynchronize(CUstream hStream);
typedef CUresult CUDAAPI tcuStreamDestroy(CUstream hStream);

/************************************
 **
 **    Graphics interop
 **
 ***********************************/
typedef CUresult CUDAAPI tcuGraphicsUnregisterResource(CUgraphicsResource resource);
typedef CUresult CUDAAPI tcuGraphicsSubResourceGetMappedArray(CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, unsigned int mipLevel);

#if __CUDA_API_VERSION >= 3020
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, size_t *pSize,
                                                             CUgraphicsResource resource);
#else
typedef CUresult CUDAAPI tcuGraphicsResourceGetMappedPointer(CUdeviceptr *pDevPtr, unsigned int *pSize, CUgraphicsResource resource);
#endif

typedef CUresult CUDAAPI tcuGraphicsResourceSetMapFlags(CUgraphicsResource resource, unsigned int flags);
typedef CUresult CUDAAPI tcuGraphicsMapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);
typedef CUresult CUDAAPI tcuGraphicsUnmapResources(unsigned int count, CUgraphicsResource *resources, CUstream hStream);

/************************************
 **
 **    Export tables
 **
 ***********************************/
typedef CUresult CUDAAPI tcuGetExportTable(const void **ppExportTable, const CUuuid *pExportTableId);

/************************************
 **
 **    Limits
 **
 ***********************************/

typedef CUresult CUDAAPI tcuCtxSetLimit(CUlimit limit, size_t value);
typedef CUresult CUDAAPI tcuCtxGetLimit(size_t *pvalue, CUlimit limit);


/*
 * Function pointers to the driver entry points, resolved at runtime by the
 * dynamic-loading code (declared here, defined in the corresponding .c file).
 */
extern tcuDriverGetVersion             *cuDriverGetVersion;
extern tcuDeviceGet                    *cuDeviceGet;
extern tcuDeviceGetCount               *cuDeviceGetCount;
extern tcuDeviceGetName                *cuDeviceGetName;
extern tcuDeviceComputeCapability      *cuDeviceComputeCapability;
extern tcuDeviceGetProperties          *cuDeviceGetProperties;
extern tcuDeviceGetAttribute           *cuDeviceGetAttribute;
extern tcuCtxDestroy                   *cuCtxDestroy;
extern tcuCtxAttach                    *cuCtxAttach;
extern tcuCtxDetach                    *cuCtxDetach;
extern tcuCtxPushCurrent               *cuCtxPushCurrent;
extern tcuCtxPopCurrent                *cuCtxPopCurrent;

extern tcuCtxSetCurrent                *cuCtxSetCurrent;
extern tcuCtxGetCurrent                *cuCtxGetCurrent;

extern tcuCtxGetDevice                 *cuCtxGetDevice;
extern tcuCtxSynchronize               *cuCtxSynchronize;
extern tcuModuleLoad                   *cuModuleLoad;
extern tcuModuleLoadData               *cuModuleLoadData;
extern tcuModuleLoadDataEx             *cuModuleLoadDataEx;
extern tcuModuleLoadFatBinary          *cuModuleLoadFatBinary;
extern tcuModuleUnload                 *cuModuleUnload;
extern tcuModuleGetFunction            *cuModuleGetFunction;
extern tcuModuleGetTexRef              *cuModuleGetTexRef;
extern tcuModuleGetSurfRef             *cuModuleGetSurfRef;
extern tcuMemFreeHost                  *cuMemFreeHost;
extern tcuMemHostAlloc                 *cuMemHostAlloc;
extern tcuMemHostGetFlags              *cuMemHostGetFlags;

extern tcuMemHostRegister              *cuMemHostRegister;
extern tcuMemHostUnregister            *cuMemHostUnregister;
extern tcuMemcpy                       *cuMemcpy;
extern tcuMemcpyPeer                   *cuMemcpyPeer;

extern tcuDeviceTotalMem               *cuDeviceTotalMem;
extern tcuCtxCreate                    *cuCtxCreate;
extern tcuModuleGetGlobal              *cuModuleGetGlobal;
extern tcuMemGetInfo                   *cuMemGetInfo;
extern tcuMemAlloc                     *cuMemAlloc;
extern tcuMemAllocPitch                *cuMemAllocPitch;
extern tcuMemFree                      *cuMemFree;
extern tcuMemGetAddressRange           *cuMemGetAddressRange;
extern tcuMemAllocHost                 *cuMemAllocHost;
extern tcuMemHostGetDevicePointer      *cuMemHostGetDevicePointer;
extern tcuFuncSetBlockShape            *cuFuncSetBlockShape;
extern tcuFuncSetSharedSize            *cuFuncSetSharedSize;
extern tcuFuncGetAttribute             *cuFuncGetAttribute;
extern tcuFuncSetCacheConfig           *cuFuncSetCacheConfig;
extern tcuLaunchKernel                 *cuLaunchKernel;
extern tcuArrayDestroy                 *cuArrayDestroy;
extern tcuTexRefCreate                 *cuTexRefCreate;
extern tcuTexRefDestroy                *cuTexRefDestroy;
extern tcuTexRefSetArray               *cuTexRefSetArray;
extern tcuTexRefSetFormat              *cuTexRefSetFormat;
extern tcuTexRefSetAddressMode         *cuTexRefSetAddressMode;
extern tcuTexRefSetFilterMode          *cuTexRefSetFilterMode;
extern tcuTexRefSetFlags               *cuTexRefSetFlags;
extern tcuTexRefGetArray               *cuTexRefGetArray;
extern tcuTexRefGetAddressMode         *cuTexRefGetAddressMode;
extern tcuTexRefGetFilterMode          *cuTexRefGetFilterMode;
extern tcuTexRefGetFormat              *cuTexRefGetFormat;
extern tcuTexRefGetFlags               *cuTexRefGetFlags;
extern tcuSurfRefSetArray              *cuSurfRefSetArray;
extern tcuSurfRefGetArray              *cuSurfRefGetArray;
extern tcuParamSetSize                 *cuParamSetSize;
extern tcuParamSeti                    *cuParamSeti;
extern tcuParamSetf                    *cuParamSetf;
extern tcuParamSetv                    *cuParamSetv;
extern tcuParamSetTexRef               *cuParamSetTexRef;
extern tcuLaunch                       *cuLaunch;
extern tcuLaunchGrid                   *cuLaunchGrid;
extern tcuLaunchGridAsync              *cuLaunchGridAsync;
extern tcuEventCreate                  *cuEventCreate;
extern tcuEventRecord                  *cuEventRecord;
extern tcuEventQuery                   *cuEventQuery;
extern tcuEventSynchronize             *cuEventSynchronize;
extern tcuEventDestroy                 *cuEventDestroy;
extern tcuEventElapsedTime             *cuEventElapsedTime;
extern tcuStreamCreate                 *cuStreamCreate;
extern tcuStreamQuery                  *cuStreamQuery;
extern tcuStreamSynchronize            *cuStreamSynchronize;
extern tcuStreamDestroy                *cuStreamDestroy;
extern tcuGraphicsUnregisterResource   *cuGraphicsUnregisterResource;
extern tcuGraphicsSubResourceGetMappedArray *cuGraphicsSubResourceGetMappedArray;
extern tcuGraphicsResourceSetMapFlags  *cuGraphicsResourceSetMapFlags;
extern tcuGraphicsMapResources         *cuGraphicsMapResources;
extern tcuGraphicsUnmapResources       *cuGraphicsUnmapResources;
extern tcuGetExportTable               *cuGetExportTable;
extern tcuCtxSetLimit                  *cuCtxSetLimit;
extern tcuCtxGetLimit                  *cuCtxGetLimit;

// These functions could be using the CUDA 3.2 interface (_v2)
extern tcuMemcpyHtoD                   *cuMemcpyHtoD;
extern tcuMemcpyDtoH                   *cuMemcpyDtoH;
extern tcuMemcpyDtoD                   *cuMemcpyDtoD;
extern tcuMemcpyDtoA                   *cuMemcpyDtoA;
extern tcuMemcpyAtoD                   *cuMemcpyAtoD;
extern tcuMemcpyHtoA                   *cuMemcpyHtoA;
extern tcuMemcpyAtoH                   *cuMemcpyAtoH;
extern tcuMemcpyAtoA                   *cuMemcpyAtoA;
extern tcuMemcpy2D                     *cuMemcpy2D;
extern tcuMemcpy2DUnaligned            *cuMemcpy2DUnaligned;
extern tcuMemcpy3D                     *cuMemcpy3D;
extern tcuMemcpyHtoDAsync
*cuMemcpyHtoDAsync; 1658 extern tcuMemcpyDtoHAsync *cuMemcpyDtoHAsync; 1659 extern tcuMemcpyDtoDAsync *cuMemcpyDtoDAsync; 1660 extern tcuMemcpyHtoAAsync *cuMemcpyHtoAAsync; 1661 extern tcuMemcpyAtoHAsync *cuMemcpyAtoHAsync; 1662 extern tcuMemcpy2DAsync *cuMemcpy2DAsync; 1663 extern tcuMemcpy3DAsync *cuMemcpy3DAsync; 1664 extern tcuMemsetD8 *cuMemsetD8; 1665 extern tcuMemsetD16 *cuMemsetD16; 1666 extern tcuMemsetD32 *cuMemsetD32; 1667 extern tcuMemsetD2D8 *cuMemsetD2D8; 1668 extern tcuMemsetD2D16 *cuMemsetD2D16; 1669 extern tcuMemsetD2D32 *cuMemsetD2D32; 1670 extern tcuArrayCreate *cuArrayCreate; 1671 extern tcuArrayGetDescriptor *cuArrayGetDescriptor; 1672 extern tcuArray3DCreate *cuArray3DCreate; 1673 extern tcuArray3DGetDescriptor *cuArray3DGetDescriptor; 1674 extern tcuTexRefSetAddress *cuTexRefSetAddress; 1675 extern tcuTexRefSetAddress2D *cuTexRefSetAddress2D; 1676 extern tcuTexRefGetAddress *cuTexRefGetAddress; 1677 extern tcuGraphicsResourceGetMappedPointer *cuGraphicsResourceGetMappedPointer; 1678 1679 /************************************/ 1680 CUresult CUDAAPI cuInit (unsigned int, int cudaVersion, void *hHandleDriver); 1681 /************************************/ 1682 1683 #ifdef __cplusplus 1684 } 1685 #endif 1686 1687 #endif //__cuda_cuda_h__ 1688