/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_CORE_CUDA_HPP__
#define __OPENCV_CORE_CUDA_HPP__

#ifndef __cplusplus
#  error cuda.hpp header must be compiled as C++
#endif

#include "opencv2/core.hpp"
#include "opencv2/core/cuda_types.hpp"

/**
  @defgroup cuda CUDA-accelerated Computer Vision
  @{
    @defgroup cudacore Core part
    @{
      @defgroup cudacore_init Initialization and Information
      @defgroup cudacore_struct Data Structures
    @}
  @}
 */

namespace cv { namespace cuda {

//! @addtogroup cudacore_struct
//! @{

//===================================================================================
// GpuMat
//===================================================================================

/** @brief Base storage class for GPU memory with reference counting.

Its interface matches the Mat interface with the following limitations:

-   no arbitrary dimensions support (only 2D)
-   no functions that return references to their data (because references on GPU are not valid for
    CPU)
-   no expression templates technique support

Beware that the latter limitation may lead to overloaded matrix operators that cause memory
allocations.
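As an illustration only, a minimal host/device round trip through this class might look as
follows (a sketch; the sizes and values are arbitrary):
@code
    cv::Mat host(480, 640, CV_8UC1, cv::Scalar(0)); // ordinary host matrix
    cv::cuda::GpuMat dev;
    dev.upload(host);                               // blocking host -> device copy
    cv::cuda::GpuMat devF;
    dev.convertTo(devF, CV_32F, 1.0 / 255.0);       // element conversion on the GPU
    cv::Mat result;
    devF.download(result);                          // blocking device -> host copy
@endcode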
The GpuMat class is convertible to cuda::PtrStepSz and cuda::PtrStep so it can be
passed directly to the kernel.

@note In contrast with Mat, in most cases GpuMat::isContinuous() == false. This means that rows are
aligned to a size depending on the hardware. Single-row GpuMat is always a continuous matrix.

@note It is not recommended to leave static or global GpuMat variables allocated, that is, to rely
on its destructor. The destruction order of such variables and CUDA context is undefined. The GPU
memory release function returns an error if the CUDA context has been destroyed before.

@sa Mat
 */
class CV_EXPORTS GpuMat
{
public:
    class CV_EXPORTS Allocator
    {
    public:
        virtual ~Allocator() {}

        // allocator must fill data, step and refcount fields
        virtual bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize) = 0;
        virtual void free(GpuMat* mat) = 0;
    };

    //! default allocator
    static Allocator* defaultAllocator();
    static void setDefaultAllocator(Allocator* allocator);

    //! default constructor
    explicit GpuMat(Allocator* allocator = defaultAllocator());

    //! constructs GpuMat of the specified size and type
    GpuMat(int rows, int cols, int type, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Allocator* allocator = defaultAllocator());

    //! constructs GpuMat and fills it with the specified value s
    GpuMat(int rows, int cols, int type, Scalar s, Allocator* allocator = defaultAllocator());
    GpuMat(Size size, int type, Scalar s, Allocator* allocator = defaultAllocator());

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from host memory (Blocking call)
    explicit GpuMat(InputArray arr, Allocator* allocator = defaultAllocator());

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases reference counter, deallocates the data when reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! performs data upload to GpuMat (Blocking call)
    void upload(InputArray arr);

    //! performs data upload to GpuMat (Non-Blocking call)
    void upload(InputArray arr, Stream& stream);

    //! performs data download from device to host memory (Blocking call)
    void download(OutputArray dst) const;

    //! performs data download from device to host memory (Non-Blocking call)
    void download(OutputArray dst, Stream& stream) const;

    //! returns deep copy of the GpuMat, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMat content to device memory (Blocking call)
    void copyTo(OutputArray dst) const;

    //! copies the GpuMat content to device memory (Non-Blocking call)
    void copyTo(OutputArray dst, Stream& stream) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Blocking call)
    void copyTo(OutputArray dst, InputArray mask) const;

    //! copies those GpuMat elements to "dst" that are marked with non-zero mask elements (Non-Blocking call)
    void copyTo(OutputArray dst, InputArray mask, Stream& stream) const;

    //! sets some of the GpuMat elements to s (Blocking call)
    GpuMat& setTo(Scalar s);

    //! sets some of the GpuMat elements to s (Non-Blocking call)
    GpuMat& setTo(Scalar s, Stream& stream);

    //! sets some of the GpuMat elements to s, according to the mask (Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask);

    //! sets some of the GpuMat elements to s, according to the mask (Non-Blocking call)
    GpuMat& setTo(Scalar s, InputArray mask, Stream& stream);

    //! converts GpuMat to another datatype (Blocking call)
    void convertTo(OutputArray dst, int rtype) const;

    //! converts GpuMat to another datatype (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta = 0.0) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, Stream& stream) const;

    //! converts GpuMat to another datatype with scaling (Non-Blocking call)
    void convertTo(OutputArray dst, int rtype, double alpha, double beta, Stream& stream) const;

    void assignTo(GpuMat& m, int type=-1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    GpuMat col(int x) const;

    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns element type
    int type() const;

    //! returns element depth
    int depth() const;

    //! returns number of channels
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMat data is NULL
    bool empty() const;

    /*! includes several bit-fields:
    - the magic signature
    - continuity flag
    - depth
    - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    const uchar* dataend;

    //! allocator
    Allocator* allocator;
};
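/*
A brief illustrative sketch (not part of the library docs) of header-style operations: an ROI
header shares the parent's device memory, so writing through it modifies the parent:

    cv::cuda::GpuMat img(480, 640, CV_8UC3, cv::Scalar::all(0));
    cv::cuda::GpuMat roi = img(cv::Rect(10, 10, 100, 100)); // view, no new allocation
    roi.setTo(cv::Scalar(255, 0, 0));                       // writes into img's data
    // roi.isContinuous() is typically false here: rows keep the parent's step
*/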

/** @brief Creates a continuous matrix.

@param rows Row count.
@param cols Column count.
@param type Type of the matrix.
@param arr Destination matrix. This parameter changes only if it has a proper type and area (
\f$\texttt{rows} \times \texttt{cols}\f$ ).

Matrix is called continuous if its elements are stored continuously, that is, without gaps at the
end of each row.
 */
CV_EXPORTS void createContinuous(int rows, int cols, int type, OutputArray arr);

/** @brief Ensures that the size of a matrix is big enough and the matrix has a proper type.

@param rows Minimum desired number of rows.
@param cols Minimum desired number of columns.
@param type Desired matrix type.
@param arr Destination matrix.

The function does not reallocate memory if the matrix has proper attributes already.
 */
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr);

//! BufferPool management (must be called before Stream creation)
CV_EXPORTS void setBufferPoolUsage(bool on);
CV_EXPORTS void setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount);
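/*
A hedged sketch of switching the buffer pool on; per the note above, both calls must happen
before the first Stream is created (the stack size and count below are arbitrary choices):

    cv::cuda::setBufferPoolUsage(true);                      // before any Stream
    cv::cuda::setBufferPoolConfig(cv::cuda::getDevice(),
                                  64 * 1024 * 1024, 2);      // 64 MB x 2 stacks
    cv::cuda::Stream stream;  // subsequent pooled allocations can use the stacks
*/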

//===================================================================================
// HostMem
//===================================================================================

/** @brief Class with reference counting wrapping special memory type allocation functions from CUDA.

Its interface is also Mat-like but with additional memory type parameters.

-   **PAGE_LOCKED** sets a page locked memory type used commonly for fast and asynchronous
    uploading/downloading data from/to GPU.
-   **SHARED** specifies a zero copy memory allocation that enables mapping the host memory to GPU
    address space, if supported.
-   **WRITE_COMBINED** sets the write combined buffer that is not cached by CPU. Such buffers are
    used to supply GPU with data when GPU only reads it. The advantage is a better CPU cache
    utilization.

@note Allocation size of such memory types is usually limited. For more details, see *CUDA 2.2
Pinned Memory APIs* document or *CUDA C Programming Guide*.
 */
class CV_EXPORTS HostMem
{
public:
    enum AllocType { PAGE_LOCKED = 1, SHARED = 2, WRITE_COMBINED = 4 };

    static MatAllocator* getAllocator(AllocType alloc_type = PAGE_LOCKED);

    explicit HostMem(AllocType alloc_type = PAGE_LOCKED);

    HostMem(const HostMem& m);

    HostMem(int rows, int cols, int type, AllocType alloc_type = PAGE_LOCKED);
    HostMem(Size size, int type, AllocType alloc_type = PAGE_LOCKED);

    //! creates from host memory with copying data
    explicit HostMem(InputArray arr, AllocType alloc_type = PAGE_LOCKED);

    ~HostMem();

    HostMem& operator =(const HostMem& m);

    //! swaps with other smart pointer
    void swap(HostMem& b);

    //! returns deep copy of the matrix, i.e. the data is copied
    HostMem clone() const;

    //! allocates new matrix data unless the matrix already has specified size and type.
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! creates alternative HostMem header for the same data, with different
    //! number of channels and/or different number of rows
    HostMem reshape(int cn, int rows = 0) const;

    //! decrements reference counter and releases memory if needed.
    void release();

    //! returns matrix header with disabled reference counting for HostMem data.
    Mat createMatHeader() const;

    /** @brief Maps CPU memory to GPU address space and creates the cuda::GpuMat header without reference counting
    for it.

    This can be done only if memory was allocated with the SHARED flag and if it is supported by the
    hardware. Laptops often share video and CPU memory, so address spaces can be mapped, which
    eliminates an extra copy.
     */
    GpuMat createGpuMatHeader() const;

    // Please see cv::Mat for descriptions
    bool isContinuous() const;
    size_t elemSize() const;
    size_t elemSize1() const;
    int type() const;
    int depth() const;
    int channels() const;
    size_t step1() const;
    Size size() const;
    bool empty() const;

    // Please see cv::Mat for descriptions
    int flags;
    int rows, cols;
    size_t step;

    uchar* data;
    int* refcount;

    uchar* datastart;
    const uchar* dataend;

    AllocType alloc_type;
};
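/*
An illustrative sketch (assumptions: default PAGE_LOCKED allocation; the fill value is
arbitrary) of pairing pinned host memory with a non-blocking upload:

    cv::cuda::HostMem pinned(480, 640, CV_8UC1);   // page-locked host buffer
    cv::Mat host = pinned.createMatHeader();       // Mat view, no copy
    host.setTo(cv::Scalar(128));                   // fill on the CPU
    cv::cuda::Stream stream;
    cv::cuda::GpuMat dev;
    dev.upload(host, stream);                      // async: pinned memory allows DMA
    stream.waitForCompletion();                    // synchronize before reusing host
*/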

/** @brief Page-locks the memory of matrix and maps it for the device(s).

@param m Input matrix.
 */
CV_EXPORTS void registerPageLocked(Mat& m);

/** @brief Unmaps the memory of matrix and makes it pageable again.

@param m Input matrix.
 */
CV_EXPORTS void unregisterPageLocked(Mat& m);
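/*
A purely illustrative sketch of pinning an already allocated Mat for the duration of
repeated transfers:

    cv::Mat frame(1080, 1920, CV_8UC3);
    cv::cuda::registerPageLocked(frame);    // pin the existing host allocation
    // ... repeated GpuMat::upload/download of 'frame' now benefit from pinning ...
    cv::cuda::unregisterPageLocked(frame);  // make the memory pageable again
*/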

//===================================================================================
// Stream
//===================================================================================

/** @brief This class encapsulates a queue of asynchronous calls.

@note Currently, you may face problems if an operation is enqueued twice with different data. Some
functions use the constant GPU memory, and next call may update the memory before the previous one
has been finished. But calling different operations asynchronously is safe because each operation
has its own constant buffer. Memory copy/upload/download/set operations to the buffers you hold are
also safe.
 */
class CV_EXPORTS Stream
{
    typedef void (Stream::*bool_type)() const;
    void this_type_does_not_support_comparisons() const {}

public:
    typedef void (*StreamCallback)(int status, void* userData);

    //! creates a new asynchronous stream
    Stream();

    /** @brief Returns true if the current stream queue is finished. Otherwise, it returns false.
     */
    bool queryIfComplete() const;

    /** @brief Blocks the current CPU thread until all operations in the stream are complete.
     */
    void waitForCompletion();

    /** @brief Makes a compute stream wait on an event.
     */
    void waitEvent(const Event& event);

    /** @brief Adds a callback to be called on the host after all currently enqueued items in the stream have
    completed.

    @note Callbacks must not make any CUDA API calls. Callbacks must not perform any synchronization
    that may depend on outstanding device work or other callbacks that are not mandated to run earlier.
    Callbacks without a mandated order (in independent streams) execute in undefined order and may be
    serialized.
     */
    void enqueueHostCallback(StreamCallback callback, void* userData);

    //! returns Stream object for default CUDA stream
    static Stream& Null();

    //! returns true if stream object is not default (!= 0)
    operator bool_type() const;

    class Impl;

private:
    Ptr<Impl> impl_;
    Stream(const Ptr<Impl>& impl);

    friend struct StreamAccessor;
    friend class BufferPool;
    friend class DefaultDeviceInitializer;
};
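/*
A hedged sketch of queueing work and a host callback; the callback signature follows the
StreamCallback typedef above and, per the note, must not call into the CUDA API:

    static void onDone(int status, void* userData)
    {
        (void)status; (void)userData;   // e.g. signal a condition variable here
    }

    cv::cuda::Stream stream;
    cv::cuda::GpuMat dev(480, 640, CV_8UC1);
    dev.setTo(cv::Scalar(0), stream);             // enqueued, returns immediately
    stream.enqueueHostCallback(onDone, 0);        // runs after the setTo finishes
    stream.waitForCompletion();                   // or poll stream.queryIfComplete()
*/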

class CV_EXPORTS Event
{
public:
    enum CreateFlags
    {
        DEFAULT        = 0x00,  /**< Default event flag */
        BLOCKING_SYNC  = 0x01,  /**< Event uses blocking synchronization */
        DISABLE_TIMING = 0x02,  /**< Event will not record timing data */
        INTERPROCESS   = 0x04   /**< Event is suitable for interprocess use. DisableTiming must be set */
    };

    explicit Event(CreateFlags flags = DEFAULT);

    //! records an event
    void record(Stream& stream = Stream::Null());

    //! queries an event's status
    bool queryIfComplete() const;

    //! waits for an event to complete
    void waitForCompletion();

    //! computes the elapsed time between events
    static float elapsedTime(const Event& start, const Event& end);

    class Impl;

private:
    Ptr<Impl> impl_;

    friend struct EventAccessor;
};
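/*
An illustrative sketch of timing device work with an event pair; timing requires that
DISABLE_TIMING is not set, which holds for the DEFAULT flag used here:

    cv::cuda::Stream stream;
    cv::cuda::Event start, stop;
    cv::cuda::GpuMat dev(1024, 1024, CV_32F);
    start.record(stream);
    dev.setTo(cv::Scalar(1.0f), stream);     // the work being measured
    stop.record(stream);
    stop.waitForCompletion();                // block until 'stop' has occurred
    float ms = cv::cuda::Event::elapsedTime(start, stop);
*/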

//! @} cudacore_struct

//===================================================================================
// Initialization & Info
//===================================================================================

//! @addtogroup cudacore_init
//! @{

/** @brief Returns the number of installed CUDA-enabled devices.

Use this function before any other CUDA functions calls. If OpenCV is compiled without CUDA support,
this function returns 0.
 */
CV_EXPORTS int getCudaEnabledDeviceCount();

/** @brief Sets a device and initializes it for the current thread.

@param device System index of a CUDA device starting with 0.

If the call of this function is omitted, a default device is initialized at the first CUDA usage.
 */
CV_EXPORTS void setDevice(int device);

/** @brief Returns the current device index set by cuda::setDevice or initialized by default.
 */
CV_EXPORTS int getDevice();

/** @brief Explicitly destroys and cleans up all resources associated with the current device in the current
process.

Any subsequent API call to this device will reinitialize the device.
 */
CV_EXPORTS void resetDevice();

/** @brief Enumeration providing CUDA computing features.
 */
enum FeatureSet
{
    FEATURE_SET_COMPUTE_10 = 10,
    FEATURE_SET_COMPUTE_11 = 11,
    FEATURE_SET_COMPUTE_12 = 12,
    FEATURE_SET_COMPUTE_13 = 13,
    FEATURE_SET_COMPUTE_20 = 20,
    FEATURE_SET_COMPUTE_21 = 21,
    FEATURE_SET_COMPUTE_30 = 30,
    FEATURE_SET_COMPUTE_32 = 32,
    FEATURE_SET_COMPUTE_35 = 35,
    FEATURE_SET_COMPUTE_50 = 50,

    GLOBAL_ATOMICS = FEATURE_SET_COMPUTE_11,
    SHARED_ATOMICS = FEATURE_SET_COMPUTE_12,
    NATIVE_DOUBLE = FEATURE_SET_COMPUTE_13,
    WARP_SHUFFLE_FUNCTIONS = FEATURE_SET_COMPUTE_30,
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};

//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);

/** @brief Class providing a set of static methods to check what NVIDIA card architecture the CUDA module was
built for.

According to the CUDA C Programming Guide Version 3.2: "PTX code produced for some specific compute
capability can always be compiled to binary code of greater or equal compute capability".
 */
class CV_EXPORTS TargetArchs
{
public:
    /** @brief The following method checks whether the module was built with the support of the given feature:

    @param feature_set Features to be checked. See cv::cuda::FeatureSet.
     */
    static bool builtWith(FeatureSet feature_set);

    /** @brief There is a set of methods to check whether the module contains intermediate (PTX) or binary CUDA
    code for the given architecture(s):

    @param major Major compute capability version.
    @param minor Minor compute capability version.
     */
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);

    static bool hasEqualOrLessPtx(int major, int minor);
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
};

/** @brief Class providing functionality for querying the specified GPU properties.
 */
class CV_EXPORTS DeviceInfo
{
public:
    //! creates DeviceInfo object for the current GPU
    DeviceInfo();

    /** @brief The constructors.

    @param device_id System index of the CUDA device starting with 0.

    Constructs the DeviceInfo object for the specified device. If device_id parameter is omitted, it
    constructs an object for the current device.
     */
    DeviceInfo(int device_id);

    /** @brief Returns system index of the CUDA device starting with 0.
     */
    int deviceID() const;

    //! ASCII string identifying device
    const char* name() const;

    //! global memory available on device in bytes
    size_t totalGlobalMem() const;

    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;

    //! 32-bit registers available per block
    int regsPerBlock() const;

    //! warp size in threads
    int warpSize() const;

    //! maximum pitch in bytes allowed by memory copies
    size_t memPitch() const;

    //! maximum number of threads per block
    int maxThreadsPerBlock() const;

    //! maximum size of each dimension of a block
    Vec3i maxThreadsDim() const;

    //! maximum size of each dimension of a grid
    Vec3i maxGridSize() const;

    //! clock frequency in kilohertz
    int clockRate() const;

    //! constant memory available on device in bytes
    size_t totalConstMem() const;

    //! major compute capability
    int majorVersion() const;

    //! minor compute capability
    int minorVersion() const;

    //! alignment requirement for textures
    size_t textureAlignment() const;

    //! pitch alignment requirement for texture references bound to pitched memory
    size_t texturePitchAlignment() const;

    //! number of multiprocessors on device
    int multiProcessorCount() const;

    //! specifies whether there is a run time limit on kernels
    bool kernelExecTimeoutEnabled() const;

    //! device is integrated as opposed to discrete
    bool integrated() const;

    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
    bool canMapHostMemory() const;

    enum ComputeMode
    {
        ComputeModeDefault,         /**< default compute mode (Multiple threads can use cudaSetDevice with this device) */
        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use cudaSetDevice with this device) */
        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use cudaSetDevice with this device) */
        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use cudaSetDevice with this device) */
    };

    //! compute mode
    ComputeMode computeMode() const;

    //! maximum 1D texture size
    int maxTexture1D() const;

    //! maximum 1D mipmapped texture size
    int maxTexture1DMipmap() const;

    //! maximum size for 1D textures bound to linear memory
    int maxTexture1DLinear() const;

    //! maximum 2D texture dimensions
    Vec2i maxTexture2D() const;

    //! maximum 2D mipmapped texture dimensions
    Vec2i maxTexture2DMipmap() const;

    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
    Vec3i maxTexture2DLinear() const;

    //! maximum 2D texture dimensions if texture gather operations have to be performed
    Vec2i maxTexture2DGather() const;

    //! maximum 3D texture dimensions
    Vec3i maxTexture3D() const;

    //! maximum Cubemap texture dimensions
    int maxTextureCubemap() const;

    //! maximum 1D layered texture dimensions
    Vec2i maxTexture1DLayered() const;

    //! maximum 2D layered texture dimensions
    Vec3i maxTexture2DLayered() const;

    //! maximum Cubemap layered texture dimensions
    Vec2i maxTextureCubemapLayered() const;

    //! maximum 1D surface size
    int maxSurface1D() const;

    //! maximum 2D surface dimensions
    Vec2i maxSurface2D() const;

    //! maximum 3D surface dimensions
    Vec3i maxSurface3D() const;

    //! maximum 1D layered surface dimensions
    Vec2i maxSurface1DLayered() const;

    //! maximum 2D layered surface dimensions
    Vec3i maxSurface2DLayered() const;

    //! maximum Cubemap surface dimensions
    int maxSurfaceCubemap() const;

    //! maximum Cubemap layered surface dimensions
    Vec2i maxSurfaceCubemapLayered() const;

    //! alignment requirements for surfaces
    size_t surfaceAlignment() const;

    //! device can possibly execute multiple kernels concurrently
    bool concurrentKernels() const;

    //! device has ECC support enabled
    bool ECCEnabled() const;

    //! PCI bus ID of the device
    int pciBusID() const;

    //! PCI device ID of the device
    int pciDeviceID() const;

    //! PCI domain ID of the device
    int pciDomainID() const;

    //! true if device is a Tesla device using TCC driver, false otherwise
    bool tccDriver() const;

    //! number of asynchronous engines
    int asyncEngineCount() const;

    //! device shares a unified address space with the host
    bool unifiedAddressing() const;

    //! peak memory clock frequency in kilohertz
    int memoryClockRate() const;

    //! global memory bus width in bits
    int memoryBusWidth() const;

    //! size of L2 cache in bytes
    int l2CacheSize() const;

    //! maximum resident threads per multiprocessor
    int maxThreadsPerMultiProcessor() const;

    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;

    /** @brief Provides information on CUDA feature support.

    @param feature_set Features to be checked. See cuda::FeatureSet.

    This function returns true if the device has the specified CUDA feature. Otherwise, it returns false.
     */
    bool supports(FeatureSet feature_set) const;

    /** @brief Checks the CUDA module and device compatibility.

    This function returns true if the CUDA module can be run on the specified device. Otherwise, it
    returns false.
     */
    bool isCompatible() const;

private:
    int device_id_;
};
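/*
An illustrative sketch of device discovery: enumerate CUDA devices, skip those the current
OpenCV build cannot run on, and select the first compatible one:

    int count = cv::cuda::getCudaEnabledDeviceCount();   // 0 without CUDA support
    for (int i = 0; i < count; ++i)
    {
        cv::cuda::DeviceInfo info(i);
        if (!info.isCompatible())
            continue;                                    // no matching PTX/binary
        cv::cuda::setDevice(i);
        bool hasDouble = cv::cuda::deviceSupports(cv::cuda::NATIVE_DOUBLE);
        (void)hasDouble;                                 // choose kernels accordingly
        break;
    }
*/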

CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);

//! @} cudacore_init

}} // namespace cv { namespace cuda {


#include "opencv2/core/cuda.inl.hpp"

#endif /* __OPENCV_CORE_CUDA_HPP__ */