1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #ifndef __OPENCV_CUDA_DATAMOV_UTILS_HPP__ 44 #define __OPENCV_CUDA_DATAMOV_UTILS_HPP__ 45 46 #include "common.hpp" 47 48 /** @file 49 * @deprecated Use @ref cudev instead. 50 */ 51 52 //! @cond IGNORED 53 54 namespace cv { namespace cuda { namespace device 55 { 56 #if defined __CUDA_ARCH__ && __CUDA_ARCH__ >= 200 57 58 // for Fermi memory space is detected automatically 59 template <typename T> struct ForceGlob 60 { Loadcv::cuda::device::ForceGlob61 __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; } 62 }; 63 64 #else // __CUDA_ARCH__ >= 200 65 66 #if defined(_WIN64) || defined(__LP64__) 67 // 64-bit register modifier for inlined asm 68 #define OPENCV_CUDA_ASM_PTR "l" 69 #else 70 // 32-bit register modifier for inlined asm 71 #define OPENCV_CUDA_ASM_PTR "r" 72 #endif 73 74 template<class T> struct ForceGlob; 75 76 #define OPENCV_CUDA_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \ 77 template <> struct ForceGlob<base_type> \ 78 { \ 79 __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \ 80 { \ 81 asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \ 82 } \ 83 }; 84 85 #define OPENCV_CUDA_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \ 86 template <> struct ForceGlob<base_type> \ 87 { \ 88 __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \ 89 { \ 90 asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_CUDA_ASM_PTR(ptr + offset)); \ 91 } \ 92 }; 93 94 OPENCV_CUDA_DEFINE_FORCE_GLOB_B(uchar, u8) 95 OPENCV_CUDA_DEFINE_FORCE_GLOB_B(schar, s8) 96 OPENCV_CUDA_DEFINE_FORCE_GLOB_B(char, b8) 97 OPENCV_CUDA_DEFINE_FORCE_GLOB (ushort, u16, h) 98 OPENCV_CUDA_DEFINE_FORCE_GLOB (short, s16, h) 99 OPENCV_CUDA_DEFINE_FORCE_GLOB (uint, u32, r) 100 OPENCV_CUDA_DEFINE_FORCE_GLOB (int, s32, r) 101 OPENCV_CUDA_DEFINE_FORCE_GLOB (float, f32, f) 102 OPENCV_CUDA_DEFINE_FORCE_GLOB (double, f64, d) 103 104 #undef OPENCV_CUDA_DEFINE_FORCE_GLOB 105 #undef OPENCV_CUDA_DEFINE_FORCE_GLOB_B 106 #undef OPENCV_CUDA_ASM_PTR 107 108 #endif // __CUDA_ARCH__ >= 200 109 }}} // namespace cv { namespace cuda { namespace cudev 110 111 //! @endcond 112 113 #endif // __OPENCV_CUDA_DATAMOV_UTILS_HPP__ 114