/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef OPENCV_CUDA_EMULATION_HPP_
#define OPENCV_CUDA_EMULATION_HPP_

#include "common.hpp"
#include "warp_reduce.hpp"

/** @file
 * @deprecated Use @ref cudev instead.
 */

//! @cond IGNORED

namespace cv { namespace cuda { namespace device
{
    struct Emulation
    {
        static __device__ __forceinline__ int syncthreadsOr(int pred)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 200)
            // just a compilation stub: __syncthreads_or() requires compute capability 2.0
            return 0;
#else
            return __syncthreads_or(pred);
#endif
        }

        template<int CTA_SIZE>
        static __forceinline__ __device__ int Ballot(int predicate)
        {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ >= 200)
            return __ballot(predicate);
#else
            // Pre-2.0 fallback: each thread publishes its lane bit into shared
            // memory, then a warp reduction ORs the bits into the ballot mask.
            __shared__ volatile int cta_buffer[CTA_SIZE];

            int tid = threadIdx.x;
            cta_buffer[tid] = predicate ? (1 << (tid & 31)) : 0;
            return warp_reduce(cta_buffer);
#endif
        }
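
        // Usage sketch for Ballot (the kernel name and block size here are
        // hypothetical, not part of this header): every lane of the warp must
        // call Ballot; lane 0 publishes the warp-wide mask. __popc() of the
        // result gives the number of lanes whose predicate held.
        //
        //   __global__ void warpVote(const int* data, unsigned int* mask)
        //   {
        //       int pred = data[threadIdx.x] != 0;
        //       unsigned int m = Emulation::Ballot<32>(pred); // all lanes must call
        //       if (threadIdx.x == 0)
        //           *mask = m; // bit i is set iff lane i passed the predicate
        //   }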

        struct smem
        {
            enum { TAG_MASK = (1U << ( (sizeof(unsigned int) << 3) - 5U)) - 1U };

            template<typename T>
            static __device__ __forceinline__ T atomicInc(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                // Pre-1.2 parts have no shared-memory atomics: tag the upper 5 bits
                // of the word with the thread id and retry until this thread's
                // tagged write is the one that survives the race.
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + 1);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - 1;
#else
                return ::atomicInc(address, val);
#endif
            }

            template<typename T>
            static __device__ __forceinline__ T atomicAdd(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                T count;
                unsigned int tag = threadIdx.x << ( (sizeof(unsigned int) << 3) - 5U);
                do
                {
                    count = *address & TAG_MASK;
                    count = tag | (count + val);
                    *address = count;
                } while (*address != count);

                return (count & TAG_MASK) - val;
#else
                return ::atomicAdd(address, val);
#endif
            }

            template<typename T>
            static __device__ __forceinline__ T atomicMin(T* address, T val)
            {
#if defined (__CUDA_ARCH__) && (__CUDA_ARCH__ < 120)
                // Best-effort retry: keep writing the candidate minimum until no
                // larger value overwrites it.
                T count = ::min(*address, val);
                do
                {
                    *address = count;
                } while (*address > count);

                return count;
#else
                return ::atomicMin(address, val);
#endif
            }
        }; // struct smem
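
        // Usage sketch for the shared-memory emulation (hypothetical kernel,
        // illustrative only): a 64-bin histogram accumulated in shared memory.
        // On sm_1x, atomicInc falls back to the tagged retry loop above; on
        // newer parts it maps directly to ::atomicInc.
        //
        //   __global__ void histogram64(const unsigned char* data, int n, unsigned int* hist)
        //   {
        //       __shared__ unsigned int bins[64];
        //       if (threadIdx.x < 64) bins[threadIdx.x] = 0;
        //       __syncthreads();
        //       for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; i += gridDim.x * blockDim.x)
        //           Emulation::smem::atomicInc(&bins[data[i] >> 2], 0xFFFFFFFFU);
        //       __syncthreads();
        //       if (threadIdx.x < 64)
        //           Emulation::glob::atomicAdd(&hist[threadIdx.x], bins[threadIdx.x]);
        //   }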

        struct glob
        {
            static __device__ __forceinline__ int atomicAdd(int* address, int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
            {
                return ::atomicAdd(address, val);
            }
            static __device__ __forceinline__ float atomicAdd(float* address, float val)
            {
#if __CUDA_ARCH__ >= 200
                return ::atomicAdd(address, val);
#else
                // Emulate via compare-and-swap on the value's bit pattern:
                // retry until no other thread modified *address in between.
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(val + __int_as_float(assumed)));
                } while (assumed != old);
                return __int_as_float(old);
#endif
            }
            static __device__ __forceinline__ double atomicAdd(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(val + __longlong_as_double(assumed)));
                } while (assumed != old);
                return __longlong_as_double(old);
#else
                (void) address;
                (void) val;
                return 0.0;
#endif
            }

            static __device__ __forceinline__ int atomicMin(int* address, int val)
            {
                return ::atomicMin(address, val);
            }
            static __device__ __forceinline__ float atomicMin(float* address, float val)
            {
#if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fminf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
#else
                (void) address;
                (void) val;
                return 0.0f;
#endif
            }
            static __device__ __forceinline__ double atomicMin(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
#else
                (void) address;
                (void) val;
                return 0.0;
#endif
            }

            static __device__ __forceinline__ int atomicMax(int* address, int val)
            {
                return ::atomicMax(address, val);
            }
            static __device__ __forceinline__ float atomicMax(float* address, float val)
            {
#if __CUDA_ARCH__ >= 120
                int* address_as_i = (int*) address;
                int old = *address_as_i, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_i, assumed,
                        __float_as_int(::fmaxf(val, __int_as_float(assumed))));
                } while (assumed != old);
                return __int_as_float(old);
#else
                (void) address;
                (void) val;
                return 0.0f;
#endif
            }
            static __device__ __forceinline__ double atomicMax(double* address, double val)
            {
#if __CUDA_ARCH__ >= 130
                unsigned long long int* address_as_ull = (unsigned long long int*) address;
                unsigned long long int old = *address_as_ull, assumed;
                do {
                    assumed = old;
                    old = ::atomicCAS(address_as_ull, assumed,
                        __double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
                } while (assumed != old);
                return __longlong_as_double(old);
#else
                (void) address;
                (void) val;
                return 0.0;
#endif
            }
        }; // struct glob
    }; // struct Emulation
}}} // namespace cv { namespace cuda { namespace device

//! @endcond

#endif /* OPENCV_CUDA_EMULATION_HPP_ */