1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Third party copyrights are property of their respective owners. 16 // 17 // Redistribution and use in source and binary forms, with or without modification, 18 // are permitted provided that the following conditions are met: 19 // 20 // * Redistribution's of source code must retain the above copyright notice, 21 // this list of conditions and the following disclaimer. 22 // 23 // * Redistribution's in binary form must reproduce the above copyright notice, 24 // this list of conditions and the following disclaimer in the documentation 25 // and/or other materials provided with the distribution. 26 // 27 // * The name of the copyright holders may not be used to endorse or promote products 28 // derived from this software without specific prior written permission. 29 // 30 // This software is provided by the copyright holders and contributors "as is" and 31 // any express or implied warranties, including, but not limited to, the implied 32 // warranties of merchantability and fitness for a particular purpose are disclaimed. 33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #if !defined CUDA_DISABLER 44 45 #include "opencv2/core/cuda/common.hpp" 46 #include "opencv2/core/cuda/vec_traits.hpp" 47 #include "opencv2/core/cuda/vec_math.hpp" 48 #include "opencv2/core/cuda/limits.hpp" 49 #include "opencv2/core/cuda/color.hpp" 50 #include "opencv2/core/cuda/saturate_cast.hpp" 51 52 namespace cv { namespace cuda { namespace device 53 { 54 template <typename T> struct Bayer2BGR; 55 56 template <> struct Bayer2BGR<uchar> 57 { 58 uchar3 res0; 59 uchar3 res1; 60 uchar3 res2; 61 uchar3 res3; 62 applycv::cuda::device::Bayer2BGR63 __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green) 64 { 65 uchar4 patch[3][3]; 66 patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x]; 67 patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; 68 patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; 69 70 patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x]; 71 patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)]; 72 patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; 73 74 patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x]; 75 patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; 76 patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)]; 77 78 if ((s_y & 1) ^ start_with_green) 79 { 80 const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1; 81 const int t1 = (patch[1][0].w + patch[1][1].y + 1) >> 1; 82 83 const int t2 = (patch[0][1].x + patch[0][1].z + patch[2][1].x + patch[2][1].z + 2) >> 2; 84 const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][1].z + patch[2][1].y + 2) >> 2; 85 86 const int t4 = (patch[0][1].z + patch[2][1].z + 1) >> 1; 87 const int t5 = (patch[1][1].y + patch[1][1].w + 1) >> 1; 88 89 const int t6 = (patch[0][1].z + patch[0][2].x + patch[2][1].z + patch[2][2].x + 2) >> 2; 90 const int t7 = (patch[0][1].w + patch[1][1].z + patch[1][2].x + patch[2][1].w + 2) >> 2; 91 92 if ((s_y & 1) ^ blue_last) 93 { 94 res0.x = t1; 95 res0.y = patch[1][1].x; 96 res0.z = t0; 97 98 res1.x = patch[1][1].y; 99 res1.y = t3; 100 res1.z = t2; 101 102 res2.x = t5; 103 res2.y = patch[1][1].z; 104 res2.z = t4; 105 106 res3.x = patch[1][1].w; 107 res3.y = t7; 108 res3.z = t6; 109 } 110 else 111 { 112 res0.x = t0; 113 res0.y = patch[1][1].x; 114 res0.z = t1; 115 116 res1.x = t2; 117 res1.y = t3; 118 res1.z = patch[1][1].y; 119 120 res2.x = t4; 121 res2.y = patch[1][1].z; 122 res2.z = t5; 123 124 res3.x = t6; 125 res3.y = t7; 126 res3.z = patch[1][1].w; 127 } 128 } 129 else 130 { 131 const int t0 = (patch[0][0].w + patch[0][1].y + patch[2][0].w + patch[2][1].y + 2) >> 2; 132 const int t1 = (patch[0][1].x + patch[1][0].w + patch[1][1].y + patch[2][1].x + 2) >> 2; 133 134 const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1; 135 const int t3 = (patch[1][1].x + patch[1][1].z + 1) >> 1; 136 137 const int t4 = (patch[0][1].y + patch[0][1].w + patch[2][1].y + patch[2][1].w + 2) >> 2; 138 const int t5 = (patch[0][1].z + patch[1][1].y + patch[1][1].w + patch[2][1].z + 2) >> 2; 139 140 const int t6 = (patch[0][1].w + patch[2][1].w + 1) >> 1; 141 const int t7 = (patch[1][1].z + patch[1][2].x + 1) >> 1; 142 143 if ((s_y & 1) ^ blue_last) 144 { 145 res0.x = patch[1][1].x; 146 res0.y = t1; 147 res0.z = t0; 148 149 res1.x = t3; 150 res1.y = patch[1][1].y; 151 res1.z = t2; 152 153 res2.x = patch[1][1].z; 154 res2.y = t5; 155 res2.z = t4; 156 157 res3.x = t7; 158 res3.y = patch[1][1].w; 159 res3.z = t6; 160 } 161 else 162 { 163 res0.x = t0; 164 res0.y = t1; 165 res0.z = patch[1][1].x; 166 167 res1.x = t2; 168 res1.y = patch[1][1].y; 169 res1.z = t3; 170 171 res2.x = t4; 172 res2.y = t5; 173 res2.z = patch[1][1].z; 174 175 res3.x = t6; 176 res3.y = patch[1][1].w; 177 res3.z = t7; 178 } 179 } 180 } 181 }; 182 183 template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix); toDst(const uchar3 & pix)184 template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix) 185 { 186 typename bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor(); 187 return f(pix); 188 } toDst(const uchar3 & pix)189 template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix) 190 { 191 return pix; 192 } toDst(const uchar3 & pix)193 template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix) 194 { 195 return make_uchar4(pix.x, pix.y, pix.z, 255); 196 } 197 198 template <typename D> Bayer2BGR_8u(const PtrStepSzb src,PtrStep<D> dst,const bool blue_last,const bool start_with_green)199 __global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green) 200 { 201 const int s_x = blockIdx.x * blockDim.x + threadIdx.x; 202 int s_y = blockIdx.y * blockDim.y + threadIdx.y; 203 204 if (s_y >= src.rows || (s_x << 2) >= src.cols) 205 return; 206 207 s_y = ::min(::max(s_y, 1), src.rows - 2); 208 209 Bayer2BGR<uchar> bayer; 210 bayer.apply(src, s_x, s_y, blue_last, start_with_green); 211 212 const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2; 213 const int d_y = blockIdx.y * blockDim.y + threadIdx.y; 214 215 dst(d_y, d_x) = toDst<D>(bayer.res0); 216 if (d_x + 1 < src.cols) 217 dst(d_y, d_x + 1) = toDst<D>(bayer.res1); 218 if (d_x + 2 < src.cols) 219 dst(d_y, d_x + 2) = toDst<D>(bayer.res2); 220 if (d_x + 3 < src.cols) 221 dst(d_y, d_x + 3) = toDst<D>(bayer.res3); 222 } 223 224 template <> struct Bayer2BGR<ushort> 225 { 226 ushort3 res0; 227 ushort3 res1; 228 applycv::cuda::device::Bayer2BGR229 __device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green) 230 { 231 ushort2 patch[3][3]; 232 patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x]; 233 patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)]; 234 patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; 235 236 patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x]; 237 patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)]; 238 patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; 239 240 patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x]; 241 patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)]; 242 patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)]; 243 244 if ((s_y & 1) ^ start_with_green) 245 { 246 const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1; 247 const int t1 = (patch[1][0].y + patch[1][1].y + 1) >> 1; 248 249 const int t2 = (patch[0][1].x + patch[0][2].x + patch[2][1].x + patch[2][2].x + 2) >> 2; 250 const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][2].x + patch[2][1].y + 2) >> 2; 251 252 if ((s_y & 1) ^ blue_last) 253 { 254 res0.x = t1; 255 res0.y = patch[1][1].x; 256 res0.z = t0; 257 258 res1.x = patch[1][1].y; 259 res1.y = t3; 260 res1.z = t2; 261 } 262 else 263 { 264 res0.x = t0; 265 res0.y = patch[1][1].x; 266 res0.z = t1; 267 268 res1.x = t2; 269 res1.y = t3; 270 res1.z = patch[1][1].y; 271 } 272 } 273 else 274 { 275 const int t0 = (patch[0][0].y + patch[0][1].y + patch[2][0].y + patch[2][1].y + 2) >> 2; 276 const int t1 = (patch[0][1].x + patch[1][0].y + patch[1][1].y + patch[2][1].x + 2) >> 2; 277 278 const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1; 279 const int t3 = (patch[1][1].x + patch[1][2].x + 1) >> 1; 280 281 if ((s_y & 1) ^ blue_last) 282 { 283 res0.x = patch[1][1].x; 284 res0.y = t1; 285 res0.z = t0; 286 287 res1.x = t3; 288 res1.y = patch[1][1].y; 289 res1.z = t2; 290 } 291 else 292 { 293 res0.x = t0; 294 res0.y = t1; 295 res0.z = patch[1][1].x; 296 297 res1.x = t2; 298 res1.y = patch[1][1].y; 299 res1.z = t3; 300 } 301 } 302 } 303 }; 304 305 template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix); toDst(const ushort3 & pix)306 template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix) 307 { 308 typename bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor(); 309 return f(pix); 310 } toDst(const ushort3 & pix)311 template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix) 312 { 313 return pix; 314 } toDst(const ushort3 & pix)315 template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix) 316 { 317 return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max()); 318 } 319 320 template <typename D> Bayer2BGR_16u(const PtrStepSzb src,PtrStep<D> dst,const bool blue_last,const bool start_with_green)321 __global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green) 322 { 323 const int s_x = blockIdx.x * blockDim.x + threadIdx.x; 324 int s_y = blockIdx.y * blockDim.y + threadIdx.y; 325 326 if (s_y >= src.rows || (s_x << 1) >= src.cols) 327 return; 328 329 s_y = ::min(::max(s_y, 1), src.rows - 2); 330 331 Bayer2BGR<ushort> bayer; 332 bayer.apply(src, s_x, s_y, blue_last, start_with_green); 333 334 const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1; 335 const int d_y = blockIdx.y * blockDim.y + threadIdx.y; 336 337 dst(d_y, d_x) = toDst<D>(bayer.res0); 338 if (d_x + 1 < src.cols) 339 dst(d_y, d_x + 1) = toDst<D>(bayer.res1); 340 } 341 342 template <int cn> Bayer2BGR_8u_gpu(PtrStepSzb src,PtrStepSzb dst,bool blue_last,bool start_with_green,cudaStream_t stream)343 void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) 344 { 345 typedef typename TypeVec<uchar, cn>::vec_type dst_t; 346 347 const dim3 block(32, 8); 348 const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y)); 349 350 cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) ); 351 352 Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green); 353 cudaSafeCall( cudaGetLastError() ); 354 355 if (stream == 0) 356 cudaSafeCall( cudaDeviceSynchronize() ); 357 } 358 359 template <int cn> Bayer2BGR_16u_gpu(PtrStepSzb src,PtrStepSzb dst,bool blue_last,bool start_with_green,cudaStream_t stream)360 void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream) 361 { 362 typedef typename TypeVec<ushort, cn>::vec_type dst_t; 363 364 const dim3 block(32, 8); 365 const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y)); 366 367 cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) ); 368 369 Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green); 370 cudaSafeCall( cudaGetLastError() ); 371 372 if (stream == 0) 373 cudaSafeCall( cudaDeviceSynchronize() ); 374 } 375 376 template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); 377 template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); 378 template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); 379 380 template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); 381 template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); 382 template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream); 383 384 ////////////////////////////////////////////////////////////// 385 // Bayer Demosaicing (Malvar, He, and Cutler) 386 // 387 // by Morgan McGuire, Williams College 388 // http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders 389 // 390 // ported to CUDA 391 392 texture<uchar, cudaTextureType2D, cudaReadModeElementType> sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp); 393 394 template <typename DstType> MHCdemosaic(PtrStepSz<DstType> dst,const int2 sourceOffset,const int2 firstRed)395 __global__ void MHCdemosaic(PtrStepSz<DstType> dst, const int2 sourceOffset, const int2 firstRed) 396 { 397 const float kAx = -1.0f / 8.0f, kAy = -1.5f / 8.0f, kAz = 0.5f / 8.0f /*kAw = -1.0f / 8.0f*/; 398 const float kBx = 2.0f / 8.0f, /*kBy = 0.0f / 8.0f,*/ /*kBz = 0.0f / 8.0f,*/ kBw = 4.0f / 8.0f ; 399 const float kCx = 4.0f / 8.0f, kCy = 6.0f / 8.0f, kCz = 5.0f / 8.0f /*kCw = 5.0f / 8.0f*/; 400 const float /*kDx = 0.0f / 8.0f,*/ kDy = 2.0f / 8.0f, kDz = -1.0f / 8.0f /*kDw = -1.0f / 8.0f*/; 401 const float kEx = -1.0f / 8.0f, kEy = -1.5f / 8.0f, /*kEz = -1.0f / 8.0f,*/ kEw = 0.5f / 8.0f ; 402 const float kFx = 2.0f / 8.0f, /*kFy = 0.0f / 8.0f,*/ kFz = 4.0f / 8.0f /*kFw = 0.0f / 8.0f*/; 403 404 const int x = blockIdx.x * blockDim.x + threadIdx.x; 405 const int y = blockIdx.y * blockDim.y + threadIdx.y; 406 407 if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1) 408 return; 409 410 int2 center; 411 center.x = x + sourceOffset.x; 412 center.y = y + sourceOffset.y; 413 414 int4 xCoord; 415 xCoord.x = center.x - 2; 416 xCoord.y = center.x - 1; 417 xCoord.z = center.x + 1; 418 xCoord.w = center.x + 2; 419 420 int4 yCoord; 421 yCoord.x = center.y - 2; 422 yCoord.y = center.y - 1; 423 yCoord.z = center.y + 1; 424 yCoord.w = center.y + 2; 425 426 float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0) 427 428 float4 Dvec; 429 Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1) 430 Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1) 431 Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1) 432 Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1) 433 434 float4 value; 435 value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0 436 value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0 437 value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0 438 value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0 439 440 // (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1) 441 value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1 442 value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1 443 value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1 444 value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1 445 446 float4 PATTERN; 447 PATTERN.x = kCx * C; 448 PATTERN.y = kCy * C; 449 PATTERN.z = kCz * C; 450 PATTERN.w = PATTERN.z; 451 452 float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w; 453 454 // There are five filter patterns (identity, cross, checker, 455 // theta, phi). Precompute the terms from all of them and then 456 // use swizzles to assign to color channels. 457 // 458 // Channel Matches 459 // x cross (e.g., EE G) 460 // y checker (e.g., EE B) 461 // z theta (e.g., EO R) 462 // w phi (e.g., EO B) 463 464 #define A value.x // A0 + A1 465 #define B value.y // B0 + B1 466 #define E value.z // E0 + E1 467 #define F value.w // F0 + F1 468 469 float3 temp; 470 471 // PATTERN.yzw += (kD.yz * D).xyy; 472 temp.x = kDy * D; 473 temp.y = kDz * D; 474 PATTERN.y += temp.x; 475 PATTERN.z += temp.y; 476 PATTERN.w += temp.y; 477 478 // PATTERN += (kA.xyz * A).xyzx; 479 temp.x = kAx * A; 480 temp.y = kAy * A; 481 temp.z = kAz * A; 482 PATTERN.x += temp.x; 483 PATTERN.y += temp.y; 484 PATTERN.z += temp.z; 485 PATTERN.w += temp.x; 486 487 // PATTERN += (kE.xyw * E).xyxz; 488 temp.x = kEx * E; 489 temp.y = kEy * E; 490 temp.z = kEw * E; 491 PATTERN.x += temp.x; 492 PATTERN.y += temp.y; 493 PATTERN.z += temp.x; 494 PATTERN.w += temp.z; 495 496 // PATTERN.xw += kB.xw * B; 497 PATTERN.x += kBx * B; 498 PATTERN.w += kBw * B; 499 500 // PATTERN.xz += kF.xz * F; 501 PATTERN.x += kFx * F; 502 PATTERN.z += kFz * F; 503 504 // Determine which of four types of pixels we are on. 505 int2 alternate; 506 alternate.x = (x + firstRed.x) % 2; 507 alternate.y = (y + firstRed.y) % 2; 508 509 // in BGR sequence; 510 uchar3 pixelColor = 511 (alternate.y == 0) ? 512 ((alternate.x == 0) ? 513 make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) : 514 make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) : 515 ((alternate.x == 0) ? 516 make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) : 517 make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y))); 518 519 dst(y, x) = toDst<DstType>(pixelColor); 520 } 521 522 template <int cn> MHCdemosaic(PtrStepSzb src,int2 sourceOffset,PtrStepSzb dst,int2 firstRed,cudaStream_t stream)523 void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream) 524 { 525 typedef typename TypeVec<uchar, cn>::vec_type dst_t; 526 527 const dim3 block(32, 8); 528 const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y)); 529 530 bindTexture(&sourceTex, src); 531 532 MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed); 533 cudaSafeCall( cudaGetLastError() ); 534 535 if (stream == 0) 536 cudaSafeCall( cudaDeviceSynchronize() ); 537 } 538 539 template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); 540 template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); 541 template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream); 542 }}} 543 544 #endif /* CUDA_DISABLER */ 545