/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#pragma once

#ifndef __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__
#define __OPENCV_CUDEV_GRID_PYR_DOWN_DETAIL_HPP__

#include "../../common.hpp"
#include "../../util/vec_traits.hpp"
#include "../../util/saturate_cast.hpp"
#include "../../util/type_traits.hpp"
#include "../../ptr2d/glob.hpp"
#include "../../ptr2d/traits.hpp"

namespace cv { namespace cudev {

namespace pyramids_detail
{
    // Launch geometry shared by the pyrDown kernel and its host launcher.
    // The kernel sizes its shared buffer and selects its halo/output threads
    // from these values, so it must be launched with
    // blockDim.x == PYR_DOWN_BLOCK_SIZE.
    enum
    {
        PYR_DOWN_BLOCK_SIZE = 256, // threads (= source columns) per block
        PYR_DOWN_HALO       = 2    // extra columns needed on each side by the 5-tap filter
    };

    // Weighted sum of five source samples taken from one column `col` at rows
    // r0..r4, with weights 0.0625, 0.25, 0.375, 0.25, 0.0625 (i.e. [1 4 6 4 1] / 16).
    // The accumulation order is fixed (r0 first, r4 last) so results do not
    // depend on which call site invoked the helper.
    template <typename WorkType, class SrcPtr>
    __device__ __forceinline__ WorkType pyrDownColumnSum(const SrcPtr& src,
                                                         const int r0, const int r1, const int r2,
                                                         const int r3, const int r4,
                                                         const int col)
    {
        WorkType sum;

        sum =       0.0625f * src(r0, col);
        sum = sum + 0.25f   * src(r1, col);
        sum = sum + 0.375f  * src(r2, col);
        sum = sum + 0.25f   * src(r3, col);
        sum = sum + 0.0625f * src(r4, col);

        return sum;
    }

    // Kernel: produces one destination row (y = blockIdx.y) from source rows
    // around src_y = 2 * y.
    //
    // Expected launch: 1-D blocks of PYR_DOWN_BLOCK_SIZE threads; grid.x covers the
    // source columns, grid.y indexes destination rows. Uses
    // (PYR_DOWN_BLOCK_SIZE + 2 * PYR_DOWN_HALO) elements of static shared memory.
    //
    // Step 1: every thread filters its source column vertically and stores the
    //         result in shared memory; the first/last PYR_DOWN_HALO threads also
    //         fill the left/right halo columns.
    // Step 2: after a block barrier, the first PYR_DOWN_BLOCK_SIZE / 2 threads
    //         filter the shared row horizontally at every second column and
    //         write one destination pixel each.
    //
    // Brd must provide static idx_low(index, length) and idx_high(index, length).
    // They are applied only when the thread is not strictly inside the image;
    // presumably they remap out-of-range indices into [0, length) -- the exact
    // border rule is defined by Brd and is not visible here.
    template <class Brd, class SrcPtr, typename DstType>
    __global__ void pyrDown(const SrcPtr src, GlobPtr<DstType> dst, const int src_rows, const int src_cols, const int dst_cols)
    {
        typedef typename PtrTraits<SrcPtr>::value_type src_type;
        typedef typename VecTraits<src_type>::elem_type src_elem_type;
        // Accumulator element type chosen by LargerType<float, src_elem_type>
        // (NOTE(review): presumably float unless the source element is wider -- confirm).
        typedef typename LargerType<float, src_elem_type>::type work_elem_type;
        typedef typename MakeVec<work_elem_type, VecTraits<src_type>::cn>::type work_type;

        // Vertically filtered row: [left halo | block columns | right halo].
        __shared__ work_type smem[PYR_DOWN_BLOCK_SIZE + 2 * PYR_DOWN_HALO];

        const int tid = threadIdx.x;

        const int x = blockIdx.x * blockDim.x + threadIdx.x;
        const int y = blockIdx.y;

        const int src_y = 2 * y;

        // True when all five rows and the +/-2 neighbouring columns are inside the
        // image, so no border remapping is required for this thread.
        const bool interior = src_y >= 2 && src_y < src_rows - 2 && x >= 2 && x < src_cols - 2;

        // Rows of the five vertical taps. The centre row (src_y) is never remapped.
        const int row_m2 = interior ? src_y - 2 : Brd::idx_low(src_y - 2, src_rows);
        const int row_m1 = interior ? src_y - 1 : Brd::idx_low(src_y - 1, src_rows);
        const int row_p1 = interior ? src_y + 1 : Brd::idx_high(src_y + 1, src_rows);
        const int row_p2 = interior ? src_y + 2 : Brd::idx_high(src_y + 2, src_rows);

        // This thread's own column.
        {
            const int col = interior ? x : Brd::idx_high(x, src_cols);

            smem[PYR_DOWN_HALO + tid] =
                pyrDownColumnSum<work_type>(src, row_m2, row_m1, src_y, row_p1, row_p2, col);
        }

        // Left halo: the first PYR_DOWN_HALO threads also load the columns just
        // before the block.
        if (tid < PYR_DOWN_HALO)
        {
            const int left_x = x - 2;
            const int col = interior ? left_x
                                     : Brd::idx_low(Brd::idx_high(left_x, src_cols), src_cols);

            smem[tid] =
                pyrDownColumnSum<work_type>(src, row_m2, row_m1, src_y, row_p1, row_p2, col);
        }

        // Right halo: the last PYR_DOWN_HALO threads also load the columns just
        // after the block.
        if (tid >= PYR_DOWN_BLOCK_SIZE - PYR_DOWN_HALO)
        {
            const int right_x = x + 2;
            const int col = interior ? right_x : Brd::idx_high(right_x, src_cols);

            smem[2 * PYR_DOWN_HALO + tid] =
                pyrDownColumnSum<work_type>(src, row_m2, row_m1, src_y, row_p1, row_p2, col);
        }

        // All shared-memory writes must be visible before the horizontal pass.
        // The barrier is outside every conditional, so all threads reach it.
        __syncthreads();

        if (tid < PYR_DOWN_BLOCK_SIZE / 2)
        {
            // Even block-local column handled by this thread; the 5-tap window
            // centred on it starts at smem[tid2] because of the left halo offset.
            const int tid2 = tid * 2;

            work_type sum;

            sum =       0.0625f * smem[tid2    ];
            sum = sum + 0.25f   * smem[tid2 + 1];
            sum = sum + 0.375f  * smem[tid2 + 2];
            sum = sum + 0.25f   * smem[tid2 + 3];
            sum = sum + 0.0625f * smem[tid2 + 4];

            const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;

            // The last block may extend past the destination width.
            if (dst_x < dst_cols)
                dst(y, dst_x) = saturate_cast<DstType>(sum);
        }
    }

    // Host launcher for the pyrDown kernel.
    //
    // src, dst            - source accessor and destination GlobPtr passed to the kernel
    // src_rows, src_cols  - source extent (src_cols also determines grid.x)
    // dst_rows, dst_cols  - destination extent (dst_rows determines grid.y)
    // stream              - stream to launch on; when it is the default stream (0)
    //                       the call additionally waits for the device to finish
    //
    // Launch and synchronisation status are checked through CV_CUDEV_SAFE_CALL.
    template <class Brd, class SrcPtr, typename DstType>
    __host__ void pyrDown(const SrcPtr& src, const GlobPtr<DstType>& dst, int src_rows, int src_cols, int dst_rows, int dst_cols, cudaStream_t stream)
    {
        // Must match the block size the kernel is written for.
        const dim3 block(PYR_DOWN_BLOCK_SIZE);
        const dim3 grid(divUp(src_cols, block.x), dst_rows);

        pyrDown<Brd><<<grid, block, 0, stream>>>(src, dst, src_rows, src_cols, dst_cols);
        CV_CUDEV_SAFE_CALL( cudaGetLastError() );

        if (stream == 0)
            CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
    }
}

}}

#endif