1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved. 16 // Third party copyrights are property of their respective owners. 17 // 18 // Redistribution and use in source and binary forms, with or without modification, 19 // are permitted provided that the following conditions are met: 20 // 21 // * Redistribution's of source code must retain the above copyright notice, 22 // this list of conditions and the following disclaimer. 23 // 24 // * Redistribution's in binary form must reproduce the above copyright notice, 25 // this list of conditions and the following disclaimer in the documentation 26 // and/or other materials provided with the distribution. 27 // 28 // * The name of the copyright holders may not be used to endorse or promote products 29 // derived from this software without specific prior written permission. 30 // 31 // This software is provided by the copyright holders and contributors "as is" and 32 // any express or implied warranties, including, but not limited to, the implied 33 // warranties of merchantability and fitness for a particular purpose are disclaimed. 34 // In no event shall the Intel Corporation or contributors be liable for any direct, 35 // indirect, incidental, special, exemplary, or consequential damages 36 // (including, but not limited to, procurement of substitute goods or services; 37 // loss of use, data, or profits; or business interruption) however caused 38 // and on any theory of liability, whether in contract, strict liability, 39 // or tort (including negligence or otherwise) arising in any way out of 40 // the use of this software, even if advised of the possibility of such damage. 41 // 42 //M*/ 43 44 #pragma once 45 46 #ifndef __OPENCV_CUDEV_GRID_REDUCE_TO_COLUMN_DETAIL_HPP__ 47 #define __OPENCV_CUDEV_GRID_REDUCE_TO_COLUMN_DETAIL_HPP__ 48 49 #include "../../common.hpp" 50 #include "../../util/saturate_cast.hpp" 51 #include "../../block/reduce.hpp" 52 53 namespace cv { namespace cudev { 54 55 namespace grid_reduce_to_vec_detail 56 { 57 template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor, int cn> struct Reduce; 58 59 template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 1> 60 { callcv::cudev::grid_reduce_to_vec_detail::Reduce61 __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal) 62 { 63 typename Reductor::template rebind<work_elem_type>::other op; 64 blockReduce<BLOCK_SIZE>(smem[0], myVal, threadIdx.x, op); 65 } 66 }; 67 68 template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 2> 69 { callcv::cudev::grid_reduce_to_vec_detail::Reduce70 __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal) 71 { 72 typename Reductor::template rebind<work_elem_type>::other op; 73 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1]), tie(myVal.x, myVal.y), threadIdx.x, make_tuple(op, op)); 74 } 75 }; 76 77 template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 3> 78 { callcv::cudev::grid_reduce_to_vec_detail::Reduce79 __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal) 80 { 81 typename Reductor::template rebind<work_elem_type>::other op; 82 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2]), tie(myVal.x, myVal.y, myVal.z), threadIdx.x, make_tuple(op, op, op)); 83 } 84 }; 85 86 template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor> struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 4> 87 { callcv::cudev::grid_reduce_to_vec_detail::Reduce88 __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal) 89 { 90 typename Reductor::template rebind<work_elem_type>::other op; 91 blockReduce<BLOCK_SIZE>(smem_tuple(smem[0], smem[1], smem[2], smem[3]), tie(myVal.x, myVal.y, myVal.z, myVal.w), threadIdx.x, make_tuple(op, op, op, op)); 92 } 93 }; 94 95 template <class Reductor, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr> reduceToColumn(const SrcPtr src,ResType * dst,const MaskPtr mask,const int cols)96 __global__ void reduceToColumn(const SrcPtr src, ResType* dst, const MaskPtr mask, const int cols) 97 { 98 typedef typename Reductor::work_type work_type; 99 typedef typename VecTraits<work_type>::elem_type work_elem_type; 100 const int cn = VecTraits<work_type>::cn; 101 102 __shared__ work_elem_type smem[cn][BLOCK_SIZE]; 103 104 const int y = blockIdx.x; 105 106 work_type myVal = Reductor::initialValue(); 107 108 Reductor op; 109 110 for (int x = threadIdx.x; x < cols; x += BLOCK_SIZE) 111 { 112 if (mask(y, x)) 113 { 114 myVal = op(myVal, saturate_cast<work_type>(src(y, x))); 115 } 116 } 117 118 Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, cn>::call(smem, myVal); 119 120 if (threadIdx.x == 0) 121 dst[y] = saturate_cast<ResType>(Reductor::result(myVal, cols)); 122 } 123 124 template <class Reductor, class Policy, class SrcPtr, typename ResType, class MaskPtr> reduceToColumn(const SrcPtr & src,ResType * dst,const MaskPtr & mask,int rows,int cols,cudaStream_t stream)125 __host__ void reduceToColumn(const SrcPtr& src, ResType* dst, const MaskPtr& mask, int rows, int cols, cudaStream_t stream) 126 { 127 const int BLOCK_SIZE_X = Policy::block_size_x; 128 const int BLOCK_SIZE_Y = Policy::block_size_y; 129 130 const int BLOCK_SIZE = BLOCK_SIZE_X * BLOCK_SIZE_Y; 131 132 const dim3 block(BLOCK_SIZE); 133 const dim3 grid(rows); 134 135 reduceToColumn<Reductor, BLOCK_SIZE><<<grid, block, 0, stream>>>(src, dst, mask, cols); 136 CV_CUDEV_SAFE_CALL( cudaGetLastError() ); 137 138 if (stream == 0) 139 CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); 140 141 } 142 } 143 144 }} 145 146 #endif 147