1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved. 16 // Third party copyrights are property of their respective owners. 17 // 18 // Redistribution and use in source and binary forms, with or without modification, 19 // are permitted provided that the following conditions are met: 20 // 21 // * Redistribution's of source code must retain the above copyright notice, 22 // this list of conditions and the following disclaimer. 23 // 24 // * Redistribution's in binary form must reproduce the above copyright notice, 25 // this list of conditions and the following disclaimer in the documentation 26 // and/or other materials provided with the distribution. 27 // 28 // * The name of the copyright holders may not be used to endorse or promote products 29 // derived from this software without specific prior written permission. 30 // 31 // This software is provided by the copyright holders and contributors "as is" and 32 // any express or implied warranties, including, but not limited to, the implied 33 // warranties of merchantability and fitness for a particular purpose are disclaimed. 34 // In no event shall the Intel Corporation or contributors be liable for any direct, 35 // indirect, incidental, special, exemplary, or consequential damages 36 // (including, but not limited to, procurement of substitute goods or services; 37 // loss of use, data, or profits; or business interruption) however caused 38 // and on any theory of liability, whether in contract, strict liability, 39 // or tort (including negligence or otherwise) arising in any way out of 40 // the use of this software, even if advised of the possibility of such damage. 41 // 42 //M*/ 43 44 #pragma once 45 46 #ifndef __OPENCV_CUDEV_GRID_TRANSPOSE_DETAIL_HPP__ 47 #define __OPENCV_CUDEV_GRID_TRANSPOSE_DETAIL_HPP__ 48 49 #include "../../common.hpp" 50 #include "../../util/saturate_cast.hpp" 51 #include "../../ptr2d/glob.hpp" 52 #include "../../ptr2d/traits.hpp" 53 54 namespace cv { namespace cudev { 55 56 namespace transpose_detail 57 { 58 template <int TILE_DIM, int BLOCK_DIM_Y, class SrcPtr, typename DstType> transpose(const SrcPtr src,GlobPtr<DstType> dst,const int rows,const int cols)59 __global__ void transpose(const SrcPtr src, GlobPtr<DstType> dst, const int rows, const int cols) 60 { 61 typedef typename PtrTraits<SrcPtr>::value_type src_type; 62 63 __shared__ src_type tile[TILE_DIM][TILE_DIM + 1]; 64 65 int blockIdx_x, blockIdx_y; 66 67 // do diagonal reordering 68 if (gridDim.x == gridDim.y) 69 { 70 blockIdx_y = blockIdx.x; 71 blockIdx_x = (blockIdx.x + blockIdx.y) % gridDim.x; 72 } 73 else 74 { 75 int bid = blockIdx.x + gridDim.x * blockIdx.y; 76 blockIdx_y = bid % gridDim.y; 77 blockIdx_x = ((bid / gridDim.y) + blockIdx_y) % gridDim.x; 78 } 79 80 int xIndex = blockIdx_x * TILE_DIM + threadIdx.x; 81 int yIndex = blockIdx_y * TILE_DIM + threadIdx.y; 82 83 if (xIndex < cols) 84 { 85 for (int i = 0; i < TILE_DIM; i += BLOCK_DIM_Y) 86 { 87 if (yIndex + i < rows) 88 { 89 tile[threadIdx.y + i][threadIdx.x] = src(yIndex + i, xIndex); 90 } 91 } 92 } 93 94 __syncthreads(); 95 96 xIndex = blockIdx_y * TILE_DIM + threadIdx.x; 97 yIndex = blockIdx_x * TILE_DIM + threadIdx.y; 98 99 if (xIndex < rows) 100 { 101 for (int i = 0; i < TILE_DIM; i += BLOCK_DIM_Y) 102 { 103 if (yIndex + i < cols) 104 { 105 dst(yIndex + i, xIndex) = saturate_cast<DstType>(tile[threadIdx.x][threadIdx.y + i]); 106 } 107 } 108 } 109 } 110 111 template <class Policy, class SrcPtr, typename DstType> transpose(const SrcPtr & src,const GlobPtr<DstType> & dst,int rows,int cols,cudaStream_t stream)112 __host__ void transpose(const SrcPtr& src, const GlobPtr<DstType>& dst, int rows, int cols, cudaStream_t stream) 113 { 114 const dim3 block(Policy::tile_dim, Policy::block_dim_y); 115 const dim3 grid(divUp(cols, block.x), divUp(rows, block.y)); 116 117 transpose<Policy::tile_dim, Policy::block_dim_y><<<grid, block, 0, stream>>>(src, dst, rows, cols); 118 CV_CUDEV_SAFE_CALL( cudaGetLastError() ); 119 120 if (stream == 0) 121 CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); 122 } 123 } 124 125 }} 126 127 #endif 128