1/*M/////////////////////////////////////////////////////////////////////////////////////// 2// 3// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4// 5// By downloading, copying, installing or using the software you agree to this license. 6// If you do not agree to this license, do not download, install, 7// copy or use the software. 8// 9// 10// License Agreement 11// For Open Source Computer Vision Library 12// 13// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 14// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 15// Third party copyrights are property of their respective owners. 16// 17// @Authors 18// Zhang Chunpeng chunpeng@multicorewareinc.com 19// Dachuan Zhao, dachuan@multicorewareinc.com 20// Yao Wang, yao@multicorewareinc.com 21// Peng Xiao, pengxiao@outlook.com 22// 23// Redistribution and use in source and binary forms, with or without modification, 24// are permitted provided that the following conditions are met: 25// 26// * Redistribution's of source code must retain the above copyright notice, 27// this list of conditions and the following disclaimer. 28// 29// * Redistribution's in binary form must reproduce the above copyright notice, 30// this list of conditions and the following disclaimer in the documentation 31// and/or other materials provided with the distribution. 32// 33// * The name of the copyright holders may not be used to endorse or promote products 34// derived from this software without specific prior written permission. 35// 36// This software is provided by the copyright holders and contributors as is and 37// any express or implied warranties, including, but not limited to, the implied 38// warranties of merchantability and fitness for a particular purpose are disclaimed. 39// In no event shall the Intel Corporation or contributors be liable for any direct, 40// indirect, incidental, special, exemplary, or consequential damages 41// (including, but not limited to, procurement of substitute goods or services; 42// loss of use, data, or profits; or business interruption) however caused 43// and on any theory of liability, whether in contract, strict liability, 44// or tort (including negligence or otherwise) arising in any way out of 45// the use of this software, even if advised of the possibility of such damage. 46// 47//M*/ 48 49/////////////////////////////////////////////////////////////////////// 50//////////////////////// Generic PyrUp ////////////////////////////// 51/////////////////////////////////////////////////////////////////////// 52 53#ifdef DOUBLE_SUPPORT 54#ifdef cl_amd_fp64 55#pragma OPENCL EXTENSION cl_amd_fp64:enable 56#elif defined (cl_khr_fp64) 57#pragma OPENCL EXTENSION cl_khr_fp64:enable 58#endif 59#endif 60 61#if cn != 3 62#define loadpix(addr) *(__global const T*)(addr) 63#define storepix(val, addr) *(__global T*)(addr) = (val) 64#define PIXSIZE ((int)sizeof(T)) 65#else 66#define loadpix(addr) vload3(0, (__global const T1*)(addr)) 67#define storepix(val, addr) vstore3((val), 0, (__global T1*)(addr)) 68#define PIXSIZE ((int)sizeof(T1)*3) 69#endif 70 71#define EXTRAPOLATE(x, maxV) min(maxV - 1, (int) abs(x)) 72 73#define noconvert 74 75__kernel void pyrUp(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, 76 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols) 77{ 78 const int x = get_global_id(0); 79 const int y = get_global_id(1); 80 81 const int tidx = get_local_id(0); 82 const int tidy = get_local_id(1); 83 84 __local FT s_srcPatch[LOCAL_SIZE/2 + 2][LOCAL_SIZE/2 + 2]; 85 __local FT s_dstPatch[LOCAL_SIZE/2 + 2][LOCAL_SIZE]; 86 87 __global uchar * dstData = dst + dst_offset; 88 __global const uchar * srcData = src + src_offset; 89 90 if( tidx < (LOCAL_SIZE/2 + 2) && tidy < LOCAL_SIZE/2 + 2 ) 91 { 92 int srcx = EXTRAPOLATE(mad24((int)get_group_id(0), LOCAL_SIZE/2, tidx) - 1, src_cols); 93 int srcy = EXTRAPOLATE(mad24((int)get_group_id(1), LOCAL_SIZE/2, tidy) - 1, src_rows); 94 95 s_srcPatch[tidy][tidx] = convertToFT(loadpix(srcData + srcy * src_step + srcx * PIXSIZE)); 96 } 97 98 barrier(CLK_LOCAL_MEM_FENCE); 99 100 FT sum = 0.f; 101 102 const FT co1 = 0.75f; 103 const FT co2 = 0.5f; 104 const FT co3 = 0.125f; 105 106 const FT coef1 = (tidx & 1) == 0 ? co1 : (FT) 0; 107 const FT coef2 = (tidx & 1) == 0 ? co3 : co2; 108 const FT coefy1 = (tidy & 1) == 0 ? co1 : (FT) 0; 109 const FT coefy2 = (tidy & 1) == 0 ? co3 : co2; 110 111 if(tidy < LOCAL_SIZE/2 + 2) 112 { 113 sum = coef2* s_srcPatch[tidy][1 + ((tidx - 1) >> 1)]; 114 sum = mad(coef1, s_srcPatch[tidy][1 + ((tidx ) >> 1)], sum); 115 sum = mad(coef2, s_srcPatch[tidy][1 + ((tidx + 2) >> 1)], sum); 116 117 s_dstPatch[tidy][tidx] = sum; 118 } 119 120 barrier(CLK_LOCAL_MEM_FENCE); 121 122 sum = coefy2* s_dstPatch[1 + ((tidy - 1) >> 1)][tidx]; 123 sum = mad(coefy1, s_dstPatch[1 + ((tidy ) >> 1)][tidx], sum); 124 sum = mad(coefy2, s_dstPatch[1 + ((tidy + 2) >> 1)][tidx], sum); 125 126 if ((x < dst_cols) && (y < dst_rows)) 127 storepix(convertToT(sum), dstData + y * dst_step + x * PIXSIZE); 128} 129 130 131__kernel void pyrUp_unrolled(__global const uchar * src, int src_step, int src_offset, int src_rows, int src_cols, 132 __global uchar * dst, int dst_step, int dst_offset, int dst_rows, int dst_cols) 133{ 134 const int lx = 2*get_local_id(0); 135 const int ly = 2*get_local_id(1); 136 137 __local FT s_srcPatch[LOCAL_SIZE+2][LOCAL_SIZE+2]; 138 __local FT s_dstPatch[LOCAL_SIZE+2][2*LOCAL_SIZE]; 139 140 __global uchar * dstData = dst + dst_offset; 141 __global const uchar * srcData = src + src_offset; 142 143 if( lx < (LOCAL_SIZE+2) && ly < (LOCAL_SIZE+2) ) 144 { 145 int srcx = mad24((int)get_group_id(0), LOCAL_SIZE, lx) - 1; 146 int srcy = mad24((int)get_group_id(1), LOCAL_SIZE, ly) - 1; 147 148 int srcx1 = EXTRAPOLATE(srcx, src_cols); 149 int srcx2 = EXTRAPOLATE(srcx+1, src_cols); 150 int srcy1 = EXTRAPOLATE(srcy, src_rows); 151 int srcy2 = EXTRAPOLATE(srcy+1, src_rows); 152 s_srcPatch[ly][lx] = convertToFT(loadpix(srcData + srcy1 * src_step + srcx1 * PIXSIZE)); 153 s_srcPatch[ly+1][lx] = convertToFT(loadpix(srcData + srcy2 * src_step + srcx1 * PIXSIZE)); 154 s_srcPatch[ly][lx+1] = convertToFT(loadpix(srcData + srcy1 * src_step + srcx2 * PIXSIZE)); 155 s_srcPatch[ly+1][lx+1] = convertToFT(loadpix(srcData + srcy2 * src_step + srcx2 * PIXSIZE)); 156 } 157 158 barrier(CLK_LOCAL_MEM_FENCE); 159 160 FT sum; 161 162 const FT co1 = 0.75f; 163 const FT co2 = 0.5f; 164 const FT co3 = 0.125f; 165 166 // (x,y) 167 sum = co3 * s_srcPatch[1 + (ly >> 1)][1 + ((lx - 2) >> 1)]; 168 sum = mad(co1, s_srcPatch[1 + (ly >> 1)][1 + ((lx ) >> 1)], sum); 169 sum = mad(co3, s_srcPatch[1 + (ly >> 1)][1 + ((lx + 2) >> 1)], sum); 170 171 s_dstPatch[1 + get_local_id(1)][lx] = sum; 172 173 // (x+1,y) 174 sum = co2 * s_srcPatch[1 + (ly >> 1)][1 + ((lx + 1 - 1) >> 1)]; 175 sum = mad(co2, s_srcPatch[1 + (ly >> 1)][1 + ((lx + 1 + 1) >> 1)], sum); 176 s_dstPatch[1 + get_local_id(1)][lx+1] = sum; 177 178 if (ly < 1) 179 { 180 // (x,y) 181 sum = co3 * s_srcPatch[0][1 + ((lx - 2) >> 1)]; 182 sum = mad(co1, s_srcPatch[0][1 + ((lx ) >> 1)], sum); 183 sum = mad(co3, s_srcPatch[0][1 + ((lx + 2) >> 1)], sum); 184 s_dstPatch[0][lx] = sum; 185 186 // (x+1,y) 187 sum = co2 * s_srcPatch[0][1 + ((lx + 1 - 1) >> 1)]; 188 sum = mad(co2, s_srcPatch[0][1 + ((lx + 1 + 1) >> 1)], sum); 189 s_dstPatch[0][lx+1] = sum; 190 } 191 192 if (ly > 2*LOCAL_SIZE-3) 193 { 194 // (x,y) 195 sum = co3 * s_srcPatch[LOCAL_SIZE+1][1 + ((lx - 2) >> 1)]; 196 sum = mad(co1, s_srcPatch[LOCAL_SIZE+1][1 + ((lx ) >> 1)], sum); 197 sum = mad(co3, s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 2) >> 1)], sum); 198 s_dstPatch[LOCAL_SIZE+1][lx] = sum; 199 200 // (x+1,y) 201 sum = co2 * s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 1 - 1) >> 1)]; 202 sum = mad(co2, s_srcPatch[LOCAL_SIZE+1][1 + ((lx + 1 + 1) >> 1)], sum); 203 s_dstPatch[LOCAL_SIZE+1][lx+1] = sum; 204 } 205 206 barrier(CLK_LOCAL_MEM_FENCE); 207 int dst_x = 2*get_global_id(0); 208 int dst_y = 2*get_global_id(1); 209 210 if ((dst_x < dst_cols) && (dst_y < dst_rows)) 211 { 212 // (x,y) 213 sum = co3 * s_dstPatch[1 + get_local_id(1) - 1][lx]; 214 sum = mad(co1, s_dstPatch[1 + get_local_id(1) ][lx], sum); 215 sum = mad(co3, s_dstPatch[1 + get_local_id(1) + 1][lx], sum); 216 storepix(convertToT(sum), dstData + dst_y * dst_step + dst_x * PIXSIZE); 217 218 // (x+1,y) 219 sum = co3 * s_dstPatch[1 + get_local_id(1) - 1][lx+1]; 220 sum = mad(co1, s_dstPatch[1 + get_local_id(1) ][lx+1], sum); 221 sum = mad(co3, s_dstPatch[1 + get_local_id(1) + 1][lx+1], sum); 222 storepix(convertToT(sum), dstData + dst_y * dst_step + (dst_x+1) * PIXSIZE); 223 224 // (x,y+1) 225 sum = co2 * s_dstPatch[1 + get_local_id(1) ][lx]; 226 sum = mad(co2, s_dstPatch[1 + get_local_id(1) + 1][lx], sum); 227 storepix(convertToT(sum), dstData + (dst_y+1) * dst_step + dst_x * PIXSIZE); 228 229 // (x+1,y+1) 230 sum = co2 * s_dstPatch[1 + get_local_id(1) ][lx+1]; 231 sum = mad(co2, s_dstPatch[1 + get_local_id(1) + 1][lx+1], sum); 232 storepix(convertToT(sum), dstData + (dst_y+1) * dst_step + (dst_x+1) * PIXSIZE); 233 } 234} 235