1// License Agreement 2// For Open Source Computer Vision Library 3// 4// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. 5// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. 6// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. 7// Third party copyrights are property of their respective owners. 8// 9// @Authors 10// Niko Li, newlife20080214@gmail.com 11// Jia Haipeng, jiahaipeng95@gmail.com 12// Xu Pang, pangxu010@163.com 13// Wenju He, wenju@multicorewareinc.com 14// Redistribution and use in source and binary forms, with or without modification, 15// are permitted provided that the following conditions are met: 16// 17// * Redistribution's of source code must retain the above copyright notice, 18// this list of conditions and the following disclaimer. 19// 20// * Redistribution's in binary form must reproduce the above copyright notice, 21// this list of conditions and the following disclaimer in the documentation 22// and/or other materials provided with the distribution. 23// 24// * The name of the copyright holders may not be used to endorse or promote products 25// derived from this software without specific prior written permission. 26// 27// This software is provided by the copyright holders and contributors as is and 28// any express or implied warranties, including, but not limited to, the implied 29// warranties of merchantability and fitness for a particular purpose are disclaimed. 30// In no event shall the Intel Corporation or contributors be liable for any direct, 31// indirect, incidental, special, exemplary, or consequential damages 32// (including, but not limited to, procurement of substitute goods or services; 33// loss of use, data, or profits; or business interruption) however caused 34// and on any theory of liability, whether in contract, strict liability, 35// or tort (including negligence or otherwise) arising in any way out of 36// the use of this software, even if advised of the possibility of such damage. 37// 38// 39 40#ifndef kercn 41#define kercn 1 42#endif 43 44#ifndef T 45#define T uchar 46#endif 47 48#define noconvert 49 50__kernel void calculate_histogram(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols, 51 __global uchar * histptr, int total) 52{ 53 int lid = get_local_id(0); 54 int id = get_global_id(0) * kercn; 55 int gid = get_group_id(0); 56 57 __local int localhist[BINS]; 58 59 #pragma unroll 60 for (int i = lid; i < BINS; i += WGS) 61 localhist[i] = 0; 62 barrier(CLK_LOCAL_MEM_FENCE); 63 64 __global const uchar * src = src_ptr + src_offset; 65 int src_index; 66 67 for (int grain = HISTS_COUNT * WGS * kercn; id < total; id += grain) 68 { 69#ifdef HAVE_SRC_CONT 70 src_index = id; 71#else 72 src_index = mad24(id / src_cols, src_step, id % src_cols); 73#endif 74 75#if kercn == 1 76 atomic_inc(localhist + convert_int(src[src_index])); 77#elif kercn == 4 78 int value = *(__global const int *)(src + src_index); 79 atomic_inc(localhist + (value & 0xff)); 80 atomic_inc(localhist + ((value >> 8) & 0xff)); 81 atomic_inc(localhist + ((value >> 16) & 0xff)); 82 atomic_inc(localhist + ((value >> 24) & 0xff)); 83#elif kercn >= 2 84 T value = *(__global const T *)(src + src_index); 85 atomic_inc(localhist + value.s0); 86 atomic_inc(localhist + value.s1); 87#if kercn >= 4 88 atomic_inc(localhist + value.s2); 89 atomic_inc(localhist + value.s3); 90#if kercn >= 8 91 atomic_inc(localhist + value.s4); 92 atomic_inc(localhist + value.s5); 93 atomic_inc(localhist + value.s6); 94 atomic_inc(localhist + value.s7); 95#if kercn == 16 96 atomic_inc(localhist + value.s8); 97 atomic_inc(localhist + value.s9); 98 atomic_inc(localhist + value.sA); 99 atomic_inc(localhist + value.sB); 100 atomic_inc(localhist + value.sC); 101 atomic_inc(localhist + value.sD); 102 atomic_inc(localhist + value.sE); 103 atomic_inc(localhist + value.sF); 104#endif 105#endif 106#endif 107#endif 108 } 109 barrier(CLK_LOCAL_MEM_FENCE); 110 111 __global int * hist = (__global int *)(histptr + gid * BINS * (int)sizeof(int)); 112 #pragma unroll 113 for (int i = lid; i < BINS; i += WGS) 114 hist[i] = localhist[i]; 115} 116 117#ifndef HT 118#define HT int 119#endif 120 121#ifndef convertToHT 122#define convertToHT noconvert 123#endif 124 125__kernel void merge_histogram(__global const int * ghist, __global uchar * histptr, int hist_step, int hist_offset) 126{ 127 int lid = get_local_id(0); 128 129 __global HT * hist = (__global HT *)(histptr + hist_offset); 130#if WGS >= BINS 131 HT res = (HT)(0); 132#else 133 #pragma unroll 134 for (int i = lid; i < BINS; i += WGS) 135 hist[i] = (HT)(0); 136#endif 137 138 #pragma unroll 139 for (int i = 0; i < HISTS_COUNT; ++i) 140 { 141 #pragma unroll 142 for (int j = lid; j < BINS; j += WGS) 143#if WGS >= BINS 144 res += convertToHT(ghist[j]); 145#else 146 hist[j] += convertToHT(ghist[j]); 147#endif 148 ghist += BINS; 149 } 150 151#if WGS >= BINS 152 if (lid < BINS) 153 *(__global HT *)(histptr + mad24(lid, hist_step, hist_offset)) = res; 154#endif 155} 156 157__kernel void calcLUT(__global uchar * dst, __global const int * ghist, int total) 158{ 159 int lid = get_local_id(0); 160 __local int sumhist[BINS]; 161 __local float scale; 162 163#if WGS >= BINS 164 int res = 0; 165#else 166 #pragma unroll 167 for (int i = lid; i < BINS; i += WGS) 168 sumhist[i] = 0; 169#endif 170 171 #pragma unroll 172 for (int i = 0; i < HISTS_COUNT; ++i) 173 { 174 #pragma unroll 175 for (int j = lid; j < BINS; j += WGS) 176#if WGS >= BINS 177 res += ghist[j]; 178#else 179 sumhist[j] += ghist[j]; 180#endif 181 ghist += BINS; 182 } 183 184#if WGS >= BINS 185 if (lid < BINS) 186 sumhist[lid] = res; 187#endif 188 barrier(CLK_LOCAL_MEM_FENCE); 189 190 if (lid == 0) 191 { 192 int sum = 0, i = 0; 193 while (!sumhist[i]) 194 ++i; 195 196 if (total == sumhist[i]) 197 { 198 scale = 1; 199 for (int j = 0; j < BINS; ++j) 200 sumhist[i] = i; 201 } 202 else 203 { 204 scale = 255.f / (total - sumhist[i]); 205 206 for (sumhist[i++] = 0; i < BINS; i++) 207 { 208 sum += sumhist[i]; 209 sumhist[i] = sum; 210 } 211 } 212 } 213 barrier(CLK_LOCAL_MEM_FENCE); 214 215 #pragma unroll 216 for (int i = lid; i < BINS; i += WGS) 217 dst[i]= convert_uchar_sat_rte(convert_float(sumhist[i]) * scale); 218} 219