1//                           License Agreement
2//                For Open Source Computer Vision Library
3//
4// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved.
5// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved.
6// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved.
7// Third party copyrights are property of their respective owners.
8//
9// @Authors
10//    Niko Li, newlife20080214@gmail.com
11//    Jia Haipeng, jiahaipeng95@gmail.com
12//    Xu Pang, pangxu010@163.com
13//    Wenju He, wenju@multicorewareinc.com
14// Redistribution and use in source and binary forms, with or without modification,
15// are permitted provided that the following conditions are met:
16//
17//   * Redistribution's of source code must retain the above copyright notice,
18//     this list of conditions and the following disclaimer.
19//
20//   * Redistribution's in binary form must reproduce the above copyright notice,
21//     this list of conditions and the following disclaimer in the documentation
22//     and/or other materials provided with the distribution.
23//
24//   * The name of the copyright holders may not be used to endorse or promote products
25//     derived from this software without specific prior written permission.
26//
27// This software is provided by the copyright holders and contributors as is and
28// any express or implied warranties, including, but not limited to, the implied
29// warranties of merchantability and fitness for a particular purpose are disclaimed.
30// In no event shall the Intel Corporation or contributors be liable for any direct,
31// indirect, incidental, special, exemplary, or consequential damages
32// (including, but not limited to, procurement of substitute goods or services;
33// loss of use, data, or profits; or business interruption) however caused
34// and on any theory of liability, whether in contract, strict liability,
35// or tort (including negligence or otherwise) arising in any way out of
36// the use of this software, even if advised of the possibility of such damage.
37//
38//
39
40#ifndef kercn
41#define kercn 1
42#endif
43
44#ifndef T
45#define T uchar
46#endif
47
48#define noconvert
49
50__kernel void calculate_histogram(__global const uchar * src_ptr, int src_step, int src_offset, int src_rows, int src_cols,
51                                  __global uchar * histptr, int total)
52{
53    int lid = get_local_id(0);
54    int id = get_global_id(0) * kercn;
55    int gid = get_group_id(0);
56
57    __local int localhist[BINS];
58
59    #pragma unroll
60    for (int i = lid; i < BINS; i += WGS)
61        localhist[i] = 0;
62    barrier(CLK_LOCAL_MEM_FENCE);
63
64    __global const uchar * src = src_ptr + src_offset;
65    int src_index;
66
67    for (int grain = HISTS_COUNT * WGS * kercn; id < total; id += grain)
68    {
69#ifdef HAVE_SRC_CONT
70        src_index = id;
71#else
72        src_index = mad24(id / src_cols, src_step, id % src_cols);
73#endif
74
75#if kercn == 1
76        atomic_inc(localhist + convert_int(src[src_index]));
77#elif kercn == 4
78        int value = *(__global const int *)(src + src_index);
79        atomic_inc(localhist + (value & 0xff));
80        atomic_inc(localhist + ((value >> 8) & 0xff));
81        atomic_inc(localhist + ((value >> 16) & 0xff));
82        atomic_inc(localhist + ((value >> 24) & 0xff));
83#elif kercn >= 2
84        T value = *(__global const T *)(src + src_index);
85        atomic_inc(localhist + value.s0);
86        atomic_inc(localhist + value.s1);
87#if kercn >= 4
88        atomic_inc(localhist + value.s2);
89        atomic_inc(localhist + value.s3);
90#if kercn >= 8
91        atomic_inc(localhist + value.s4);
92        atomic_inc(localhist + value.s5);
93        atomic_inc(localhist + value.s6);
94        atomic_inc(localhist + value.s7);
95#if kercn == 16
96        atomic_inc(localhist + value.s8);
97        atomic_inc(localhist + value.s9);
98        atomic_inc(localhist + value.sA);
99        atomic_inc(localhist + value.sB);
100        atomic_inc(localhist + value.sC);
101        atomic_inc(localhist + value.sD);
102        atomic_inc(localhist + value.sE);
103        atomic_inc(localhist + value.sF);
104#endif
105#endif
106#endif
107#endif
108    }
109    barrier(CLK_LOCAL_MEM_FENCE);
110
111    __global int * hist = (__global int *)(histptr + gid * BINS * (int)sizeof(int));
112    #pragma unroll
113    for (int i = lid; i < BINS; i += WGS)
114        hist[i] = localhist[i];
115}
116
117#ifndef HT
118#define HT int
119#endif
120
121#ifndef convertToHT
122#define convertToHT noconvert
123#endif
124
125__kernel void merge_histogram(__global const int * ghist, __global uchar * histptr, int hist_step, int hist_offset)
126{
127    int lid = get_local_id(0);
128
129    __global HT * hist = (__global HT *)(histptr + hist_offset);
130#if WGS >= BINS
131    HT res = (HT)(0);
132#else
133    #pragma unroll
134    for (int i = lid; i < BINS; i += WGS)
135        hist[i] = (HT)(0);
136#endif
137
138    #pragma unroll
139    for (int i = 0; i < HISTS_COUNT; ++i)
140    {
141        #pragma unroll
142        for (int j = lid; j < BINS; j += WGS)
143#if WGS >= BINS
144            res += convertToHT(ghist[j]);
145#else
146            hist[j] += convertToHT(ghist[j]);
147#endif
148        ghist += BINS;
149    }
150
151#if WGS >= BINS
152    if (lid < BINS)
153        *(__global HT *)(histptr + mad24(lid, hist_step, hist_offset)) = res;
154#endif
155}
156
157__kernel void calcLUT(__global uchar * dst, __global const int * ghist, int total)
158{
159    int lid = get_local_id(0);
160    __local int sumhist[BINS];
161    __local float scale;
162
163#if WGS >= BINS
164    int res = 0;
165#else
166    #pragma unroll
167    for (int i = lid; i < BINS; i += WGS)
168        sumhist[i] = 0;
169#endif
170
171    #pragma unroll
172    for (int i = 0; i < HISTS_COUNT; ++i)
173    {
174        #pragma unroll
175        for (int j = lid; j < BINS; j += WGS)
176#if WGS >= BINS
177            res += ghist[j];
178#else
179            sumhist[j] += ghist[j];
180#endif
181        ghist += BINS;
182    }
183
184#if WGS >= BINS
185    if (lid < BINS)
186        sumhist[lid] = res;
187#endif
188    barrier(CLK_LOCAL_MEM_FENCE);
189
190    if (lid == 0)
191    {
192        int sum = 0, i = 0;
193        while (!sumhist[i])
194            ++i;
195
196        if (total == sumhist[i])
197        {
198            scale = 1;
199            for (int j = 0; j < BINS; ++j)
200                sumhist[i] = i;
201        }
202        else
203        {
204            scale = 255.f / (total - sumhist[i]);
205
206            for (sumhist[i++] = 0; i < BINS; i++)
207            {
208                sum += sumhist[i];
209                sumhist[i] = sum;
210            }
211        }
212    }
213    barrier(CLK_LOCAL_MEM_FENCE);
214
215    #pragma unroll
216    for (int i = lid; i < BINS; i += WGS)
217        dst[i]= convert_uchar_sat_rte(convert_float(sumhist[i]) * scale);
218}
219