1/*
2 * function: kernel_gauss
3 * input:    image2d_t as read only
4 * output:   image2d_t as write only
5 * workitem = 4x2 pixel ouptut
6 * GAUSS_RADIUS must be defined in build options.
7 */
8
9#ifndef GAUSS_RADIUS
10#define GAUSS_RADIUS 2
11#endif
12
13#define GAUSS_SCALE (2 * GAUSS_RADIUS + 1)
14
15__kernel void kernel_gauss (__read_only image2d_t input, __write_only image2d_t output, __global float *table)
16{
17    int x = get_global_id (0);
18    int y = get_global_id (1);
19    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;
20
21    float4 in1;
22    int i, j;
23    int index;
24    float4 out1 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
25    float4 out2 = (float4)(0.0f, 0.0f, 0.0f, 0.0f);
26
27    for(i = 0; i < GAUSS_SCALE + 1; i++)
28        for(j = 0; j < GAUSS_SCALE + 3; j++) {
29            in1 = read_imagef (input, sampler, (int2)(4 * x - GAUSS_RADIUS + j, 2 * y - GAUSS_RADIUS + i));
30            //first line
31            if (i < GAUSS_SCALE) {
32                index = i * GAUSS_SCALE + j;
33                out1.x +=  (j < GAUSS_SCALE ? table[index] * in1.x : 0.0f);
34                index -= 1;
35                out1.y += ((j < GAUSS_SCALE + 1) && j > 0 ? table[index] * in1.x : 0.0f);
36                index -= 1;
37                out1.z += ((j < GAUSS_SCALE + 2) && j > 1 ? table[index] * in1.x : 0.0f);
38                index -= 1;
39                out1.w += (j > 2 ? table[index] * in1.x : 0.0f);
40            }
41            //second line
42            if (i > 0) {
43                index = (i - 1) * GAUSS_SCALE + j;
44                out2.x +=  (j < GAUSS_SCALE ? table[index] * in1.x : 0.0f);
45                index -= 1;
46                out2.y += ((j < GAUSS_SCALE + 1) && j > 0 ? table[index] * in1.x : 0.0f);
47                index -= 1;
48                out2.z += ((j < GAUSS_SCALE + 2) && j > 1 ? table[index] * in1.x : 0.0f);
49                index -= 1;
50                out2.w += (j > 2 ? table[index] * in1.x : 0.0f);
51            }
52        }
53
54    write_imagef(output, (int2)(x, 2 * y), out1);
55    write_imagef(output, (int2)(x,  2 * y + 1), out2);
56
57}
58
59