1/*
2 * function: kernel_csc_rgbatonv12
3 * input:    image2d_t as read only
4 * output:   image2d_t as write only
5 * vertical_offset, vertical offset from y to uv
6 */
7
8__kernel void kernel_csc_rgbatonv12 (__read_only image2d_t input, __write_only image2d_t output_y, __write_only image2d_t output_uv, __global float *matrix)
9{
10    int x = get_global_id (0);
11    int y = get_global_id (1);
12    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
13    float4 pixel_in1 = read_imagef(input, sampler, (int2)(2 * x, 2 * y));
14    float4 pixel_in2 = read_imagef(input, sampler, (int2)(2 * x + 1, 2 * y));
15    float4 pixel_in3 = read_imagef(input, sampler, (int2)(2 * x, 2 * y + 1));
16    float4 pixel_in4 = read_imagef(input, sampler, (int2)(2 * x + 1, 2 * y + 1));
17    float4 pixel_out_y1, pixel_out_y2, pixel_out_y3, pixel_out_y4, pixel_out_u, pixel_out_v;
18    pixel_out_y1.x = matrix[0] * pixel_in1.x + matrix[1] * pixel_in1.y + matrix[2] * pixel_in1.z;
19    pixel_out_y1.y = 0.0f;
20    pixel_out_y1.z = 0.0f;
21    pixel_out_y1.w = 1.0f;
22    pixel_out_y2.x = matrix[0] * pixel_in2.x + matrix[1] * pixel_in2.y +  matrix[2] * pixel_in2.z;
23    pixel_out_y2.y = 0.0f;
24    pixel_out_y2.z = 0.0f;
25    pixel_out_y2.w = 1.0f;
26    pixel_out_y3.x = matrix[0] * pixel_in3.x + matrix[1] * pixel_in3.y + matrix[2] * pixel_in3.z;
27    pixel_out_y3.y = 0.0f;
28    pixel_out_y3.z = 0.0f;
29    pixel_out_y3.w = 1.0f;
30    pixel_out_y4.x = matrix[0] * pixel_in4.x + matrix[1] * pixel_in4.y + matrix[2] * pixel_in4.z;
31    pixel_out_y4.y = 0.0f;
32    pixel_out_y4.z = 0.0f;
33    pixel_out_y4.w = 1.0f;
34    pixel_out_u.x = matrix[3] * pixel_in1.x + matrix[4] * pixel_in1.y + matrix[5] * pixel_in1.z + 0.5f;
35    pixel_out_u.y = 0.0f;
36    pixel_out_u.z = 0.0f;
37    pixel_out_u.w = 1.0f;
38    pixel_out_v.x = matrix[6] * pixel_in1.x + matrix[7] * pixel_in1.y + matrix[8] * pixel_in1.z + 0.5f;
39    pixel_out_v.y = 0.0f;
40    pixel_out_v.z = 0.0f;
41    pixel_out_v.w = 1.0f;
42    write_imagef(output_y, (int2)(2 * x, 2 * y), pixel_out_y1);
43    write_imagef(output_y, (int2)(2 * x + 1, 2 * y), pixel_out_y2);
44    write_imagef(output_y, (int2)(2 * x, 2 * y + 1), pixel_out_y3);
45    write_imagef(output_y, (int2)(2 * x + 1, 2 * y + 1), pixel_out_y4);
46    write_imagef(output_uv, (int2)(2 * x, y), pixel_out_u);
47    write_imagef(output_uv, (int2)(2 * x + 1, y), pixel_out_v);
48}
49
50
51/*
52 * function: kernel_csc_rgbatolab
53 * input:    image2d_t as read only
54 * output:   image2d_t as write only
55 */
56
57static float lab_fun(float a)
58{
59    if (a > 0.008856f)
60        return pow(a, 1.0f / 3);
61    else
62        return (float)(7.787f * a + 16.0f / 116);
63}
64__kernel void kernel_csc_rgbatolab (__read_only image2d_t input, __write_only image2d_t output)
65{
66    int x = get_global_id (0);
67    int y = get_global_id (1);
68    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
69    float4 pixel_in = read_imagef(input, sampler, (int2)(x, y));
70    float X, Y, Z, L, a, b;
71    X = 0.433910f * pixel_in.x + 0.376220f * pixel_in.y + 0.189860f * pixel_in.z;
72    Y = 0.212649f * pixel_in.x + 0.715169f * pixel_in.y + 0.072182f * pixel_in.z;
73    Z = 0.017756f * pixel_in.x + 0.109478f * pixel_in.y + 0.872915f * pixel_in.z;
74    if(Y > 0.008856f)
75        L = 116 * (pow(Y, 1.0f / 3));
76    else
77        L = 903.3f * Y;
78    a = 500 * (lab_fun(X) - lab_fun(Y));
79    b = 200 * (lab_fun(Y) - lab_fun(Z));
80    write_imagef(output, (int2)(3 * x, y), L);
81    write_imagef(output, (int2)(3 * x + 1, y), a);
82    write_imagef(output, (int2)(3 * x + 2, y), b);
83}
84
85/*
86 * function: kernel_csc_rgba64torgba
87 * input:    image2d_t as read only
88 * output:   image2d_t as write only
89 */
90__kernel void kernel_csc_rgba64torgba (__read_only image2d_t input, __write_only image2d_t output)
91{
92    int x = get_global_id (0);
93    int y = get_global_id (1);
94    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
95    float4 pixel_in = read_imagef(input, sampler, (int2)(x, y));
96    write_imagef(output, (int2)(x, y), pixel_in);
97}
98
99/*
100 * function: kernel_csc_yuyvtorgba
101 * input:    image2d_t as read only
102 * output:   image2d_t as write only
103 */
104
105__kernel void kernel_csc_yuyvtorgba (__read_only image2d_t input, __write_only image2d_t output)
106{
107    int x = get_global_id (0);
108    int y = get_global_id (1);
109    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
110    float4 pixel_in1 = read_imagef(input, sampler, (int2)(x, y));
111    float4 pixel_out1, pixel_out2;
112    pixel_out1.x = pixel_in1.x + 1.13983f * (pixel_in1.w - 0.5f);
113    pixel_out1.y = pixel_in1.x - 0.39465f * (pixel_in1.y - 0.5f) - 0.5806f * (pixel_in1.w - 0.5f);
114    pixel_out1.z = pixel_in1.x + 2.03211f * (pixel_in1.y - 0.5f);
115    pixel_out1.w = 0.0f;
116    pixel_out2.x = pixel_in1.z + 1.13983f * (pixel_in1.w - 0.5f);
117    pixel_out2.y = pixel_in1.z - 0.39465f * (pixel_in1.y - 0.5f) - 0.5806f * (pixel_in1.w - 0.5f);
118    pixel_out2.z = pixel_in1.z + 2.03211f * (pixel_in1.y - 0.5f);
119    pixel_out2.w = 0.0f;
120    write_imagef(output, (int2)(2 * x, y), pixel_out1);
121    write_imagef(output, (int2)(2 * x + 1, y), pixel_out2);
122}
123
124/*
125 * function: kernel_csc_nv12torgba
126 * input:    image2d_t as read only
127 * output:   image2d_t as write only
128 * vertical_offset, vertical offset from y to uv
129 */
130
131__kernel void kernel_csc_nv12torgba (
132    __read_only image2d_t input_y, __write_only image2d_t output, __read_only image2d_t input_uv)
133{
134    int x = get_global_id (0);
135    int y = get_global_id (1);
136    sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;
137    float4 pixel_y1 = read_imagef(input_y, sampler, (int2)(2 * x, 2 * y));
138    float4 pixel_y2 = read_imagef(input_y, sampler, (int2)(2 * x + 1, 2 * y));
139    float4 pixel_y3 = read_imagef(input_y, sampler, (int2)(2 * x, 2 * y + 1));
140    float4 pixel_y4 = read_imagef(input_y, sampler, (int2)(2 * x + 1, 2 * y + 1));
141    float4 pixel_u = read_imagef(input_uv, sampler, (int2)(2 * x, y));
142    float4 pixel_v = read_imagef(input_uv, sampler, (int2)(2 * x + 1, y));
143    float4 pixel_out1, pixel_out2, pixel_out3, pixel_out4;
144    pixel_out1.x = pixel_y1.x + 1.13983f * (pixel_v.x - 0.5f);
145    pixel_out1.y = pixel_y1.x - 0.39465f * (pixel_u.x - 0.5f) - 0.5806f * (pixel_v.x - 0.5f);
146    pixel_out1.z = pixel_y1.x + 2.03211f * (pixel_u.x - 0.5f);
147    pixel_out1.w = 0.0f;
148    pixel_out2.x = pixel_y2.x + 1.13983f * (pixel_v.x - 0.5f);
149    pixel_out2.y = pixel_y2.x - 0.39465f * (pixel_u.x - 0.5f) - 0.5806f * (pixel_v.x - 0.5f);
150    pixel_out2.z = pixel_y2.x + 2.03211f * (pixel_u.x - 0.5f);
151    pixel_out2.w = 0.0f;
152    pixel_out3.x = pixel_y3.x + 1.13983f * (pixel_v.x - 0.5f);
153    pixel_out3.y = pixel_y3.x - 0.39465f * (pixel_u.x - 0.5f) - 0.5806f * (pixel_v.x - 0.5f);
154    pixel_out3.z = pixel_y3.x + 2.03211f * (pixel_u.x - 0.5f);
155    pixel_out3.w = 0.0f;
156    pixel_out4.x = pixel_y4.x + 1.13983f * (pixel_v.x - 0.5f);
157    pixel_out4.y = pixel_y4.x - 0.39465f * (pixel_u.x - 0.5f) - 0.5806f * (pixel_v.x - 0.5f);
158    pixel_out4.z = pixel_y4.x + 2.03211f * (pixel_u.x - 0.5f);
159    pixel_out4.w = 0.0f;
160    write_imagef(output, (int2)(2 * x, 2 * y), pixel_out1);
161    write_imagef(output, (int2)(2 * x + 1, 2 * y), pixel_out2);
162    write_imagef(output, (int2)(2 * x, 2 * y + 1), pixel_out3);
163    write_imagef(output, (int2)(2 * x + 1, 2 * y + 1), pixel_out4);
164}
165
166