1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <time.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24 #include "procs.h"
25 #include "harness/testHarness.h"
26 #include "harness/errorHelpers.h"
27
28 #ifndef uchar
29 typedef unsigned char uchar;
30 #endif
31
32 #undef MIN
33 #define MIN(x,y) ( (x) < (y) ? (x) : (y) )
34
35 #undef MAX
36 #define MAX(x,y) ( (x) > (y) ? (x) : (y) )
37
38 //#define CREATE_OUTPUT 1
39
40 extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
41
42
43
44 //--- the code for kernel executables
45 static const char *image_filter_src =
46 "constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
47 "\n"
48 "__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
49 " read_only image2d_t src_image, write_only image2d_t dst_image )\n"
50 "{\n"
51 " int i, j;\n"
52 " int indx = 0;\n"
53 " int tid_x = get_global_id(0);\n"
54 " int tid_y = get_global_id(1);\n"
55 " float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
56 "\n"
57 " for (i=-m/2; i<(m+1)/2; i++){\n"
58 " for (j=-n/2; j<(n+1)/2; j++){\n"
59 " float w = filter_weights[indx++];\n"
60 "\n"
61 " if (w != 0.0f){\n"
62 " filter_result += w * read_imagef(src_image, sampler,\n"
63 " (int2)(tid_x + j, tid_y + i));\n"
64 " }\n"
65 " }\n"
66 " }\n"
67 "\n"
68 " write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
69 "}\n";
70
71
72 //--- equivalent non-kernel code
read_imagef(int x,int y,int w,int h,int nChannels,uchar * src,float * srcRgb)73 static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
74 {
75 // clamp the coords
76 int x0 = MIN( MAX( x, 0 ), w - 1 );
77 int y0 = MIN( MAX( y, 0 ), h - 1 );
78
79 // get tine index
80 int indx = ( y0 * w + x0 ) * nChannels;
81
82 // seed the return array
83 int i;
84 for( i = 0; i < nChannels; i++ ){
85 srcRgb[i] = (float)src[indx+i];
86 }
87 } // end read_imagef()
88
89
write_imagef(uchar * dst,int x,int y,int w,int h,int nChannels,float * dstRgb)90 static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
91 {
92 // get tine index
93 int indx = ( y * w + x ) * nChannels;
94
95 // seed the return array
96 int i;
97 for( i = 0; i < nChannels; i++ ){
98 dst[indx+i] = (uchar)dstRgb[i];
99 }
100 } // end write_imagef()
101
102
basicFilterPixel(int x,int y,int n,int m,int xsize,int ysize,int nChannels,const float * filter_weights,uchar * src,uchar * dst)103 static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
104 {
105 int i, j, k;
106 int indx = 0;
107 float filter_result[] = { 0.f, 0.f, 0.f, 0.f };
108 float srcRgb[4];
109
110 for( i = -m/2; i < (m+1)/2; i++ ){
111 for( j = -n/2; j < (n+1)/2; j++ ){
112 float w = filter_weights[indx++];
113
114 if( w != 0 ){
115 read_imagef( x + j, y + i, xsize, ysize, nChannels, src, srcRgb );
116 for( k = 0; k < nChannels; k++ ){
117 filter_result[k] += w * srcRgb[k];
118 }
119 }
120 }
121 }
122
123 write_imagef( dst, x, y, xsize, ysize, nChannels, filter_result );
124
125 } // end basicFilterPixel()
126
127
128 //--- helper functions
createImage(int elements,MTdata d)129 static uchar *createImage( int elements, MTdata d)
130 {
131 int i;
132 uchar *ptr = (uchar *)malloc( elements * sizeof( cl_uchar ) );
133 if( ! ptr )
134 return NULL;
135
136 for( i = 0; i < elements; i++ ){
137 ptr[i] = (uchar)genrand_int32(d);
138 }
139
140 return ptr;
141
142 } // end createImage()
143
144
verifyImages(uchar * ptr0,uchar * ptr1,uchar tolerance,int xsize,int ysize,int nChannels)145 static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
146 {
147 int x, y, z;
148 uchar *p0 = ptr0;
149 uchar *p1 = ptr1;
150
151 for( y = 0; y < ysize; y++ ){
152 for( x = 0; x < xsize; x++ ){
153 for( z = 0; z < nChannels; z++ ){
154 if( (uchar)abs( (int)( *p0++ - *p1++ ) ) > tolerance ){
155 log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", x, y, z,
156 (int)p0[-1], (int)p1[-1] );
157 return -1;
158 }
159 }
160 }
161 }
162
163 return 0;
164
165 } // end verifyImages()
166
167
kernelFilter(cl_device_id device,cl_context context,cl_command_queue queue,int w,int h,int nChannels,uchar * inptr,uchar * outptr)168 static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
169 uchar *inptr, uchar *outptr )
170 {
171 cl_program program[1];
172 cl_kernel kernel[1];
173 cl_mem memobjs[3];
174 cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
175 cl_event executeEvent;
176 cl_ulong queueStart, submitStart, writeStart, writeEnd;
177 size_t threads[2];
178 float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
179 int filter_w = 3, filter_h = 3;
180 int err = 0;
181
182 // set thread dimensions
183 threads[0] = w;
184 threads[1] = h;
185
186 // allocate the input and output image memory objects
187 memobjs[0] =
188 create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
189 &image_format_desc, w, h, 0, inptr, &err);
190 if( memobjs[0] == (cl_mem)0 ){
191 log_error( " unable to create 2D image using create_image_2d\n" );
192 return -1;
193 }
194
195 memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
196 if( memobjs[1] == (cl_mem)0 ){
197 log_error( " unable to create 2D image using create_image_2d\n" );
198 clReleaseMemObject( memobjs[0] );
199 return -1;
200 }
201
202 // allocate an array memory object to load the filter weights
203 memobjs[2] = clCreateBuffer(
204 context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
205 sizeof(cl_float) * filter_w * filter_h, &filter_weights, &err);
206 if( memobjs[2] == (cl_mem)0 ){
207 log_error( " unable to create array using clCreateBuffer\n" );
208 clReleaseMemObject( memobjs[1] );
209 clReleaseMemObject( memobjs[0] );
210 return -1;
211 }
212
213 // create the compute program
214 err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &image_filter_src, "image_filter" );
215 if( err ){
216 clReleaseMemObject( memobjs[2] );
217 clReleaseMemObject( memobjs[1] );
218 clReleaseMemObject( memobjs[0] );
219 return -1;
220 }
221
222
223 // create kernel args object and set arg values.
224 // set the args values
225 err = clSetKernelArg( kernel[0], 0, sizeof( cl_int ), (void *)&filter_w );
226 err |= clSetKernelArg( kernel[0], 1, sizeof( cl_int ), (void *)&filter_h );
227 err |= clSetKernelArg( kernel[0], 2, sizeof( cl_mem ), (void *)&memobjs[2] );
228 err |= clSetKernelArg( kernel[0], 3, sizeof( cl_mem ), (void *)&memobjs[0] );
229 err |= clSetKernelArg( kernel[0], 4, sizeof( cl_mem ), (void *)&memobjs[1] );
230
231 if( err != CL_SUCCESS ){
232 print_error( err, "clSetKernelArg failed\n" );
233 clReleaseKernel( kernel[0] );
234 clReleaseProgram( program[0] );
235 clReleaseMemObject( memobjs[2] );
236 clReleaseMemObject( memobjs[1] );
237 clReleaseMemObject( memobjs[0] );
238 return -1;
239 }
240
241 err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, &executeEvent );
242
243 if( err != CL_SUCCESS ){
244 print_error( err, "clEnqueueNDRangeKernel failed\n" );
245 clReleaseEvent( executeEvent );
246 clReleaseKernel( kernel[0] );
247 clReleaseProgram( program[0] );
248 clReleaseMemObject( memobjs[2] );
249 clReleaseMemObject( memobjs[1] );
250 clReleaseMemObject( memobjs[0] );
251 return -1;
252 }
253
254 // This synchronization point is needed in order to assume the data is valid.
255 // Getting profiling information is not a synchronization point.
256 err = clWaitForEvents( 1, &executeEvent );
257 if( err != CL_SUCCESS )
258 {
259 clReleaseEvent( executeEvent );
260 clReleaseKernel( kernel[0] );
261 clReleaseProgram( program[0] );
262 clReleaseMemObject( memobjs[2] );
263 clReleaseMemObject( memobjs[1] );
264 clReleaseMemObject( memobjs[0] );
265 return -1;
266 }
267
268 // test profiling
269 while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
270 CL_PROFILING_INFO_NOT_AVAILABLE );
271 if( err != CL_SUCCESS ){
272 print_error( err, "clGetEventProfilingInfo failed" );
273 clReleaseEvent( executeEvent );
274 clReleaseKernel( kernel[0] );
275 clReleaseProgram( program[0] );
276 clReleaseMemObject( memobjs[2] );
277 clReleaseMemObject( memobjs[1] );
278 clReleaseMemObject( memobjs[0] );
279 return -1;
280 }
281
282 while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
283 CL_PROFILING_INFO_NOT_AVAILABLE );
284 if( err != CL_SUCCESS ){
285 print_error( err, "clGetEventProfilingInfo failed" );
286 clReleaseEvent( executeEvent );
287 clReleaseKernel( kernel[0] );
288 clReleaseProgram( program[0] );
289 clReleaseMemObject( memobjs[2] );
290 clReleaseMemObject( memobjs[1] );
291 clReleaseMemObject( memobjs[0] );
292 return -1;
293 }
294
295 err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
296 if( err != CL_SUCCESS ){
297 print_error( err, "clGetEventProfilingInfo failed" );
298 clReleaseEvent( executeEvent );
299 clReleaseKernel( kernel[0] );
300 clReleaseProgram( program[0] );
301 clReleaseMemObject( memobjs[2] );
302 clReleaseMemObject( memobjs[1] );
303 clReleaseMemObject( memobjs[0] );
304 return -1;
305 }
306
307 err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
308 if( err != CL_SUCCESS ){
309 print_error( err, "clGetEventProfilingInfo failed" );
310 clReleaseEvent( executeEvent );
311 clReleaseKernel( kernel[0] );
312 clReleaseProgram( program[0] );
313 clReleaseMemObject( memobjs[2] );
314 clReleaseMemObject( memobjs[1] );
315 clReleaseMemObject( memobjs[0] );
316 return -1;
317 }
318
319 // read output image
320 size_t origin[3] = { 0, 0, 0 };
321 size_t region[3] = { w, h, 1 };
322 err = clEnqueueReadImage( queue, memobjs[1], true, origin, region, 0, 0, outptr, 0, NULL, NULL);
323 if( err != CL_SUCCESS ){
324 print_error( err, "clReadImage failed\n" );
325 clReleaseEvent( executeEvent );
326 clReleaseKernel( kernel[0] );
327 clReleaseProgram( program[0] );
328 clReleaseMemObject( memobjs[2] );
329 clReleaseMemObject( memobjs[1] );
330 clReleaseMemObject( memobjs[0] );
331 return -1;
332 }
333
334 // release event, kernel, program, and memory objects
335 clReleaseEvent( executeEvent );
336 clReleaseKernel( kernel[0] );
337 clReleaseProgram( program[0] );
338 clReleaseMemObject( memobjs[2] );
339 clReleaseMemObject( memobjs[1] );
340 clReleaseMemObject( memobjs[0] );
341
342 if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
343 err = -1;
344
345 return err;
346
347 } // end kernelFilter()
348
349
basicFilter(int w,int h,int nChannels,uchar * inptr,uchar * outptr)350 static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
351 {
352 const float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
353 int filter_w = 3, filter_h = 3;
354 int x, y;
355
356 for( y = 0; y < h; y++ ){
357 for( x = 0; x < w; x++ ){
358 basicFilterPixel( x, y, filter_w, filter_h, w, h, nChannels, filter_weights, inptr, outptr );
359 }
360 }
361
362 return 0;
363
364 } // end of basicFilter()
365
366
test_execute(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)367 int test_execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
368 {
369 uchar *inptr;
370 uchar *outptr[2];
371 int w = 256, h = 256;
372 int nChannels = 4;
373 int nElements = w * h * nChannels;
374 int err = 0;
375 MTdata d;
376
377
378 PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
379
380 d = init_genrand( gRandomSeed );
381 inptr = createImage( nElements, d );
382 free_mtdata( d); d = NULL;
383
384 if( ! inptr ){
385 log_error( " unable to allocate %d bytes of memory for image\n", nElements );
386 return -1;
387 }
388
389 outptr[0] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
390 if( ! outptr[0] ){
391 log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements );
392 free( (void *)inptr );
393 return -1;
394 }
395
396 outptr[1] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
397 if( ! outptr[1] ){
398 log_error( " unable to allocate %d bytes of memory for output image #2\n", nElements );
399 free( (void *)outptr[0] );
400 free( (void *)inptr );
401 return -1;
402 }
403
404 err = kernelFilter( device, context, queue, w, h, nChannels, inptr, outptr[0] );
405
406 if( ! err ){
407 basicFilter( w, h, nChannels, inptr, outptr[1] );
408
409 // verify that the images are the same
410 err = verifyImages( outptr[0], outptr[1], (uchar)0x1, w, h, nChannels );
411 if( err )
412 log_error( " images do not match\n" );
413 }
414
415 // clean up
416 free( (void *)outptr[1] );
417 free( (void *)outptr[0] );
418 free( (void *)inptr );
419
420 return err;
421
422 } // end execute()
423
424
425
426