1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <time.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 #include "procs.h"
25 #include "harness/testHarness.h"
26 #include "harness/errorHelpers.h"
27 
28 #ifndef uchar
29 typedef unsigned char uchar;
30 #endif
31 
32 #undef MIN
33 #define MIN(x,y)    ( (x) < (y) ? (x) : (y) )
34 
35 #undef MAX
36 #define MAX(x,y)    ( (x) > (y) ? (x) : (y) )
37 
38 //#define CREATE_OUTPUT    1
39 
40 extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
41 
42 
43 
44 //--- the code for kernel executables
45 static const char *image_filter_src =
46 "constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
47 "\n"
48 "__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
49 "              read_only image2d_t src_image, write_only image2d_t dst_image )\n"
50 "{\n"
51 "   int     i, j;\n"
52 "   int     indx = 0;\n"
53 "    int        tid_x = get_global_id(0);\n"
54 "    int        tid_y = get_global_id(1);\n"
55 "    float4  filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
56 "\n"
57 "    for (i=-m/2; i<(m+1)/2; i++){\n"
58 "        for (j=-n/2; j<(n+1)/2; j++){\n"
59 "            float   w = filter_weights[indx++];\n"
60 "\n"
61 "            if (w != 0.0f){\n"
62 "                filter_result += w * read_imagef(src_image, sampler,\n"
63 "                                                 (int2)(tid_x + j, tid_y + i));\n"
64 "            }\n"
65 "        }\n"
66 "    }\n"
67 "\n"
68 "    write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
69 "}\n";
70 
71 
72 //--- equivalent non-kernel code
read_imagef(int x,int y,int w,int h,int nChannels,uchar * src,float * srcRgb)73 static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
74 {
75     // clamp the coords
76     int    x0 = MIN( MAX( x, 0 ), w - 1 );
77     int    y0 = MIN( MAX( y, 0 ), h - 1 );
78 
79     // get tine index
80     int    indx = ( y0 * w + x0 ) * nChannels;
81 
82     // seed the return array
83     int    i;
84     for( i = 0; i < nChannels; i++ ){
85         srcRgb[i] = (float)src[indx+i];
86     }
87 }    // end read_imagef()
88 
89 
write_imagef(uchar * dst,int x,int y,int w,int h,int nChannels,float * dstRgb)90 static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
91 {
92     // get tine index
93     int    indx = ( y * w + x ) * nChannels;
94 
95     // seed the return array
96     int    i;
97     for( i = 0; i < nChannels; i++ ){
98         dst[indx+i] = (uchar)dstRgb[i];
99     }
100 }    // end write_imagef()
101 
102 
basicFilterPixel(int x,int y,int n,int m,int xsize,int ysize,int nChannels,const float * filter_weights,uchar * src,uchar * dst)103 static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
104 {
105     int        i, j, k;
106     int        indx = 0;
107     float    filter_result[] = { 0.f, 0.f, 0.f, 0.f };
108     float    srcRgb[4];
109 
110     for( i = -m/2; i < (m+1)/2; i++ ){
111         for( j = -n/2; j < (n+1)/2; j++ ){
112             float    w = filter_weights[indx++];
113 
114             if( w != 0 ){
115                 read_imagef( x + j, y + i, xsize, ysize, nChannels, src, srcRgb );
116                 for( k = 0; k < nChannels; k++ ){
117                     filter_result[k] += w * srcRgb[k];
118                 }
119             }
120         }
121     }
122 
123     write_imagef( dst, x, y, xsize, ysize, nChannels, filter_result );
124 
125 }    // end basicFilterPixel()
126 
127 
128 //--- helper functions
createImage(int elements,MTdata d)129 static uchar *createImage( int elements, MTdata d)
130 {
131     int        i;
132     uchar    *ptr = (uchar *)malloc( elements * sizeof( cl_uchar ) );
133     if( ! ptr )
134         return NULL;
135 
136     for( i = 0; i < elements; i++ ){
137         ptr[i] = (uchar)genrand_int32(d);
138     }
139 
140     return ptr;
141 
142 }    // end createImage()
143 
144 
verifyImages(uchar * ptr0,uchar * ptr1,uchar tolerance,int xsize,int ysize,int nChannels)145 static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
146 {
147     int        x, y, z;
148     uchar    *p0 = ptr0;
149     uchar    *p1 = ptr1;
150 
151     for( y = 0; y < ysize; y++ ){
152         for( x = 0; x < xsize; x++ ){
153             for( z = 0; z < nChannels; z++ ){
154                 if( (uchar)abs( (int)( *p0++ - *p1++ ) ) > tolerance ){
155                     log_error( "  images differ at x,y = %d,%d, channel = %d, %d to %d\n", x, y, z,
156                               (int)p0[-1], (int)p1[-1] );
157                     return -1;
158                 }
159             }
160         }
161     }
162 
163     return 0;
164 
165 }    // end verifyImages()
166 
167 
kernelFilter(cl_device_id device,cl_context context,cl_command_queue queue,int w,int h,int nChannels,uchar * inptr,uchar * outptr)168 static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
169                          uchar *inptr, uchar *outptr )
170 {
171     cl_program            program[1];
172     cl_kernel            kernel[1];
173     cl_mem                memobjs[3];
174     cl_image_format        image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
175     cl_event            executeEvent;
176     cl_ulong    queueStart, submitStart, writeStart, writeEnd;
177     size_t                threads[2];
178     float                filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
179     int                    filter_w = 3, filter_h = 3;
180     int                    err = 0;
181 
182     // set thread dimensions
183     threads[0] = w;
184     threads[1] = h;
185 
186     // allocate the input and output image memory objects
187     memobjs[0] =
188         create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
189                         &image_format_desc, w, h, 0, inptr, &err);
190     if( memobjs[0] == (cl_mem)0 ){
191         log_error( " unable to create 2D image using create_image_2d\n" );
192         return -1;
193     }
194 
195     memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
196     if( memobjs[1] == (cl_mem)0 ){
197         log_error( " unable to create 2D image using create_image_2d\n" );
198         clReleaseMemObject( memobjs[0] );
199         return -1;
200     }
201 
202     // allocate an array memory object to load the filter weights
203     memobjs[2] = clCreateBuffer(
204         context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
205         sizeof(cl_float) * filter_w * filter_h, &filter_weights, &err);
206     if( memobjs[2] == (cl_mem)0 ){
207         log_error( " unable to create array using clCreateBuffer\n" );
208         clReleaseMemObject( memobjs[1] );
209         clReleaseMemObject( memobjs[0] );
210         return -1;
211     }
212 
213     // create the compute program
214     err = create_single_kernel_helper( context, &program[0], &kernel[0], 1, &image_filter_src, "image_filter" );
215     if( err ){
216         clReleaseMemObject( memobjs[2] );
217         clReleaseMemObject( memobjs[1] );
218         clReleaseMemObject( memobjs[0] );
219         return -1;
220     }
221 
222 
223     // create kernel args object and set arg values.
224     // set the args values
225     err = clSetKernelArg( kernel[0], 0, sizeof( cl_int ), (void *)&filter_w );
226     err |= clSetKernelArg( kernel[0], 1, sizeof( cl_int ), (void *)&filter_h );
227     err |= clSetKernelArg( kernel[0], 2, sizeof( cl_mem ), (void *)&memobjs[2] );
228     err |= clSetKernelArg( kernel[0], 3, sizeof( cl_mem ), (void *)&memobjs[0] );
229     err |= clSetKernelArg( kernel[0], 4, sizeof( cl_mem ), (void *)&memobjs[1] );
230 
231     if( err != CL_SUCCESS ){
232         print_error( err, "clSetKernelArg failed\n" );
233         clReleaseKernel( kernel[0] );
234         clReleaseProgram( program[0] );
235         clReleaseMemObject( memobjs[2] );
236         clReleaseMemObject( memobjs[1] );
237         clReleaseMemObject( memobjs[0] );
238         return -1;
239     }
240 
241     err = clEnqueueNDRangeKernel( queue, kernel[0], 2, NULL, threads, NULL, 0, NULL, &executeEvent );
242 
243     if( err != CL_SUCCESS ){
244         print_error( err, "clEnqueueNDRangeKernel failed\n" );
245     clReleaseEvent( executeEvent );
246         clReleaseKernel( kernel[0] );
247         clReleaseProgram( program[0] );
248         clReleaseMemObject( memobjs[2] );
249         clReleaseMemObject( memobjs[1] );
250         clReleaseMemObject( memobjs[0] );
251         return -1;
252     }
253 
254     // This synchronization point is needed in order to assume the data is valid.
255     // Getting profiling information is not a synchronization point.
256     err = clWaitForEvents( 1, &executeEvent );
257     if( err != CL_SUCCESS )
258     {
259     clReleaseEvent( executeEvent );
260         clReleaseKernel( kernel[0] );
261         clReleaseProgram( program[0] );
262         clReleaseMemObject( memobjs[2] );
263         clReleaseMemObject( memobjs[1] );
264         clReleaseMemObject( memobjs[0] );
265         return -1;
266     }
267 
268     // test profiling
269     while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
270           CL_PROFILING_INFO_NOT_AVAILABLE );
271     if( err != CL_SUCCESS ){
272         print_error( err, "clGetEventProfilingInfo failed" );
273     clReleaseEvent( executeEvent );
274         clReleaseKernel( kernel[0] );
275         clReleaseProgram( program[0] );
276         clReleaseMemObject( memobjs[2] );
277         clReleaseMemObject( memobjs[1] );
278         clReleaseMemObject( memobjs[0] );
279         return -1;
280     }
281 
282     while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
283           CL_PROFILING_INFO_NOT_AVAILABLE );
284     if( err != CL_SUCCESS ){
285         print_error( err, "clGetEventProfilingInfo failed" );
286     clReleaseEvent( executeEvent );
287         clReleaseKernel( kernel[0] );
288         clReleaseProgram( program[0] );
289         clReleaseMemObject( memobjs[2] );
290         clReleaseMemObject( memobjs[1] );
291         clReleaseMemObject( memobjs[0] );
292         return -1;
293     }
294 
295     err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
296     if( err != CL_SUCCESS ){
297         print_error( err, "clGetEventProfilingInfo failed" );
298     clReleaseEvent( executeEvent );
299         clReleaseKernel( kernel[0] );
300         clReleaseProgram( program[0] );
301         clReleaseMemObject( memobjs[2] );
302         clReleaseMemObject( memobjs[1] );
303         clReleaseMemObject( memobjs[0] );
304         return -1;
305     }
306 
307     err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
308     if( err != CL_SUCCESS ){
309         print_error( err, "clGetEventProfilingInfo failed" );
310     clReleaseEvent( executeEvent );
311         clReleaseKernel( kernel[0] );
312         clReleaseProgram( program[0] );
313         clReleaseMemObject( memobjs[2] );
314         clReleaseMemObject( memobjs[1] );
315         clReleaseMemObject( memobjs[0] );
316         return -1;
317     }
318 
319     // read output image
320     size_t origin[3] = { 0, 0, 0 };
321     size_t region[3] = { w, h, 1 };
322     err = clEnqueueReadImage( queue, memobjs[1], true, origin, region, 0, 0, outptr, 0, NULL, NULL);
323     if( err != CL_SUCCESS ){
324         print_error( err, "clReadImage failed\n" );
325     clReleaseEvent( executeEvent );
326         clReleaseKernel( kernel[0] );
327         clReleaseProgram( program[0] );
328         clReleaseMemObject( memobjs[2] );
329         clReleaseMemObject( memobjs[1] );
330         clReleaseMemObject( memobjs[0] );
331         return -1;
332     }
333 
334     // release event, kernel, program, and memory objects
335   clReleaseEvent( executeEvent );
336     clReleaseKernel( kernel[0] );
337     clReleaseProgram( program[0] );
338     clReleaseMemObject( memobjs[2] );
339     clReleaseMemObject( memobjs[1] );
340     clReleaseMemObject( memobjs[0] );
341 
342   if (check_times(queueStart, submitStart, writeStart, writeEnd, device))
343     err = -1;
344 
345     return err;
346 
347 }    // end kernelFilter()
348 
349 
basicFilter(int w,int h,int nChannels,uchar * inptr,uchar * outptr)350 static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
351 {
352     const float    filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
353     int            filter_w = 3, filter_h = 3;
354     int            x, y;
355 
356     for( y = 0; y < h; y++ ){
357         for( x = 0; x < w; x++ ){
358             basicFilterPixel( x, y, filter_w, filter_h, w, h, nChannels, filter_weights, inptr, outptr );
359         }
360     }
361 
362     return 0;
363 
364 }    // end of basicFilter()
365 
366 
test_execute(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)367 int test_execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
368 {
369     uchar    *inptr;
370     uchar    *outptr[2];
371     int        w = 256, h = 256;
372     int        nChannels = 4;
373     int        nElements = w * h * nChannels;
374     int        err = 0;
375     MTdata  d;
376 
377 
378     PASSIVE_REQUIRE_IMAGE_SUPPORT( device )
379 
380     d = init_genrand( gRandomSeed );
381     inptr = createImage( nElements, d );
382     free_mtdata( d);    d = NULL;
383 
384     if( ! inptr ){
385         log_error( " unable to allocate %d bytes of memory for image\n", nElements );
386         return -1;
387     }
388 
389     outptr[0] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
390     if( ! outptr[0] ){
391         log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements );
392         free( (void *)inptr );
393         return -1;
394     }
395 
396     outptr[1] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
397     if( ! outptr[1] ){
398         log_error( " unable to allocate %d bytes of memory for output image #2\n", nElements );
399         free( (void *)outptr[0] );
400         free( (void *)inptr );
401         return -1;
402     }
403 
404     err = kernelFilter( device, context, queue, w, h, nChannels, inptr, outptr[0] );
405 
406     if( ! err ){
407         basicFilter( w, h, nChannels, inptr, outptr[1] );
408 
409         // verify that the images are the same
410         err = verifyImages( outptr[0], outptr[1], (uchar)0x1, w, h, nChannels );
411         if( err )
412             log_error( " images do not match\n" );
413     }
414 
415     // clean up
416     free( (void *)outptr[1] );
417     free( (void *)outptr[0] );
418     free( (void *)inptr );
419 
420     return err;
421 
422 }    // end execute()
423 
424 
425 
426