1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "cl_utils.h"
17 #include <stdlib.h>
18 
19 #if !defined (_WIN32)
20 #include <sys/mman.h>
21 #endif
22 
23 #include "test_config.h"
24 #include "string.h"
25 #include "harness/kernelHelpers.h"
26 
27 #include "harness/testHarness.h"
28 
29 #define HALF_MIN 1.0p-14
30 
31 
32 const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount] = { "", "2", "4", "8", "16", "3" };
33 const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "3" };
34 const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "4" };
35 const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount] = { "half", "int", "int2", "int4", "int8", "int2" };
36 
37 
38 void            *gIn_half = NULL;
39 void            *gOut_half = NULL;
40 void            *gOut_half_reference = NULL;
41 void            *gOut_half_reference_double = NULL;
42 void            *gIn_single = NULL;
43 void            *gOut_single = NULL;
44 void            *gOut_single_reference = NULL;
45 void            *gIn_double = NULL;
46 // void            *gOut_double = NULL;
47 // void            *gOut_double_reference = NULL;
48 cl_mem          gInBuffer_half = NULL;
49 cl_mem          gOutBuffer_half = NULL;
50 cl_mem          gInBuffer_single = NULL;
51 cl_mem          gOutBuffer_single = NULL;
52 cl_mem          gInBuffer_double = NULL;
53 // cl_mem          gOutBuffer_double = NULL;
54 
55 cl_context       gContext = NULL;
56 cl_command_queue gQueue = NULL;
57 uint32_t        gDeviceFrequency = 0;
58 uint32_t        gComputeDevices = 0;
59 size_t          gMaxThreadGroupSize = 0;
60 size_t          gWorkGroupSize = 0;
61 bool            gWimpyMode = false;
62 int             gWimpyReductionFactor = 512;
63 int             gTestDouble = 0;
64 
65 #if defined( __APPLE__ )
66 int             gReportTimes = 1;
67 #else
68 int             gReportTimes = 0;
69 #endif
70 
71 #pragma mark -
72 
InitCL(cl_device_id device)73 test_status InitCL( cl_device_id device )
74 {
75     size_t configSize = sizeof( gComputeDevices );
76     int error;
77 
78 #if MULTITHREAD
79     if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS,  configSize, &gComputeDevices, NULL )) )
80 #endif
81     gComputeDevices = 1;
82 
83     configSize = sizeof( gMaxThreadGroupSize );
84     if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_GROUP_SIZE, configSize, &gMaxThreadGroupSize,  NULL )) )
85         gMaxThreadGroupSize = 1;
86 
87     // Use only one-eighth the work group size
88     if (gMaxThreadGroupSize > 8)
89         gWorkGroupSize = gMaxThreadGroupSize / 8;
90     else
91         gWorkGroupSize = gMaxThreadGroupSize;
92 
93     configSize = sizeof( gDeviceFrequency );
94     if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency,  NULL )) )
95         gDeviceFrequency = 1;
96 
97     // Check extensions
98     int hasDouble = is_extension_available(device, "cl_khr_fp64");
99     gTestDouble ^= hasDouble;
100 
101     //detect whether profile of the device is embedded
102     char profile[64] = "";
103     if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) )
104     {
105         vlog_error( "Unable to get device CL DEVICE PROFILE string. (%d) \n", error );
106     }
107     else if( strstr(profile, "EMBEDDED_PROFILE" ) )
108     {
109         gIsEmbedded = 1;
110     }
111 
112     vlog( "%d compute devices at %f GHz\n", gComputeDevices, (double) gDeviceFrequency / 1000. );
113     vlog( "Max thread group size is %lld.\n", (uint64_t) gMaxThreadGroupSize );
114 
115     gContext = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
116     if( NULL == gContext )
117     {
118         vlog_error( "clCreateDeviceGroup failed. (%d)\n", error );
119         return TEST_FAIL;
120     }
121 
122     gQueue = clCreateCommandQueue(gContext, device, 0, &error);
123     if( NULL == gQueue )
124     {
125         vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
126         return TEST_FAIL;
127     }
128 
129 #if defined( __APPLE__ )
130     // FIXME: use clProtectedArray
131 #endif
132     //Allocate buffers
133     gIn_half   = malloc( getBufferSize(device)/2  );
134     gOut_half = malloc( BUFFER_SIZE/2  );
135     gOut_half_reference = malloc( BUFFER_SIZE/2  );
136     gOut_half_reference_double = malloc( BUFFER_SIZE/2  );
137     gIn_single   = malloc( BUFFER_SIZE );
138     gOut_single = malloc( getBufferSize(device)  );
139     gOut_single_reference = malloc( getBufferSize(device)  );
140     gIn_double   = malloc( 2*BUFFER_SIZE  );
141     // gOut_double = malloc( (2*getBufferSize(device))  );
142     // gOut_double_reference = malloc( (2*getBufferSize(device))  );
143 
144     if ( NULL == gIn_half ||
145      NULL == gOut_half ||
146      NULL == gOut_half_reference ||
147      NULL == gOut_half_reference_double ||
148          NULL == gIn_single ||
149      NULL == gOut_single ||
150      NULL == gOut_single_reference ||
151          NULL == gIn_double // || NULL == gOut_double || NULL == gOut_double_reference
152          )
153         return TEST_FAIL;
154 
155     gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(device) / 2, NULL, &error);
156     if( gInBuffer_half == NULL )
157     {
158         vlog_error( "clCreateArray failed for input (%d)\n", error );
159         return TEST_FAIL;
160     }
161 
162     gInBuffer_single = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &error );
163     if( gInBuffer_single == NULL )
164     {
165         vlog_error( "clCreateArray failed for input (%d)\n", error );
166         return TEST_FAIL;
167     }
168 
169     gInBuffer_double = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE*2, NULL, &error );
170     if( gInBuffer_double == NULL )
171     {
172         vlog_error( "clCreateArray failed for input (%d)\n", error );
173         return TEST_FAIL;
174     }
175 
176     gOutBuffer_half = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, BUFFER_SIZE/2, NULL, &error );
177     if( gOutBuffer_half == NULL )
178     {
179         vlog_error( "clCreateArray failed for output (%d)\n", error );
180         return TEST_FAIL;
181     }
182 
183     gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(device), NULL, &error );
184     if( gOutBuffer_single == NULL )
185     {
186         vlog_error( "clCreateArray failed for output (%d)\n", error );
187         return TEST_FAIL;
188     }
189 
190 #if 0
191     gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(device)), NULL, &error );
192     if( gOutBuffer_double == NULL )
193     {
194         vlog_error( "clCreateArray failed for output (%d)\n", error );
195         return TEST_FAIL;
196     }
197 #endif
198 
199     char string[16384];
200     vlog( "\nCompute Device info:\n" );
201     error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(string), string, NULL);
202     vlog( "\tDevice Name: %s\n", string );
203     error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(string), string, NULL);
204     vlog( "\tVendor: %s\n", string );
205     error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(string), string, NULL);
206     vlog( "\tDevice Version: %s\n", string );
207     error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(string), string, NULL);
208     vlog( "\tOpenCL C Version: %s\n", string );
209     error = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(string), string, NULL);
210     vlog( "\tDriver Version: %s\n", string );
211     vlog( "\tProcessing with %d devices\n", gComputeDevices );
212     vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
213     vlog( "\tHas double? %s\n", hasDouble ? "YES" : "NO" );
214     vlog( "\tTest double? %s\n", gTestDouble ? "YES" : "NO" );
215 
216     return TEST_PASS;
217 }
218 
MakeProgram(cl_device_id device,const char * source[],int count)219 cl_program MakeProgram( cl_device_id device, const char *source[], int count )
220 {
221     int error;
222     int i;
223 
224     //create the program
225     cl_program program;
226     error = create_single_kernel_helper_create_program(gContext, &program, (cl_uint)count, source);
227     if( NULL == program )
228     {
229         vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
230         return NULL;
231     }
232 
233     // build it
234     if( (error = clBuildProgram( program, 1, &device, NULL, NULL, NULL )) )
235     {
236         size_t  len;
237         char    buffer[16384];
238 
239         vlog_error("\t\tFAILED -- clBuildProgramExecutable() failed:\n");
240         clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
241         vlog_error("Log: %s\n", buffer);
242         vlog_error("Source :\n");
243         for(i = 0; i < count; ++i) {
244             vlog_error("%s", source[i]);
245         }
246         vlog_error("\n");
247 
248         clReleaseProgram( program );
249         return NULL;
250     }
251 
252     return program;
253 }
254 
ReleaseCL(void)255 void ReleaseCL(void)
256 {
257     clReleaseMemObject(gInBuffer_half);
258     clReleaseMemObject(gOutBuffer_half);
259     clReleaseMemObject(gInBuffer_single);
260     clReleaseMemObject(gOutBuffer_single);
261     clReleaseMemObject(gInBuffer_double);
262     // clReleaseMemObject(gOutBuffer_double);
263     clReleaseCommandQueue(gQueue);
264     clReleaseContext(gContext);
265 
266     free(gIn_half);
267     free(gOut_half);
268     free(gOut_half_reference);
269     free(gOut_half_reference_double);
270     free(gIn_single);
271     free(gOut_single);
272     free(gOut_single_reference);
273     free(gIn_double);
274 }
275 
numVecs(cl_uint count,int vectorSizeIdx,bool aligned)276 cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) {
277     if(aligned && g_arrVecSizes[vectorSizeIdx] == 3) {
278         return count/4;
279     }
280     return  (count + g_arrVecSizes[vectorSizeIdx] - 1)/
281     ( (g_arrVecSizes[vectorSizeIdx]) );
282 }
283 
runsOverBy(cl_uint count,int vectorSizeIdx,bool aligned)284 cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned) {
285     if(aligned || g_arrVecSizes[vectorSizeIdx] != 3) { return -1; }
286     return count% (g_arrVecSizes[vectorSizeIdx]);
287 }
288 
printSource(const char * src[],int len)289 void printSource(const char * src[], int len) {
290     int i;
291     for(i = 0; i < len; ++i) {
292         vlog("%s", src[i]);
293     }
294 }
295 
RunKernel(cl_device_id device,cl_kernel kernel,void * inBuf,void * outBuf,uint32_t blockCount,int extraArg)296 int RunKernel( cl_device_id device, cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg)
297 {
298     size_t localCount = blockCount;
299     size_t wg_size;
300     int error;
301 
302     error = clSetKernelArg(kernel, 0, sizeof inBuf, &inBuf);
303     error |= clSetKernelArg(kernel, 1, sizeof outBuf, &outBuf);
304 
305     if(extraArg >= 0) {
306         error |= clSetKernelArg(kernel, 2, sizeof(cl_uint), &extraArg);
307     }
308 
309     if( error )
310     {
311         vlog_error( "FAILED -- could not set kernel args\n" );
312         return -3;
313     }
314 
315     error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( wg_size ), &wg_size, NULL);
316     if (error)
317     {
318         vlog_error( "FAILED -- could not get kernel work group info\n" );
319         return -4;
320     }
321 
322     wg_size = (wg_size > gWorkGroupSize) ? gWorkGroupSize : wg_size;
323     while( localCount % wg_size )
324         wg_size--;
325 
326     if( (error = clEnqueueNDRangeKernel( gQueue, kernel, 1, NULL, &localCount, &wg_size, 0, NULL, NULL )) )
327     {
328         vlog_error( "FAILED -- could not execute kernel\n" );
329         return -5;
330     }
331 
332     return 0;
333 }
334 
335 #if defined (__APPLE__ )
336 
337 #include <mach/mach_time.h>
338 
ReadTime(void)339 uint64_t ReadTime( void )
340 {
341     return mach_absolute_time();        // returns time since boot.  Ticks have better than microsecond precsion.
342 }
343 
SubtractTime(uint64_t endTime,uint64_t startTime)344 double SubtractTime( uint64_t endTime, uint64_t startTime )
345 {
346     static double conversion = 0.0;
347 
348     if(  0.0 == conversion )
349     {
350         mach_timebase_info_data_t   info;
351         kern_return_t err = mach_timebase_info( &info );
352         if( 0 == err )
353             conversion = 1e-9 * (double) info.numer / (double) info.denom;
354     }
355 
356     return (double) (endTime - startTime) * conversion;
357 }
358 
359 #elif defined( _WIN32 ) && defined (_MSC_VER)
360 
361 // functions are defined in compat.h
362 
363 #else
364 
365 //
366 //  Please feel free to substitute your own timing facility here.
367 //
368 
369 #warning  Times are meaningless. No timing facility in place for this platform.
ReadTime(void)370 uint64_t ReadTime( void )
371 {
372     return 0ULL;
373 }
374 
375 // return the difference between two times obtained from ReadTime in seconds
SubtractTime(uint64_t endTime,uint64_t startTime)376 double SubtractTime( uint64_t endTime, uint64_t startTime )
377 {
378     return INFINITY;
379 }
380 
381 #endif
382 
getBufferSize(cl_device_id device_id)383 size_t getBufferSize(cl_device_id device_id)
384 {
385     static int s_initialized = 0;
386     static cl_device_id s_device_id;
387     static cl_ulong s_result = 64*1024;
388 
389     if(s_initialized == 0 || s_device_id != device_id)
390     {
391         cl_ulong result, maxGlobalSize;
392         cl_int err = clGetDeviceInfo (device_id,
393                                       CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
394                                       sizeof(result), (void *)&result,
395                                       NULL);
396         if(err)
397         {
398             vlog_error("clGetDeviceInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed\n");
399             s_result = 64*1024;
400             goto exit;
401         }
402         if (result > BUFFER_SIZE)
403             result = BUFFER_SIZE;
404         log_info("Using const buffer size 0x%lx (%lu)\n", (unsigned long)result, (unsigned long)result);
405         err = clGetDeviceInfo (device_id,
406                                CL_DEVICE_GLOBAL_MEM_SIZE,
407                                sizeof(maxGlobalSize), (void *)&maxGlobalSize,
408                                NULL);
409         if(err)
410         {
411             vlog_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed\n");
412             goto exit;
413         }
414         result = result / 2;
415         if(maxGlobalSize < result * 10)
416             result = result / 10;
417         s_initialized = 1;
418         s_device_id = device_id;
419         s_result = result;
420     }
421 
422 exit:
423     if( s_result > SIZE_MAX )
424     {
425         vlog_error( "ERROR: clGetDeviceInfo is reporting a CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE larger than addressable memory on the host.\n It seems highly unlikely that this is usable, due to the API design.\n" );
426         fflush(stdout);
427         abort();
428     }
429 
430     return (size_t) s_result;
431 }
432 
getBufferCount(cl_device_id device_id,size_t vecSize,size_t typeSize)433 cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize)
434 {
435     cl_ulong tmp = getBufferSize(device_id);
436     if(vecSize == 3)
437     {
438         return tmp/(cl_ulong)(4*typeSize);
439     }
440     return tmp/(cl_ulong)(vecSize*typeSize);
441 }
442