1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/typeWrappers.h"
18 #include "harness/testHarness.h"
19 #include <ctype.h>
20 #include <string.h>
21
22 const char *sample_single_param_kernel[] = {
23 "__kernel void sample_test(__global int *src)\n"
24 "{\n"
25 " int tid = get_global_id(0);\n"
26 "\n"
27 "}\n" };
28
29 const char *sample_single_param_write_kernel[] = {
30 "__kernel void sample_test(__global int *src)\n"
31 "{\n"
32 " int tid = get_global_id(0);\n"
33 " src[tid] = tid;\n"
34 "\n"
35 "}\n" };
36
37 const char *sample_read_image_kernel_pattern[] = {
38 "__kernel void sample_test( __global float *result, ", " )\n"
39 "{\n"
40 " sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;\n"
41 " int tid = get_global_id(0);\n"
42 " result[0] = 0.0f;\n",
43 "\n"
44 "}\n" };
45
46 const char *sample_write_image_kernel_pattern[] = {
47 "__kernel void sample_test( ", " )\n"
48 "{\n"
49 " int tid = get_global_id(0);\n",
50 "\n"
51 "}\n" };
52
53
54 const char *sample_large_parmam_kernel_pattern[] = {
55 "__kernel void sample_test(%s, __global long *result)\n"
56 "{\n"
57 "result[0] = 0;\n"
58 "%s"
59 "\n"
60 "}\n" };
61
62 const char *sample_large_int_parmam_kernel_pattern[] = {
63 "__kernel void sample_test(%s, __global int *result)\n"
64 "{\n"
65 "result[0] = 0;\n"
66 "%s"
67 "\n"
68 "}\n" };
69
70 const char *sample_sampler_kernel_pattern[] = {
71 "__kernel void sample_test( read_only image2d_t src, __global int4 *dst", ", sampler_t sampler%d", ")\n"
72 "{\n"
73 " int tid = get_global_id(0);\n",
74 " dst[ 0 ] = read_imagei( src, sampler%d, (int2)( 0, 0 ) );\n",
75 "\n"
76 "}\n" };
77
78 const char *sample_const_arg_kernel[] = {
79 "__kernel void sample_test(__constant int *src1, __global int *dst)\n"
80 "{\n"
81 " int tid = get_global_id(0);\n"
82 "\n"
83 " dst[tid] = src1[tid];\n"
84 "\n"
85 "}\n" };
86
87 const char *sample_local_arg_kernel[] = {
88 "__kernel void sample_test(__local int *src1, __global int *global_src, __global int *dst)\n"
89 "{\n"
90 " int tid = get_global_id(0);\n"
91 "\n"
92 " src1[tid] = global_src[tid];\n"
93 " barrier(CLK_GLOBAL_MEM_FENCE);\n"
94 " dst[tid] = src1[tid];\n"
95 "\n"
96 "}\n" };
97
98 const char *sample_const_max_arg_kernel_pattern =
99 "__kernel void sample_test(__constant int *src1 %s, __global int *dst)\n"
100 "{\n"
101 " int tid = get_global_id(0);\n"
102 "\n"
103 " dst[tid] = src1[tid];\n"
104 "%s"
105 "\n"
106 "}\n";
107
test_min_max_thread_dimensions(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)108 int test_min_max_thread_dimensions(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
109 {
110 int error, retVal;
111 unsigned int maxThreadDim, threadDim, i;
112 clProgramWrapper program;
113 clKernelWrapper kernel;
114 clMemWrapper streams[1];
115 size_t *threads, *localThreads;
116 cl_event event;
117 cl_int event_status;
118
119
120 /* Get the max thread dimensions */
121 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxThreadDim ), &maxThreadDim, NULL );
122 test_error( error, "Unable to get max work item dimensions from device" );
123
124 if( maxThreadDim < 3 )
125 {
126 log_error( "ERROR: Reported max work item dimensions is less than required! (%d)\n", maxThreadDim );
127 return -1;
128 }
129
130 log_info("Reported max thread dimensions of %d.\n", maxThreadDim);
131
132 /* Create a kernel to test with */
133 if( create_single_kernel_helper( context, &program, &kernel, 1, sample_single_param_kernel, "sample_test" ) != 0 )
134 {
135 return -1;
136 }
137
138 /* Create some I/O streams */
139 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE,
140 sizeof(cl_int) * 100, NULL, &error);
141 if( streams[0] == NULL )
142 {
143 log_error("ERROR: Creating test array failed!\n");
144 return -1;
145 }
146
147 /* Set the arguments */
148 error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
149 test_error( error, "Unable to set kernel arguments" );
150
151 retVal = 0;
152
153 /* Now try running the kernel with up to that many threads */
154 for (threadDim=1; threadDim <= maxThreadDim; threadDim++)
155 {
156 threads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim );
157 localThreads = (size_t *)malloc( sizeof( size_t ) * maxThreadDim );
158 for( i = 0; i < maxThreadDim; i++ )
159 {
160 threads[ i ] = 1;
161 localThreads[i] = 1;
162 }
163
164 error = clEnqueueNDRangeKernel( queue, kernel, maxThreadDim, NULL, threads, localThreads, 0, NULL, &event );
165 test_error( error, "Failed clEnqueueNDRangeKernel");
166
167 // Verify that the event does not return an error from the execution
168 error = clWaitForEvents(1, &event);
169 test_error( error, "clWaitForEvent failed");
170 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
171 test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
172 clReleaseEvent(event);
173 if (event_status < 0)
174 test_error(error, "Kernel execution event returned error");
175
176 /* All done */
177 free( threads );
178 free( localThreads );
179 }
180
181 return retVal;
182 }
183
184
test_min_max_work_items_sizes(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)185 int test_min_max_work_items_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
186 {
187 int error;
188 size_t *deviceMaxWorkItemSize;
189 unsigned int maxWorkItemDim;
190
191 /* Get the max work item dimensions */
192 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof( maxWorkItemDim ), &maxWorkItemDim, NULL );
193 test_error( error, "Unable to get max work item dimensions from device" );
194
195 log_info("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS returned %d\n", maxWorkItemDim);
196 deviceMaxWorkItemSize = (size_t*)malloc(sizeof(size_t)*maxWorkItemDim);
197 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t)*maxWorkItemDim, deviceMaxWorkItemSize, NULL );
198 test_error( error, "clDeviceInfo for CL_DEVICE_MAX_WORK_ITEM_SIZES failed" );
199
200 unsigned int i;
201 int errors = 0;
202 for(i=0; i<maxWorkItemDim; i++) {
203 if (deviceMaxWorkItemSize[i]<1) {
204 log_error("MAX_WORK_ITEM_SIZE in dimension %d is invalid: %lu\n", i, deviceMaxWorkItemSize[i]);
205 errors++;
206 } else {
207 log_info("Dimension %d has max work item size %lu\n", i, deviceMaxWorkItemSize[i]);
208 }
209 }
210
211 free(deviceMaxWorkItemSize);
212
213 if (errors)
214 return -1;
215 return 0;
216 }
217
218
219
test_min_max_work_group_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)220 int test_min_max_work_group_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
221 {
222 int error;
223 size_t deviceMaxThreadSize;
224
225 /* Get the max thread dimensions */
226 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof( deviceMaxThreadSize ), &deviceMaxThreadSize, NULL );
227 test_error( error, "Unable to get max work group size from device" );
228
229 log_info("Reported %ld max device work group size.\n", deviceMaxThreadSize);
230
231 if( deviceMaxThreadSize == 0 )
232 {
233 log_error( "ERROR: Max work group size is reported as zero!\n" );
234 return -1;
235 }
236 return 0;
237 }
238
test_min_max_read_image_args(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)239 int test_min_max_read_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
240 {
241 int error;
242 unsigned int maxReadImages, i;
243 unsigned int deviceAddressSize;
244 clProgramWrapper program;
245 char readArgLine[128], *programSrc;
246 const char *readArgPattern = ", read_only image2d_t srcimg%d";
247 clKernelWrapper kernel;
248 clMemWrapper *streams, result;
249 size_t threads[2];
250 cl_image_format image_format_desc;
251 size_t maxParameterSize;
252 cl_event event;
253 cl_int event_status;
254 cl_float image_data[4*4];
255 float image_result = 0.0f;
256 float actual_image_result;
257 cl_uint minRequiredReadImages = gIsEmbedded ? 8 : 128;
258 cl_device_type deviceType;
259
260 PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
261 image_format_desc.image_channel_order = CL_RGBA;
262 image_format_desc.image_channel_data_type = CL_FLOAT;
263
264 /* Get the max read image arg count */
265 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_READ_IMAGE_ARGS, sizeof( maxReadImages ), &maxReadImages, NULL );
266 test_error( error, "Unable to get max read image arg count from device" );
267
268 if( maxReadImages < minRequiredReadImages )
269 {
270 log_error( "ERROR: Reported max read image arg count is less than required! (%d)\n", maxReadImages );
271 return -1;
272 }
273
274 log_info("Reported %d max read image args.\n", maxReadImages);
275
276 error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( deviceAddressSize ), &deviceAddressSize, NULL );
277 test_error( error, "Unable to query CL_DEVICE_ADDRESS_BITS for device" );
278 deviceAddressSize /= 8; // convert from bits to bytes
279
280
281 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
282 test_error( error, "Unable to get max parameter size from device" );
283
284 if (!gIsEmbedded && maxReadImages >= 128 && maxParameterSize == 1024)
285 {
286 error = clGetDeviceInfo( deviceID, CL_DEVICE_TYPE, sizeof( deviceType ), &deviceType, NULL );
287 test_error( error, "Unable to get device type from device" );
288
289 if(deviceType != CL_DEVICE_TYPE_CUSTOM)
290 {
291 maxReadImages = 127;
292 }
293 }
294 // Subtract the size of the result
295 maxParameterSize -= deviceAddressSize;
296
297 // Calculate the number we can use
298 if (maxParameterSize/deviceAddressSize < maxReadImages) {
299 log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/deviceAddressSize));
300 maxReadImages = (unsigned int)(maxParameterSize/deviceAddressSize);
301 }
302
303 /* Create a program with that many read args */
304 programSrc = (char *)malloc( strlen( sample_read_image_kernel_pattern[ 0 ] ) + ( strlen( readArgPattern ) + 6 ) * ( maxReadImages ) +
305 strlen( sample_read_image_kernel_pattern[ 1 ] ) + 1 + 40240);
306
307 strcpy( programSrc, sample_read_image_kernel_pattern[ 0 ] );
308 strcat( programSrc, "read_only image2d_t srcimg0" );
309 for( i = 0; i < maxReadImages-1; i++ )
310 {
311 sprintf( readArgLine, readArgPattern, i+1 );
312 strcat( programSrc, readArgLine );
313 }
314 strcat( programSrc, sample_read_image_kernel_pattern[ 1 ] );
315 for ( i = 0; i < maxReadImages; i++) {
316 sprintf( readArgLine, "\tresult[0] += read_imagef( srcimg%d, sampler, (int2)(0,0)).x;\n", i);
317 strcat( programSrc, readArgLine );
318 }
319 strcat( programSrc, sample_read_image_kernel_pattern[ 2 ] );
320
321 error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test");
322 test_error( error, "Failed to create the program and kernel.");
323 free( programSrc );
324
325 result = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_float), NULL,
326 &error);
327 test_error( error, "clCreateBufer failed");
328
329 /* Create some I/O streams */
330 streams = new clMemWrapper[maxReadImages + 1];
331 for( i = 0; i < maxReadImages; i++ )
332 {
333 image_data[0]=i;
334 image_result+= image_data[0];
335 streams[i] = create_image_2d( context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, &image_format_desc, 4, 4, 0, image_data, &error );
336 test_error( error, "Unable to allocate test image" );
337 }
338
339 error = clSetKernelArg( kernel, 0, sizeof( result ), &result );
340 test_error( error, "Unable to set kernel arguments" );
341
342 /* Set the arguments */
343 for( i = 1; i < maxReadImages+1; i++ )
344 {
345 error = clSetKernelArg( kernel, i, sizeof( streams[i-1] ), &streams[i-1] );
346 test_error( error, "Unable to set kernel arguments" );
347 }
348
349 /* Now try running the kernel */
350 threads[0] = threads[1] = 1;
351 error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event );
352 test_error( error, "clEnqueueNDRangeKernel failed");
353
354 // Verify that the event does not return an error from the execution
355 error = clWaitForEvents(1, &event);
356 test_error( error, "clWaitForEvent failed");
357 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
358 test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
359 clReleaseEvent(event);
360 if (event_status < 0)
361 test_error(error, "Kernel execution event returned error");
362
363 error = clEnqueueReadBuffer(queue, result, CL_TRUE, 0, sizeof(cl_float), &actual_image_result, 0, NULL, NULL);
364 test_error(error, "clEnqueueReadBuffer failed");
365
366 delete[] streams;
367
368 if (actual_image_result != image_result) {
369 log_error("Result failed to verify. Got %g, expected %g.\n", actual_image_result, image_result);
370 return 1;
371 }
372
373 return 0;
374 }
375
test_min_max_write_image_args(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)376 int test_min_max_write_image_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
377 {
378 int error;
379 unsigned int maxWriteImages, i;
380 clProgramWrapper program;
381 char writeArgLine[128], *programSrc;
382 const char *writeArgPattern = ", write_only image2d_t dstimg%d";
383 clKernelWrapper kernel;
384 clMemWrapper *streams;
385 size_t threads[2];
386 cl_image_format image_format_desc;
387 size_t maxParameterSize;
388 cl_event event;
389 cl_int event_status;
390 cl_uint minRequiredWriteImages = gIsEmbedded ? 1 : 8;
391
392
393 PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
394 image_format_desc.image_channel_order = CL_RGBA;
395 image_format_desc.image_channel_data_type = CL_UNORM_INT8;
396
397 /* Get the max read image arg count */
398 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, sizeof( maxWriteImages ), &maxWriteImages, NULL );
399 test_error( error, "Unable to get max write image arg count from device" );
400
401 if( maxWriteImages == 0 )
402 {
403 log_info( "WARNING: Device reports 0 for a max write image arg count (write image arguments unsupported). Skipping test (implicitly passes). This is only valid if the number of image formats is also 0.\n" );
404 return 0;
405 }
406
407 if( maxWriteImages < minRequiredWriteImages )
408 {
409 log_error( "ERROR: Reported max write image arg count is less than required! (%d)\n", maxWriteImages );
410 return -1;
411 }
412
413 log_info("Reported %d max write image args.\n", maxWriteImages);
414
415 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
416 test_error( error, "Unable to get max parameter size from device" );
417
418 // Calculate the number we can use
419 if (maxParameterSize/sizeof(cl_mem) < maxWriteImages) {
420 log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem)));
421 maxWriteImages = (unsigned int)(maxParameterSize/sizeof(cl_mem));
422 }
423
424 /* Create a program with that many write args + 1 */
425 programSrc = (char *)malloc( strlen( sample_write_image_kernel_pattern[ 0 ] ) + ( strlen( writeArgPattern ) + 6 ) * ( maxWriteImages + 1 ) +
426 strlen( sample_write_image_kernel_pattern[ 1 ] ) + 1 + 40240 );
427
428 strcpy( programSrc, sample_write_image_kernel_pattern[ 0 ] );
429 strcat( programSrc, "write_only image2d_t dstimg0" );
430 for( i = 1; i < maxWriteImages; i++ )
431 {
432 sprintf( writeArgLine, writeArgPattern, i );
433 strcat( programSrc, writeArgLine );
434 }
435 strcat( programSrc, sample_write_image_kernel_pattern[ 1 ] );
436 for ( i = 0; i < maxWriteImages; i++) {
437 sprintf( writeArgLine, "\twrite_imagef( dstimg%d, (int2)(0,0), (float4)(0,0,0,0));\n", i);
438 strcat( programSrc, writeArgLine );
439 }
440 strcat( programSrc, sample_write_image_kernel_pattern[ 2 ] );
441
442 error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test");
443 test_error( error, "Failed to create the program and kernel.");
444 free( programSrc );
445
446
447 /* Create some I/O streams */
448 streams = new clMemWrapper[maxWriteImages + 1];
449 for( i = 0; i < maxWriteImages; i++ )
450 {
451 streams[i] = create_image_2d( context, CL_MEM_READ_WRITE, &image_format_desc, 16, 16, 0, NULL, &error );
452 test_error( error, "Unable to allocate test image" );
453 }
454
455 /* Set the arguments */
456 for( i = 0; i < maxWriteImages; i++ )
457 {
458 error = clSetKernelArg( kernel, i, sizeof( streams[i] ), &streams[i] );
459 test_error( error, "Unable to set kernel arguments" );
460 }
461
462 /* Now try running the kernel */
463 threads[0] = threads[1] = 16;
464 error = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &event );
465 test_error( error, "clEnqueueNDRangeKernel failed.");
466
467 // Verify that the event does not return an error from the execution
468 error = clWaitForEvents(1, &event);
469 test_error( error, "clWaitForEvent failed");
470 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
471 test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
472 clReleaseEvent(event);
473 if (event_status < 0)
474 test_error(error, "Kernel execution event returned error");
475
476 /* All done */
477 delete[] streams;
478 return 0;
479 }
480
test_min_max_mem_alloc_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)481 int test_min_max_mem_alloc_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
482 {
483 int error;
484 cl_ulong maxAllocSize, memSize, minSizeToTry;
485 clMemWrapper memHdl;
486
487 cl_ulong requiredAllocSize;
488
489 if (gIsEmbedded)
490 requiredAllocSize = 1 * 1024 * 1024;
491 else
492 requiredAllocSize = 128 * 1024 * 1024;
493
494 /* Get the max mem alloc size */
495 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof( maxAllocSize ), &maxAllocSize, NULL );
496 test_error( error, "Unable to get max mem alloc size from device" );
497
498 error = clGetDeviceInfo( deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof( memSize ), &memSize, NULL );
499 test_error( error, "Unable to get global memory size from device" );
500
501 if (memSize > (cl_ulong)SIZE_MAX) {
502 memSize = (cl_ulong)SIZE_MAX;
503 }
504
505 if( maxAllocSize < requiredAllocSize)
506 {
507 log_error( "ERROR: Reported max allocation size is less than required %lldMB! (%llu or %lluMB, from a total mem size of %lldMB)\n", (requiredAllocSize / 1024) / 1024, maxAllocSize, (maxAllocSize / 1024)/1024, (memSize / 1024)/1024 );
508 return -1;
509 }
510
511 requiredAllocSize = ((memSize / 4) > (1024 * 1024 * 1024)) ? 1024 * 1024 * 1024 : memSize / 4;
512
513 if (gIsEmbedded)
514 requiredAllocSize = (requiredAllocSize < 1 * 1024 * 1024) ? 1 * 1024 * 1024 : requiredAllocSize;
515 else
516 requiredAllocSize = (requiredAllocSize < 128 * 1024 * 1024) ? 128 * 1024 * 1024 : requiredAllocSize;
517
518 if( maxAllocSize < requiredAllocSize )
519 {
520 log_error( "ERROR: Reported max allocation size is less than required of total memory! (%llu or %lluMB, from a total mem size of %lluMB)\n", maxAllocSize, (maxAllocSize / 1024)/1024, (requiredAllocSize / 1024)/1024 );
521 return -1;
522 }
523
524 log_info("Reported max allocation size of %lld bytes (%gMB) and global mem size of %lld bytes (%gMB).\n",
525 maxAllocSize, maxAllocSize/(1024.0*1024.0), requiredAllocSize, requiredAllocSize/(1024.0*1024.0));
526
527 if ( memSize < maxAllocSize ) {
528 log_info("Global memory size is less than max allocation size, using that.\n");
529 maxAllocSize = memSize;
530 }
531
532 minSizeToTry = maxAllocSize/16;
533 while (maxAllocSize > (maxAllocSize/4)) {
534
535 log_info("Trying to create a buffer of size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0));
536 memHdl = clCreateBuffer( context, CL_MEM_READ_ONLY, (size_t)maxAllocSize, NULL, &error );
537 if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE || error == CL_OUT_OF_RESOURCES || error == CL_OUT_OF_HOST_MEMORY) {
538 log_info("\tAllocation failed at size of %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0));
539 maxAllocSize -= minSizeToTry;
540 continue;
541 }
542 test_error( error, "clCreateBuffer failed for maximum sized buffer.");
543 return 0;
544 }
545 log_error("Failed to allocate even %lld bytes (%gMB).\n", maxAllocSize, (double)maxAllocSize/(1024.0*1024.0));
546 return -1;
547 }
548
test_min_max_image_2d_width(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)549 int test_min_max_image_2d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
550 {
551 int error;
552 size_t maxDimension;
553 clMemWrapper streams[1];
554 cl_image_format image_format_desc;
555 cl_ulong maxAllocSize;
556 cl_uint minRequiredDimension;
557 size_t length;
558
559
560 PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
561
562 auto version = get_device_cl_version(deviceID);
563 if (version == Version(1, 0))
564 {
565 minRequiredDimension = gIsEmbedded ? 2048 : 4096;
566 }
567 else
568 {
569 minRequiredDimension = gIsEmbedded ? 2048 : 8192;
570 }
571
572
573 /* Just get any ol format to test with */
574 error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc );
575 test_error( error, "Unable to obtain suitable image format to test with!" );
576
577 /* Get the max 2d image width */
578 error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL );
579 test_error( error, "Unable to get max image 2d width from device" );
580
581 if( maxDimension < minRequiredDimension )
582 {
583 log_error( "ERROR: Reported max image 2d width is less than required! (%d)\n", (int)maxDimension );
584 return -1;
585 }
586 log_info("Max reported width is %ld.\n", maxDimension);
587
588 /* Verify we can use the format */
589 image_format_desc.image_channel_data_type = CL_UNORM_INT8;
590 image_format_desc.image_channel_order = CL_RGBA;
591 if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) {
592 log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
593 return -1;
594 }
595
596 /* Verify that we can actually allocate an image that large */
597 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
598 test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
599 if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
600 log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
601 (cl_ulong)maxDimension*1*4, maxAllocSize);
602 return -1;
603 }
604
605 log_info("Attempting to create an image of size %d x 1 = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
606
607 /* Try to allocate a very big image */
608 streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 0, NULL, &error );
609 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
610 {
611 print_error( error, "Image 2D creation failed for maximum width" );
612 return -1;
613 }
614
615 return 0;
616 }
617
test_min_max_image_2d_height(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)618 int test_min_max_image_2d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
619 {
620 int error;
621 size_t maxDimension;
622 clMemWrapper streams[1];
623 cl_image_format image_format_desc;
624 cl_ulong maxAllocSize;
625 cl_uint minRequiredDimension;
626 size_t length;
627
628 PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
629
630 auto version = get_device_cl_version(deviceID);
631 if (version == Version(1, 0))
632 {
633 minRequiredDimension = gIsEmbedded ? 2048 : 4096;
634 }
635 else
636 {
637 minRequiredDimension = gIsEmbedded ? 2048 : 8192;
638 }
639
640 /* Just get any ol format to test with */
641 error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D, CL_MEM_READ_WRITE, 0, &image_format_desc );
642 test_error( error, "Unable to obtain suitable image format to test with!" );
643
644 /* Get the max 2d image width */
645 error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE2D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL );
646 test_error( error, "Unable to get max image 2d height from device" );
647
648 if( maxDimension < minRequiredDimension )
649 {
650 log_error( "ERROR: Reported max image 2d height is less than required! (%d)\n", (int)maxDimension );
651 return -1;
652 }
653 log_info("Max reported height is %ld.\n", maxDimension);
654
655 /* Verify we can use the format */
656 image_format_desc.image_channel_data_type = CL_UNORM_INT8;
657 image_format_desc.image_channel_order = CL_RGBA;
658 if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D, &image_format_desc)) {
659 log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
660 return -1;
661 }
662
663 /* Verify that we can actually allocate an image that large */
664 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
665 test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
666 if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
667 log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
668 (cl_ulong)maxDimension*1*4, maxAllocSize);
669 return -1;
670 }
671
672 log_info("Attempting to create an image of size 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
673
674 /* Try to allocate a very big image */
675 streams[0] = create_image_2d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 0, NULL, &error );
676 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
677 {
678 print_error( error, "Image 2D creation failed for maximum height" );
679 return -1;
680 }
681
682 return 0;
683 }
684
test_min_max_image_3d_width(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)685 int test_min_max_image_3d_width(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
686 {
687 int error;
688 size_t maxDimension;
689 clMemWrapper streams[1];
690 cl_image_format image_format_desc;
691 cl_ulong maxAllocSize;
692
693
694 PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
695
696 /* Just get any ol format to test with */
697 error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
698 CL_MEM_READ_ONLY, 0, &image_format_desc);
699 test_error( error, "Unable to obtain suitable image format to test with!" );
700
701 /* Get the max 2d image width */
702 error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof( maxDimension ), &maxDimension, NULL );
703 test_error( error, "Unable to get max image 3d width from device" );
704
705 if( maxDimension < 2048 )
706 {
707 log_error( "ERROR: Reported max image 3d width is less than required! (%d)\n", (int)maxDimension );
708 return -1;
709 }
710 log_info("Max reported width is %ld.\n", maxDimension);
711
712 /* Verify we can use the format */
713 image_format_desc.image_channel_data_type = CL_UNORM_INT8;
714 image_format_desc.image_channel_order = CL_RGBA;
715 if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) {
716 log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
717 return -1;
718 }
719
720 /* Verify that we can actually allocate an image that large */
721 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
722 test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
723 if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) {
724 log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
725 (cl_ulong)maxDimension*2*4, maxAllocSize);
726 return -1;
727 }
728
729 log_info("Attempting to create an image of size %d x 1 x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0));
730
731 /* Try to allocate a very big image */
732 streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimension, 1, 2, 0, 0, NULL, &error );
733 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
734 {
735 print_error( error, "Image 3D creation failed for maximum width" );
736 return -1;
737 }
738
739 return 0;
740 }
741
test_min_max_image_3d_height(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)742 int test_min_max_image_3d_height(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
743 {
744 int error;
745 size_t maxDimension;
746 clMemWrapper streams[1];
747 cl_image_format image_format_desc;
748 cl_ulong maxAllocSize;
749
750
751 PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
752
753 /* Just get any ol format to test with */
754 error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
755 CL_MEM_READ_ONLY, 0, &image_format_desc);
756 test_error( error, "Unable to obtain suitable image format to test with!" );
757
758 /* Get the max 2d image width */
759 error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof( maxDimension ), &maxDimension, NULL );
760 test_error( error, "Unable to get max image 3d height from device" );
761
762 if( maxDimension < 2048 )
763 {
764 log_error( "ERROR: Reported max image 3d height is less than required! (%d)\n", (int)maxDimension );
765 return -1;
766 }
767 log_info("Max reported height is %ld.\n", maxDimension);
768
769 /* Verify we can use the format */
770 image_format_desc.image_channel_data_type = CL_UNORM_INT8;
771 image_format_desc.image_channel_order = CL_RGBA;
772 if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) {
773 log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
774 return -1;
775 }
776
777 /* Verify that we can actually allocate an image that large */
778 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
779 test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
780 if ( (cl_ulong)maxDimension*2*4 > maxAllocSize ) {
781 log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
782 (cl_ulong)maxDimension*2*4, maxAllocSize);
783 return -1;
784 }
785
786 log_info("Attempting to create an image of size 1 x %d x 2 = %gMB.\n", (int)maxDimension, (2*(float)maxDimension*4/1024.0/1024.0));
787
788 /* Try to allocate a very big image */
789 streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, maxDimension, 2, 0, 0, NULL, &error );
790 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
791 {
792 print_error( error, "Image 3D creation failed for maximum height" );
793 return -1;
794 }
795
796 return 0;
797 }
798
799
test_min_max_image_3d_depth(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)800 int test_min_max_image_3d_depth(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
801 {
802 int error;
803 size_t maxDimension;
804 clMemWrapper streams[1];
805 cl_image_format image_format_desc;
806 cl_ulong maxAllocSize;
807
808
809 PASSIVE_REQUIRE_3D_IMAGE_SUPPORT( deviceID )
810
811 /* Just get any ol format to test with */
812 error = get_8_bit_image_format(context, CL_MEM_OBJECT_IMAGE3D,
813 CL_MEM_READ_ONLY, 0, &image_format_desc);
814 test_error( error, "Unable to obtain suitable image format to test with!" );
815
816 /* Get the max 2d image width */
817 error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof( maxDimension ), &maxDimension, NULL );
818 test_error( error, "Unable to get max image 3d depth from device" );
819
820 if( maxDimension < 2048 )
821 {
822 log_error( "ERROR: Reported max image 3d depth is less than required! (%d)\n", (int)maxDimension );
823 return -1;
824 }
825 log_info("Max reported depth is %ld.\n", maxDimension);
826
827 /* Verify we can use the format */
828 image_format_desc.image_channel_data_type = CL_UNORM_INT8;
829 image_format_desc.image_channel_order = CL_RGBA;
830 if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE3D, &image_format_desc)) {
831 log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
832 return -1;
833 }
834
835 /* Verify that we can actually allocate an image that large */
836 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
837 test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
838 if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
839 log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
840 (cl_ulong)maxDimension*1*4, maxAllocSize);
841 return -1;
842 }
843
844 log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
845
846 /* Try to allocate a very big image */
847 streams[0] = create_image_3d( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error );
848 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
849 {
850 print_error( error, "Image 3D creation failed for maximum depth" );
851 return -1;
852 }
853
854 return 0;
855 }
856
test_min_max_image_array_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)857 int test_min_max_image_array_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
858 {
859 int error;
860 size_t maxDimension;
861 clMemWrapper streams[1];
862 cl_image_format image_format_desc;
863 cl_ulong maxAllocSize;
864 size_t minRequiredDimension = gIsEmbedded ? 256 : 2048;
865
866 PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID );
867
868 /* Just get any ol format to test with */
869 error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE2D_ARRAY, CL_MEM_READ_WRITE, 0, &image_format_desc );
870 test_error( error, "Unable to obtain suitable image format to test with!" );
871
872 /* Get the max image array width */
873 error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, sizeof( maxDimension ), &maxDimension, NULL );
874 test_error( error, "Unable to get max image array size from device" );
875
876 if( maxDimension < minRequiredDimension )
877 {
878 log_error( "ERROR: Reported max image array size is less than required! (%d)\n", (int)maxDimension );
879 return -1;
880 }
881 log_info("Max reported image array size is %ld.\n", maxDimension);
882
883 /* Verify we can use the format */
884 image_format_desc.image_channel_data_type = CL_UNORM_INT8;
885 image_format_desc.image_channel_order = CL_RGBA;
886 if (!is_image_format_supported( context, CL_MEM_READ_ONLY, CL_MEM_OBJECT_IMAGE2D_ARRAY, &image_format_desc)) {
887 log_error("CL_UNORM_INT8 CL_RGBA not supported. Can not test.");
888 return -1;
889 }
890
891 /* Verify that we can actually allocate an image that large */
892 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
893 test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
894 if ( (cl_ulong)maxDimension*1*4 > maxAllocSize ) {
895 log_error("Can not allocate a large enough image (min size: %lld bytes, max allowed: %lld bytes) to test.\n",
896 (cl_ulong)maxDimension*1*4, maxAllocSize);
897 return -1;
898 }
899
900 log_info("Attempting to create an image of size 1 x 1 x %d = %gMB.\n", (int)maxDimension, ((float)maxDimension*4/1024.0/1024.0));
901
902 /* Try to allocate a very big image */
903 streams[0] = create_image_2d_array( context, CL_MEM_READ_ONLY, &image_format_desc, 1, 1, maxDimension, 0, 0, NULL, &error );
904 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
905 {
906 print_error( error, "2D Image Array creation failed for maximum array size" );
907 return -1;
908 }
909
910 return 0;
911 }
912
test_min_max_image_buffer_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)913 int test_min_max_image_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
914 {
915 int error;
916 size_t maxDimensionPixels;
917 clMemWrapper streams[2];
918 cl_image_format image_format_desc = {0};
919 cl_ulong maxAllocSize;
920 size_t minRequiredDimension = gIsEmbedded ? 2048 : 65536;
921 unsigned int i = 0;
922 size_t pixelBytes = 0;
923
924 PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID );
925
926 /* Get the max memory allocation size */
927 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof ( maxAllocSize ), &maxAllocSize, NULL );
928 test_error( error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE." );
929
930 /* Get the max image array width */
931 error = clGetDeviceInfo( deviceID, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, sizeof( maxDimensionPixels ), &maxDimensionPixels, NULL );
932 test_error( error, "Unable to get max image buffer size from device" );
933
934 if( maxDimensionPixels < minRequiredDimension )
935 {
936 log_error( "ERROR: Reported max image buffer size is less than required! (%d)\n", (int)maxDimensionPixels );
937 return -1;
938 }
939 log_info("Max reported image buffer size is %ld pixels.\n", maxDimensionPixels);
940
941 pixelBytes = maxAllocSize / maxDimensionPixels;
942 if ( pixelBytes == 0 )
943 {
944 log_error( "Value of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE is greater than CL_MAX_MEM_ALLOC_SIZE so there is no way to allocate image of maximum size!\n" );
945 return -1;
946 }
947
948 error = -1;
949 for ( i = pixelBytes; i > 0; --i )
950 {
951 error = get_8_bit_image_format( context, CL_MEM_OBJECT_IMAGE1D, CL_MEM_READ_ONLY, i, &image_format_desc );
952 if ( error == CL_SUCCESS )
953 {
954 pixelBytes = i;
955 break;
956 }
957 }
958 test_error( error, "Device does not support format to be used to allocate image of CL_DEVICE_IMAGE_MAX_BUFFER_SIZE\n" );
959
960 log_info("Attempting to create an 1D image with channel order %s from buffer of size %d = %gMB.\n",
961 GetChannelOrderName( image_format_desc.image_channel_order ), (int)maxDimensionPixels, ((float)maxDimensionPixels*pixelBytes/1024.0/1024.0));
962
963 /* Try to allocate a buffer */
964 streams[0] = clCreateBuffer( context, CL_MEM_READ_ONLY, maxDimensionPixels*pixelBytes, NULL, &error );
965 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
966 {
967 print_error( error, "Buffer creation failed for maximum image buffer size" );
968 return -1;
969 }
970
971 /* Try to allocate a 1D image array from buffer */
972 streams[1] = create_image_1d( context, CL_MEM_READ_ONLY, &image_format_desc, maxDimensionPixels, 0, NULL, streams[0], &error );
973 if( ( streams[0] == NULL ) || ( error != CL_SUCCESS ))
974 {
975 print_error( error, "1D Image from buffer creation failed for maximum image buffer size" );
976 return -1;
977 }
978
979 return 0;
980 }
981
982
983
test_min_max_parameter_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)984 int test_min_max_parameter_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
985 {
986 int error, retVal, i;
987 size_t maxSize;
988 char *programSrc;
989 char *ptr;
990 size_t numberExpected;
991 long numberOfIntParametersToTry;
992 char *argumentLine, *codeLines;
993 void *data;
994 cl_long long_result, expectedResult;
995 cl_int int_result;
996 size_t decrement;
997 cl_event event;
998 cl_int event_status;
999 bool embeddedNoLong = gIsEmbedded && !gHasLong;
1000
1001
1002 /* Get the max param size */
1003 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxSize ), &maxSize, NULL );
1004 test_error( error, "Unable to get max parameter size from device" );
1005
1006
1007 if( ((!gIsEmbedded) && (maxSize < 1024)) || ((gIsEmbedded) && (maxSize < 256)) )
1008 {
1009 log_error( "ERROR: Reported max parameter size is less than required! (%d)\n", (int)maxSize );
1010 return -1;
1011 }
1012
1013 /* The embedded profile without cles_khr_int64 extension does not require
1014 * longs, so use ints */
1015 if (embeddedNoLong)
1016 numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_int);
1017 else
1018 numberOfIntParametersToTry = numberExpected = (maxSize-sizeof(cl_mem))/sizeof(cl_long);
1019
1020 decrement = (size_t)(numberOfIntParametersToTry/8);
1021 if (decrement < 1)
1022 decrement = 1;
1023 log_info("Reported max parameter size of %d bytes.\n", (int)maxSize);
1024
1025 while (numberOfIntParametersToTry > 0) {
1026 // These need to be inside to be deallocated automatically on each loop iteration.
1027 clProgramWrapper program;
1028 clMemWrapper mem;
1029 clKernelWrapper kernel;
1030
1031 if (embeddedNoLong)
1032 {
1033 log_info("Trying a kernel with %ld int arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n",
1034 numberOfIntParametersToTry, sizeof(cl_int)*numberOfIntParametersToTry, sizeof(cl_mem),
1035 sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int));
1036 }
1037 else
1038 {
1039 log_info("Trying a kernel with %ld long arguments (%ld bytes) and one cl_mem (%ld bytes) for %ld bytes total.\n",
1040 numberOfIntParametersToTry, sizeof(cl_long)*numberOfIntParametersToTry, sizeof(cl_mem),
1041 sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long));
1042 }
1043
1044 // Allocate memory for the program storage
1045 data = malloc(sizeof(cl_long)*numberOfIntParametersToTry);
1046
1047 argumentLine = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32);
1048 codeLines = (char*)malloc(sizeof(char)*numberOfIntParametersToTry*32);
1049 programSrc = (char*)malloc(sizeof(char)*(numberOfIntParametersToTry*64+1024));
1050 argumentLine[0] = '\0';
1051 codeLines[0] = '\0';
1052 programSrc[0] = '\0';
1053
1054 // Generate our results
1055 expectedResult = 0;
1056 for (i=0; i<(int)numberOfIntParametersToTry; i++)
1057 {
1058 if( gHasLong )
1059 {
1060 ((cl_long *)data)[i] = i;
1061 expectedResult += i;
1062 }
1063 else
1064 {
1065 ((cl_int *)data)[i] = i;
1066 expectedResult += i;
1067 }
1068 }
1069
1070 // Build the program
1071 if( gHasLong)
1072 sprintf(argumentLine, "%s", "long arg0");
1073 else
1074 sprintf(argumentLine, "%s", "int arg0");
1075
1076 sprintf(codeLines, "%s", "result[0] += arg0;");
1077 for (i=1; i<(int)numberOfIntParametersToTry; i++)
1078 {
1079 if( gHasLong)
1080 sprintf(argumentLine + strlen( argumentLine), ", long arg%d", i);
1081 else
1082 sprintf(argumentLine + strlen( argumentLine), ", int arg%d", i);
1083
1084 sprintf(codeLines + strlen( codeLines), "\nresult[0] += arg%d;", i);
1085 }
1086
1087 /* Create a kernel to test with */
1088 sprintf( programSrc, gHasLong ? sample_large_parmam_kernel_pattern[0]:
1089 sample_large_int_parmam_kernel_pattern[0], argumentLine, codeLines);
1090
1091 ptr = programSrc;
1092 if( create_single_kernel_helper( context, &program, &kernel, 1, (const char **)&ptr, "sample_test" ) != 0 )
1093 {
1094 log_info("Create program failed, decrementing number of parameters to try.\n");
1095 numberOfIntParametersToTry -= decrement;
1096 continue;
1097 }
1098
1099 /* Try to set a large argument to the kernel */
1100 retVal = 0;
1101
1102 mem = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(cl_long), NULL,
1103 &error);
1104 test_error(error, "clCreateBuffer failed");
1105
1106 for (i=0; i<(int)numberOfIntParametersToTry; i++) {
1107 if(gHasLong)
1108 error = clSetKernelArg(kernel, i, sizeof(cl_long), &(((cl_long*)data)[i]));
1109 else
1110 error = clSetKernelArg(kernel, i, sizeof(cl_int), &(((cl_int*)data)[i]));
1111
1112 if (error != CL_SUCCESS) {
1113 log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error));
1114 numberOfIntParametersToTry -= decrement;
1115 break;
1116 }
1117 }
1118 if (error != CL_SUCCESS)
1119 continue;
1120
1121
1122 error = clSetKernelArg(kernel, i, sizeof(cl_mem), &mem);
1123 if (error != CL_SUCCESS) {
1124 log_info( "clSetKernelArg failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error));
1125 numberOfIntParametersToTry -= decrement;
1126 continue;
1127 }
1128
1129 size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1};
1130 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event);
1131 if (error != CL_SUCCESS) {
1132 log_info( "clEnqueueNDRangeKernel failed (%s), decrementing number of parameters to try.\n", IGetErrorString(error));
1133 numberOfIntParametersToTry -= decrement;
1134 continue;
1135 }
1136
1137 // Verify that the event does not return an error from the execution
1138 error = clWaitForEvents(1, &event);
1139 test_error( error, "clWaitForEvent failed");
1140 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
1141 test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
1142 clReleaseEvent(event);
1143 if (event_status < 0)
1144 test_error(error, "Kernel execution event returned error");
1145
1146 if(gHasLong)
1147 error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_long), &long_result, 0, NULL, NULL);
1148 else
1149 error = clEnqueueReadBuffer(queue, mem, CL_TRUE, 0, sizeof(cl_int), &int_result, 0, NULL, NULL);
1150
1151 test_error(error, "clEnqueueReadBuffer failed")
1152
1153 free(data);
1154 free(argumentLine);
1155 free(codeLines);
1156 free(programSrc);
1157
1158 if(gHasLong)
1159 {
1160 if (long_result != expectedResult) {
1161 log_error("Expected result (%lld) does not equal actual result (%lld).\n", expectedResult, long_result);
1162 numberOfIntParametersToTry -= decrement;
1163 continue;
1164 } else {
1165 log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_long));
1166 break;
1167 }
1168 }
1169 else
1170 {
1171 if (int_result != expectedResult) {
1172 log_error("Expected result (%lld) does not equal actual result (%d).\n", expectedResult, int_result);
1173 numberOfIntParametersToTry -= decrement;
1174 continue;
1175 } else {
1176 log_info("Results verified at %ld bytes of arguments.\n", sizeof(cl_mem)+numberOfIntParametersToTry*sizeof(cl_int));
1177 break;
1178 }
1179 }
1180 }
1181
1182 if (numberOfIntParametersToTry == (long)numberExpected)
1183 return 0;
1184 return -1;
1185 }
1186
test_min_max_samplers(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1187 int test_min_max_samplers(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1188 {
1189 int error;
1190 cl_uint maxSamplers, i;
1191 clProgramWrapper program;
1192 clKernelWrapper kernel;
1193 char *programSrc, samplerLine[1024];
1194 size_t maxParameterSize;
1195 cl_event event;
1196 cl_int event_status;
1197 cl_uint minRequiredSamplers = gIsEmbedded ? 8 : 16;
1198
1199
1200 PASSIVE_REQUIRE_IMAGE_SUPPORT( deviceID )
1201
1202 /* Get the max value */
1203 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_SAMPLERS, sizeof( maxSamplers ), &maxSamplers, NULL );
1204 test_error( error, "Unable to get max sampler count from device" );
1205
1206 if( maxSamplers < minRequiredSamplers )
1207 {
1208 log_error( "ERROR: Reported max sampler count is less than required! (%d)\n", (int)maxSamplers );
1209 return -1;
1210 }
1211
1212 log_info("Reported max %d samplers.\n", maxSamplers);
1213
1214 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
1215 test_error( error, "Unable to get max parameter size from device" );
1216
1217 // Subtract the size of the result
1218 maxParameterSize -= 2*sizeof(cl_mem);
1219
1220 // Calculate the number we can use
1221 if (maxParameterSize/sizeof(cl_sampler) < maxSamplers) {
1222 log_info("WARNING: Max parameter size of %d bytes limits test to %d max sampler arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_sampler)));
1223 maxSamplers = (unsigned int)(maxParameterSize/sizeof(cl_sampler));
1224 }
1225
1226 /* Create a kernel to test with */
1227 programSrc = (char *)malloc( ( strlen( sample_sampler_kernel_pattern[ 1 ] ) + 8 ) * ( maxSamplers ) +
1228 strlen( sample_sampler_kernel_pattern[ 0 ] ) + strlen( sample_sampler_kernel_pattern[ 2 ] ) +
1229 ( strlen( sample_sampler_kernel_pattern[ 3 ] ) + 8 ) * maxSamplers +
1230 strlen( sample_sampler_kernel_pattern[ 4 ] ) );
1231 strcpy( programSrc, sample_sampler_kernel_pattern[ 0 ] );
1232 for( i = 0; i < maxSamplers; i++ )
1233 {
1234 sprintf( samplerLine, sample_sampler_kernel_pattern[ 1 ], i );
1235 strcat( programSrc, samplerLine );
1236 }
1237 strcat( programSrc, sample_sampler_kernel_pattern[ 2 ] );
1238 for( i = 0; i < maxSamplers; i++ )
1239 {
1240 sprintf( samplerLine, sample_sampler_kernel_pattern[ 3 ], i );
1241 strcat( programSrc, samplerLine );
1242 }
1243 strcat( programSrc, sample_sampler_kernel_pattern[ 4 ] );
1244
1245
1246 error = create_single_kernel_helper(context, &program, &kernel, 1, (const char **)&programSrc, "sample_test");
1247 test_error( error, "Failed to create the program and kernel.");
1248
1249 // We have to set up some fake parameters so it'll work
1250 clSamplerWrapper *samplers = new clSamplerWrapper[maxSamplers];
1251
1252 cl_image_format format = { CL_RGBA, CL_SIGNED_INT8 };
1253
1254 clMemWrapper image = create_image_2d( context, CL_MEM_READ_WRITE, &format, 16, 16, 0, NULL, &error );
1255 test_error( error, "Unable to create a test image" );
1256
1257 clMemWrapper stream =
1258 clCreateBuffer(context, CL_MEM_READ_WRITE, 16, NULL, &error);
1259 test_error( error, "Unable to create test buffer" );
1260
1261 error = clSetKernelArg( kernel, 0, sizeof( cl_mem ), &image );
1262 error |= clSetKernelArg( kernel, 1, sizeof( cl_mem ), &stream );
1263 test_error( error, "Unable to set kernel arguments" );
1264 for( i = 0; i < maxSamplers; i++ )
1265 {
1266 samplers[ i ] = clCreateSampler( context, CL_FALSE, CL_ADDRESS_NONE, CL_FILTER_NEAREST, &error );
1267 test_error( error, "Unable to create sampler" );
1268
1269 error = clSetKernelArg( kernel, 2 + i, sizeof( cl_sampler ), &samplers[ i ] );
1270 test_error( error, "Unable to set sampler argument" );
1271 }
1272
1273 size_t globalDim[3]={1,1,1}, localDim[3]={1,1,1};
1274 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, globalDim, localDim, 0, NULL, &event);
1275 test_error(error, "clEnqueueNDRangeKernel failed with maximum number of samplers.");
1276
1277 // Verify that the event does not return an error from the execution
1278 error = clWaitForEvents(1, &event);
1279 test_error( error, "clWaitForEvent failed");
1280 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
1281 test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
1282 clReleaseEvent(event);
1283 if (event_status < 0)
1284 test_error(error, "Kernel execution event returned error");
1285
1286 free( programSrc );
1287 delete[] samplers;
1288 return 0;
1289 }
1290
1291 #define PASSING_FRACTION 4
test_min_max_constant_buffer_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1292 int test_min_max_constant_buffer_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1293 {
1294 int error;
1295 clProgramWrapper program;
1296 clKernelWrapper kernel;
1297 size_t threads[1], localThreads[1];
1298 cl_int *constantData, *resultData;
1299 cl_ulong maxSize, stepSize, currentSize, maxGlobalSize, maxAllocSize;
1300 int i;
1301 cl_event event;
1302 cl_int event_status;
1303 MTdata d;
1304
1305 /* Verify our test buffer won't be bigger than allowed */
1306 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
1307 test_error( error, "Unable to get max constant buffer size" );
1308
1309 if( ( 0 == gIsEmbedded && maxSize < 64L * 1024L ) || maxSize < 1L * 1024L )
1310 {
1311 log_error( "ERROR: Reported max constant buffer size less than required by OpenCL 1.0 (reported %d KB)\n", (int)( maxSize / 1024L ) );
1312 return -1;
1313 }
1314
1315 log_info("Reported max constant buffer size of %lld bytes.\n", maxSize);
1316
1317 // Limit test buffer size to 1/8 of CL_DEVICE_GLOBAL_MEM_SIZE
1318 error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxGlobalSize), &maxGlobalSize, 0);
1319 test_error(error, "Unable to get CL_DEVICE_GLOBAL_MEM_SIZE");
1320
1321 if (maxSize > maxGlobalSize / 8)
1322 maxSize = maxGlobalSize / 8;
1323
1324 error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(maxAllocSize), &maxAllocSize, 0);
1325 test_error(error, "Unable to get CL_DEVICE_MAX_MEM_ALLOC_SIZE ");
1326
1327 if (maxSize > maxAllocSize)
1328 maxSize = maxAllocSize;
1329
1330 /* Create a kernel to test with */
1331 if( create_single_kernel_helper( context, &program, &kernel, 1, sample_const_arg_kernel, "sample_test" ) != 0 )
1332 {
1333 return -1;
1334 }
1335
1336 /* Try the returned max size and decrease it until we get one that works. */
1337 stepSize = maxSize/16;
1338 currentSize = maxSize;
1339 int allocPassed = 0;
1340 d = init_genrand( gRandomSeed );
1341 while (!allocPassed && currentSize >= maxSize/PASSING_FRACTION) {
1342 log_info("Attempting to allocate constant buffer of size %lld bytes\n", maxSize);
1343
1344 /* Create some I/O streams */
1345 size_t sizeToAllocate = ((size_t)currentSize/sizeof( cl_int ))*sizeof(cl_int);
1346 size_t numberOfInts = sizeToAllocate/sizeof(cl_int);
1347 constantData = (cl_int *)malloc( sizeToAllocate);
1348 if (constantData == NULL)
1349 {
1350 log_error("Failed to allocate memory for constantData!\n");
1351 free_mtdata(d);
1352 return EXIT_FAILURE;
1353 }
1354
1355 for(i=0; i<(int)(numberOfInts); i++)
1356 constantData[i] = (int)genrand_int32(d);
1357
1358 clMemWrapper streams[3];
1359 streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
1360 sizeToAllocate, constantData, &error);
1361 test_error( error, "Creating test array failed" );
1362 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
1363 NULL, &error);
1364 test_error( error, "Creating test array failed" );
1365
1366
1367 /* Set the arguments */
1368 error = clSetKernelArg(kernel, 0, sizeof( streams[0] ), &streams[0]);
1369 test_error( error, "Unable to set indexed kernel arguments" );
1370 error = clSetKernelArg(kernel, 1, sizeof( streams[1] ), &streams[1]);
1371 test_error( error, "Unable to set indexed kernel arguments" );
1372
1373
1374 /* Test running the kernel and verifying it */
1375 threads[0] = numberOfInts;
1376 localThreads[0] = 1;
1377 log_info("Filling constant buffer with %d cl_ints (%d bytes).\n", (int)threads[0], (int)(threads[0]*sizeof(cl_int)));
1378
1379 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event );
1380 /* If we failed due to a resource issue, reduce the size and try again. */
1381 if ((error == CL_OUT_OF_RESOURCES) || (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (error == CL_OUT_OF_HOST_MEMORY)) {
1382 log_info("Kernel enqueue failed at size %lld, trying at a reduced size.\n", currentSize);
1383 currentSize -= stepSize;
1384 free(constantData);
1385 continue;
1386 }
1387 test_error( error, "clEnqueueNDRangeKernel with maximum constant buffer size failed.");
1388
1389 // Verify that the event does not return an error from the execution
1390 error = clWaitForEvents(1, &event);
1391 test_error( error, "clWaitForEvent failed");
1392 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
1393 test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
1394 clReleaseEvent(event);
1395 if (event_status < 0) {
1396 if ((event_status == CL_OUT_OF_RESOURCES) || (event_status == CL_MEM_OBJECT_ALLOCATION_FAILURE) || (event_status == CL_OUT_OF_HOST_MEMORY)) {
1397 log_info("Kernel event indicates failure at size %lld, trying at a reduced size.\n", currentSize);
1398 currentSize -= stepSize;
1399 free(constantData);
1400 continue;
1401 } else {
1402 test_error(error, "Kernel execution event returned error");
1403 }
1404 }
1405
1406 /* Otherwise we did not fail due to resource issues. */
1407 allocPassed = 1;
1408
1409 resultData = (cl_int *)malloc(sizeToAllocate);
1410 if (resultData == NULL)
1411 {
1412 log_error("Failed to allocate memory for resultData!\n");
1413 free(constantData);
1414 free_mtdata(d);
1415 return EXIT_FAILURE;
1416 }
1417
1418 error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL);
1419 test_error( error, "clEnqueueReadBuffer failed");
1420
1421 for(i=0; i<(int)(numberOfInts); i++)
1422 if (constantData[i] != resultData[i]) {
1423 log_error("Data failed to verify: constantData[%d]=%d != resultData[%d]=%d\n",
1424 i, constantData[i], i, resultData[i]);
1425 free( constantData );
1426 free(resultData);
1427 free_mtdata(d); d = NULL;
1428 return -1;
1429 }
1430
1431 free( constantData );
1432 free(resultData);
1433 }
1434 free_mtdata(d); d = NULL;
1435
1436 if (allocPassed) {
1437 if (currentSize < maxSize/PASSING_FRACTION) {
1438 log_error("Failed to allocate at least 1/8 of the reported constant size.\n");
1439 return -1;
1440 } else if (currentSize != maxSize) {
1441 log_info("Passed at reduced size. (%lld of %lld bytes)\n", currentSize, maxSize);
1442 return 0;
1443 }
1444 return 0;
1445 }
1446 return -1;
1447 }
1448
test_min_max_constant_args(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1449 int test_min_max_constant_args(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1450 {
1451 int error;
1452 clProgramWrapper program;
1453 clKernelWrapper kernel;
1454 clMemWrapper *streams;
1455 size_t threads[1], localThreads[1];
1456 cl_uint i, maxArgs;
1457 cl_ulong maxSize;
1458 cl_ulong maxParameterSize;
1459 size_t individualBufferSize;
1460 char *programSrc, *constArgs, *str2;
1461 char str[512];
1462 const char *ptr;
1463 cl_event event;
1464 cl_int event_status;
1465
1466
1467 /* Verify our test buffer won't be bigger than allowed */
1468 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_ARGS, sizeof( maxArgs ), &maxArgs, 0 );
1469 test_error( error, "Unable to get max constant arg count" );
1470
1471 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_PARAMETER_SIZE, sizeof( maxParameterSize ), &maxParameterSize, NULL );
1472 test_error( error, "Unable to get max parameter size from device" );
1473
1474 // Subtract the size of the result
1475 maxParameterSize -= sizeof(cl_mem);
1476
1477 // Calculate the number we can use
1478 if (maxParameterSize/sizeof(cl_mem) < maxArgs) {
1479 log_info("WARNING: Max parameter size of %d bytes limits test to %d max image arguments.\n", (int)maxParameterSize, (int)(maxParameterSize/sizeof(cl_mem)));
1480 maxArgs = (unsigned int)(maxParameterSize/sizeof(cl_mem));
1481 }
1482
1483
1484 if( maxArgs < (gIsEmbedded ? 4 : 8) )
1485 {
1486 log_error( "ERROR: Reported max constant arg count less than required by OpenCL 1.0 (reported %d)\n", (int)maxArgs );
1487 return -1;
1488 }
1489
1490 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof( maxSize ), &maxSize, 0 );
1491 test_error( error, "Unable to get max constant buffer size" );
1492 individualBufferSize = ((int)maxSize/2)/maxArgs;
1493
1494 log_info("Reported max constant arg count of %d and max constant buffer size of %d. Test will attempt to allocate half of that, or %d buffers of size %d.\n",
1495 (int)maxArgs, (int)maxSize, (int)maxArgs, (int)individualBufferSize);
1496
1497 str2 = (char*)malloc(sizeof(char)*32*(maxArgs+2));
1498 constArgs = (char*)malloc(sizeof(char)*32*(maxArgs+2));
1499 programSrc = (char*)malloc(sizeof(char)*32*2*(maxArgs+2)+1024);
1500
1501 /* Create a test program */
1502 constArgs[0] = 0;
1503 str2[0] = 0;
1504 for( i = 0; i < maxArgs-1; i++ )
1505 {
1506 sprintf( str, ", __constant int *src%d", (int)( i + 2 ) );
1507 strcat( constArgs, str );
1508 sprintf( str2 + strlen( str2), "\tdst[tid] += src%d[tid];\n", (int)(i+2));
1509 if (strlen(str2) > (sizeof(char)*32*(maxArgs+2)-32) || strlen(constArgs) > (sizeof(char)*32*(maxArgs+2)-32)) {
1510 log_info("Limiting number of arguments tested to %d due to test program allocation size.\n", i);
1511 break;
1512 }
1513 }
1514 sprintf( programSrc, sample_const_max_arg_kernel_pattern, constArgs, str2 );
1515
1516 /* Create a kernel to test with */
1517 ptr = programSrc;
1518 if( create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "sample_test" ) != 0 )
1519 {
1520 return -1;
1521 }
1522
1523 /* Create some I/O streams */
1524 streams = new clMemWrapper[ maxArgs + 1 ];
1525 for( i = 0; i < maxArgs + 1; i++ )
1526 {
1527 streams[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
1528 individualBufferSize, NULL, &error);
1529 test_error( error, "Creating test array failed" );
1530 }
1531
1532 /* Set the arguments */
1533 for( i = 0; i < maxArgs + 1; i++ )
1534 {
1535 error = clSetKernelArg(kernel, i, sizeof( streams[i] ), &streams[i]);
1536 test_error( error, "Unable to set kernel argument" );
1537 }
1538
1539 /* Test running the kernel and verifying it */
1540 threads[0] = (size_t)10;
1541 while (threads[0]*sizeof(cl_int) > individualBufferSize)
1542 threads[0]--;
1543
1544 error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
1545 test_error( error, "Unable to get work group size to use" );
1546
1547 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &event );
1548 test_error( error, "clEnqueueNDRangeKernel failed");
1549
1550 // Verify that the event does not return an error from the execution
1551 error = clWaitForEvents(1, &event);
1552 test_error( error, "clWaitForEvent failed");
1553 error = clGetEventInfo(event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(event_status), &event_status, NULL);
1554 test_error( error, "clGetEventInfo for CL_EVENT_COMMAND_EXECUTION_STATUS failed");
1555 clReleaseEvent(event);
1556 if (event_status < 0)
1557 test_error(error, "Kernel execution event returned error");
1558
1559 error = clFinish(queue);
1560 test_error( error, "clFinish failed.");
1561
1562 delete [] streams;
1563 free(str2);
1564 free(constArgs);
1565 free(programSrc);
1566 return 0;
1567 }
1568
test_min_max_compute_units(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1569 int test_min_max_compute_units(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1570 {
1571 int error;
1572 cl_uint value;
1573
1574
1575 error = clGetDeviceInfo( deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( value ), &value, 0 );
1576 test_error( error, "Unable to get compute unit count" );
1577
1578 if( value < 1 )
1579 {
1580 log_error( "ERROR: Reported compute unit count less than required by OpenCL 1.0 (reported %d)\n", (int)value );
1581 return -1;
1582 }
1583
1584 log_info("Reported %d max compute units.\n", value);
1585
1586 return 0;
1587 }
1588
test_min_max_address_bits(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1589 int test_min_max_address_bits(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1590 {
1591 int error;
1592 cl_uint value;
1593
1594
1595 error = clGetDeviceInfo( deviceID, CL_DEVICE_ADDRESS_BITS, sizeof( value ), &value, 0 );
1596 test_error( error, "Unable to get address bit count" );
1597
1598 if( value != 32 && value != 64 )
1599 {
1600 log_error( "ERROR: Reported address bit count not valid by OpenCL 1.0 (reported %d)\n", (int)value );
1601 return -1;
1602 }
1603
1604 log_info("Reported %d device address bits.\n", value);
1605
1606 return 0;
1607 }
1608
test_min_max_single_fp_config(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1609 int test_min_max_single_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1610 {
1611 int error;
1612 cl_device_fp_config value;
1613 char profile[128] = "";
1614
1615 error = clGetDeviceInfo( deviceID, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( value ), &value, 0 );
1616 test_error( error, "Unable to get device single fp config" );
1617
1618 //Check to see if we are an embedded profile device
1619 if((error = clGetDeviceInfo( deviceID, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL )))
1620 {
1621 log_error( "FAILURE: Unable to get CL_DEVICE_PROFILE: error %d\n", error );
1622 return error;
1623 }
1624
1625 if( 0 == strcmp( profile, "EMBEDDED_PROFILE" ))
1626 { // embedded device
1627
1628 if( 0 == (value & (CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO)))
1629 {
1630 log_error( "FAILURE: embedded device supports neither CL_FP_ROUND_TO_NEAREST or CL_FP_ROUND_TO_ZERO\n" );
1631 return -1;
1632 }
1633 }
1634 else
1635 { // Full profile
1636 if( ( value & ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN )) != ( CL_FP_ROUND_TO_NEAREST | CL_FP_INF_NAN ) )
1637 {
1638 log_error( "ERROR: Reported single fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
1639 return -1;
1640 }
1641 }
1642 return 0;
1643 }
1644
test_min_max_double_fp_config(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1645 int test_min_max_double_fp_config(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1646 {
1647 int error;
1648 cl_device_fp_config value;
1649
1650 error = clGetDeviceInfo( deviceID, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof( value ), &value, 0 );
1651 test_error( error, "Unable to get device double fp config" );
1652
1653 if (value == 0)
1654 return 0;
1655
1656 if( ( value & (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)) != ( CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM) )
1657 {
1658 log_error( "ERROR: Reported double fp config doesn't meet minimum set by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
1659 return -1;
1660 }
1661 return 0;
1662 }
1663
test_min_max_local_mem_size(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1664 int test_min_max_local_mem_size(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1665 {
1666 int error;
1667 clProgramWrapper program;
1668 clKernelWrapper kernel;
1669 clMemWrapper streams[3];
1670 size_t threads[1], localThreads[1];
1671 cl_int *localData, *resultData;
1672 cl_ulong maxSize, kernelLocalUsage, min_max_local_mem_size;
1673 Version device_version;
1674 int i;
1675 int err = 0;
1676 MTdata d;
1677
1678 /* Verify our test buffer won't be bigger than allowed */
1679 error = clGetDeviceInfo( deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof( maxSize ), &maxSize, 0 );
1680 test_error( error, "Unable to get max local buffer size" );
1681
1682 try
1683 {
1684 device_version = get_device_cl_version(deviceID);
1685 } catch (const std::runtime_error &e)
1686 {
1687 log_error("%s", e.what());
1688 return -1;
1689 }
1690
1691 if (!gIsEmbedded)
1692 {
1693 if (device_version == Version(1, 0))
1694 min_max_local_mem_size = 16L * 1024L;
1695 else
1696 min_max_local_mem_size = 32L * 1024L;
1697 }
1698 else
1699 {
1700 min_max_local_mem_size = 1L * 1024L;
1701 }
1702
1703 if (maxSize < min_max_local_mem_size)
1704 {
1705 const std::string version_as_string = device_version.to_string();
1706 log_error("ERROR: Reported local mem size less than required by OpenCL "
1707 "%s (reported %d KB)\n",
1708 version_as_string.c_str(), (int)(maxSize / 1024L));
1709 return -1;
1710 }
1711
1712 log_info("Reported max local buffer size for device: %lld bytes.\n", maxSize);
1713
1714 /* Create a kernel to test with */
1715 if( create_single_kernel_helper( context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" ) != 0 )
1716 {
1717 return -1;
1718 }
1719
1720 error = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_MEM_SIZE, sizeof(kernelLocalUsage), &kernelLocalUsage, NULL);
1721 test_error(error, "clGetKernelWorkGroupInfo for CL_KERNEL_LOCAL_MEM_SIZE failed");
1722
1723 log_info("Reported local buffer usage for kernel (CL_KERNEL_LOCAL_MEM_SIZE): %lld bytes.\n", kernelLocalUsage);
1724
1725 /* Create some I/O streams */
1726 size_t sizeToAllocate = ((size_t)(maxSize-kernelLocalUsage)/sizeof( cl_int ))*sizeof(cl_int);
1727 size_t numberOfInts = sizeToAllocate/sizeof(cl_int);
1728
1729 log_info("Attempting to use %lld bytes of local memory.\n", (cl_ulong)sizeToAllocate);
1730
1731 localData = (cl_int *)malloc( sizeToAllocate );
1732 d = init_genrand( gRandomSeed );
1733 for(i=0; i<(int)(numberOfInts); i++)
1734 localData[i] = (int)genrand_int32(d);
1735 free_mtdata(d); d = NULL;
1736
1737 streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, sizeToAllocate,
1738 localData, &error);
1739 test_error( error, "Creating test array failed" );
1740 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeToAllocate,
1741 NULL, &error);
1742 test_error( error, "Creating test array failed" );
1743
1744
1745 /* Set the arguments */
1746 error = clSetKernelArg(kernel, 0, sizeToAllocate, NULL);
1747 test_error( error, "Unable to set indexed kernel arguments" );
1748 error = clSetKernelArg(kernel, 1, sizeof( streams[0] ), &streams[0]);
1749 test_error( error, "Unable to set indexed kernel arguments" );
1750 error = clSetKernelArg(kernel, 2, sizeof( streams[1] ), &streams[1]);
1751 test_error( error, "Unable to set indexed kernel arguments" );
1752
1753
1754 /* Test running the kernel and verifying it */
1755 threads[0] = numberOfInts;
1756 localThreads[0] = 1;
1757 log_info("Creating local buffer with %d cl_ints (%d bytes).\n", (int)numberOfInts, (int)sizeToAllocate);
1758
1759 cl_event evt;
1760 cl_int evt_err;
1761 error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, &evt );
1762 test_error(error, "clEnqueueNDRangeKernel failed");
1763
1764 error = clFinish(queue);
1765 test_error( error, "clFinish failed");
1766
1767 error = clGetEventInfo(evt, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof evt_err, &evt_err, NULL);
1768 test_error( error, "clGetEventInfo with maximum local buffer size failed.");
1769
1770 if (evt_err != CL_COMPLETE) {
1771 print_error(evt_err, "Kernel event returned error");
1772 clReleaseEvent(evt);
1773 return -1;
1774 }
1775
1776 resultData = (cl_int *)malloc(sizeToAllocate);
1777
1778 error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, sizeToAllocate, resultData, 0, NULL, NULL);
1779 test_error( error, "clEnqueueReadBuffer failed");
1780
1781 for(i=0; i<(int)(numberOfInts); i++)
1782 if (localData[i] != resultData[i]) {
1783 clReleaseEvent(evt);
1784 free( localData );
1785 free(resultData);
1786 log_error("Results failed to verify.\n");
1787 return -1;
1788 }
1789 clReleaseEvent(evt);
1790 free( localData );
1791 free(resultData);
1792
1793 return err;
1794 }
1795
test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1796 int test_min_max_kernel_preferred_work_group_size_multiple(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1797 {
1798 int err;
1799 clProgramWrapper program;
1800 clKernelWrapper kernel;
1801
1802 size_t max_local_workgroup_size[3];
1803 size_t max_workgroup_size = 0, preferred_workgroup_size = 0;
1804
1805 err = create_single_kernel_helper(context, &program, &kernel, 1, sample_local_arg_kernel, "sample_test" );
1806 test_error(err, "Failed to build kernel/program.");
1807
1808 err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE,
1809 sizeof(max_workgroup_size), &max_workgroup_size, NULL);
1810 test_error(err, "clGetKernelWorkgroupInfo failed.");
1811
1812 err = clGetKernelWorkGroupInfo(kernel, deviceID, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
1813 sizeof(preferred_workgroup_size), &preferred_workgroup_size, NULL);
1814 test_error(err, "clGetKernelWorkgroupInfo failed.");
1815
1816 err = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_local_workgroup_size), max_local_workgroup_size, NULL);
1817 test_error(err, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
1818
1819 // Since the preferred size is only a performance hint, we can only really check that we get a sane value
1820 // back
1821 log_info( "size: %ld preferred: %ld max: %ld\n", max_workgroup_size, preferred_workgroup_size, max_local_workgroup_size[0] );
1822
1823 if( preferred_workgroup_size > max_workgroup_size )
1824 {
1825 log_error( "ERROR: Reported preferred workgroup multiple larger than max workgroup size (preferred %ld, max %ld)\n", preferred_workgroup_size, max_workgroup_size );
1826 return -1;
1827 }
1828
1829 return 0;
1830 }
1831
test_min_max_execution_capabilities(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1832 int test_min_max_execution_capabilities(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1833 {
1834 int error;
1835 cl_device_exec_capabilities value;
1836
1837
1838 error = clGetDeviceInfo( deviceID, CL_DEVICE_EXECUTION_CAPABILITIES, sizeof( value ), &value, 0 );
1839 test_error( error, "Unable to get execution capabilities" );
1840
1841 if( ( value & CL_EXEC_KERNEL ) != CL_EXEC_KERNEL )
1842 {
1843 log_error( "ERROR: Reported execution capabilities less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
1844 return -1;
1845 }
1846 return 0;
1847 }
1848
test_min_max_queue_properties(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1849 int test_min_max_queue_properties(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1850 {
1851 int error;
1852 cl_command_queue_properties value;
1853
1854
1855 error = clGetDeviceInfo( deviceID, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof( value ), &value, 0 );
1856 test_error( error, "Unable to get queue properties" );
1857
1858 if( ( value & CL_QUEUE_PROFILING_ENABLE ) != CL_QUEUE_PROFILING_ENABLE )
1859 {
1860 log_error( "ERROR: Reported queue properties less than required by OpenCL 1.0 (reported 0x%08x)\n", (int)value );
1861 return -1;
1862 }
1863 return 0;
1864 }
1865
test_min_max_device_version(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1866 int test_min_max_device_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1867 {
1868 // Query for the device version.
1869 Version device_cl_version = get_device_cl_version(deviceID);
1870 log_info("Returned version %s.\n", device_cl_version.to_string().c_str());
1871
1872 // Make sure 2.x devices support required extensions for 2.x
1873 // note: these extensions are **not** required for devices
1874 // supporting OpenCL-3.0
1875 const char *requiredExtensions2x[] = {
1876 "cl_khr_3d_image_writes",
1877 "cl_khr_image2d_from_buffer",
1878 "cl_khr_depth_images",
1879 };
1880
1881 // Make sure 1.1 devices support required extensions for 1.1
1882 const char *requiredExtensions11[] = {
1883 "cl_khr_global_int32_base_atomics",
1884 "cl_khr_global_int32_extended_atomics",
1885 "cl_khr_local_int32_base_atomics",
1886 "cl_khr_local_int32_extended_atomics",
1887 "cl_khr_byte_addressable_store",
1888 };
1889
1890
1891 if (device_cl_version >= Version(1, 1))
1892 {
1893 log_info("Checking for required extensions for OpenCL 1.1 and later "
1894 "devices...\n");
1895 for (int i = 0; i < ARRAY_SIZE(requiredExtensions11); i++)
1896 {
1897 if (!is_extension_available(deviceID, requiredExtensions11[i]))
1898 {
1899 log_error("ERROR: Required extension for 1.1 and greater "
1900 "devices is not in extension string: %s\n",
1901 requiredExtensions11[i]);
1902 return -1;
1903 }
1904 else
1905 log_info("\t%s\n", requiredExtensions11[i]);
1906 }
1907
1908 if (device_cl_version >= Version(1, 2))
1909 {
1910 log_info("Checking for required extensions for OpenCL 1.2 and "
1911 "later devices...\n");
1912 // The only required extension for an OpenCL-1.2 device is
1913 // cl_khr_fp64 and it is only required if double precision is
1914 // supported.
1915 cl_device_fp_config doubles_supported;
1916 cl_int error = clGetDeviceInfo(deviceID, CL_DEVICE_DOUBLE_FP_CONFIG,
1917 sizeof(doubles_supported),
1918 &doubles_supported, 0);
1919 test_error(error, "Unable to get device double fp config");
1920 if (doubles_supported)
1921 {
1922 if (!is_extension_available(deviceID, "cl_khr_fp64"))
1923 {
1924 log_error(
1925 "ERROR: Required extension for 1.2 and greater devices "
1926 "is not in extension string: cl_khr_fp64\n");
1927 }
1928 else
1929 {
1930 log_info("\t%s\n", "cl_khr_fp64");
1931 }
1932 }
1933 }
1934
1935 if (device_cl_version >= Version(2, 0)
1936 && device_cl_version < Version(3, 0))
1937 {
1938 log_info("Checking for required extensions for OpenCL 2.0, 2.1 and "
1939 "2.2 devices...\n");
1940 for (int i = 0; i < ARRAY_SIZE(requiredExtensions2x); i++)
1941 {
1942 if (!is_extension_available(deviceID, requiredExtensions2x[i]))
1943 {
1944 log_error("ERROR: Required extension for 2.0, 2.1 and 2.2 "
1945 "devices is not in extension string: %s\n",
1946 requiredExtensions2x[i]);
1947 return -1;
1948 }
1949 else
1950 {
1951 log_info("\t%s\n", requiredExtensions2x[i]);
1952 }
1953 }
1954 }
1955 }
1956 else
1957 log_info("WARNING: skipping required extension test -- OpenCL 1.0 "
1958 "device.\n");
1959 return 0;
1960 }
1961
test_min_max_language_version(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1962 int test_min_max_language_version(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1963 {
1964 cl_int error;
1965 cl_char buffer[ 4098 ];
1966 size_t length;
1967
1968 // Device version should fit the regex "OpenCL [0-9]+\.[0-9]+ *.*"
1969 error = clGetDeviceInfo( deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof( buffer ), buffer, &length );
1970 test_error( error, "Unable to get device opencl c version string" );
1971 if( memcmp( buffer, "OpenCL C ", strlen( "OpenCL C " ) ) != 0 )
1972 {
1973 log_error( "ERROR: Initial part of device language version string does not match required format! (returned: \"%s\")\n", (char *)buffer );
1974 return -1;
1975 }
1976
1977 log_info("Returned version \"%s\".\n", buffer);
1978
1979 char *p1 = (char *)buffer + strlen( "OpenCL C " );
1980 while( *p1 == ' ' )
1981 p1++;
1982 char *p2 = p1;
1983 if( ! isdigit(*p2) )
1984 {
1985 log_error( "ERROR: Major revision number must follow space behind OpenCL C! (returned %s)\n", (char*) buffer );
1986 return -1;
1987 }
1988 while( isdigit( *p2 ) )
1989 p2++;
1990 if( *p2 != '.' )
1991 {
1992 log_error( "ERROR: Version number must contain a decimal point! (returned: %s)\n", (char *)buffer );
1993 return -1;
1994 }
1995 char *p3 = p2 + 1;
1996 if( ! isdigit(*p3) )
1997 {
1998 log_error( "ERROR: Minor revision number is missing or does not abut the decimal point! (returned %s)\n", (char*) buffer );
1999 return -1;
2000 }
2001 while( isdigit( *p3 ) )
2002 p3++;
2003 if( *p3 != ' ' )
2004 {
2005 log_error( "ERROR: A space must appear after the minor version! (returned: %s)\n", (char *)buffer );
2006 return -1;
2007 }
2008 *p2 = ' '; // Put in a space for atoi below.
2009 p2++;
2010
2011 int major = atoi( p1 );
2012 int minor = atoi( p2 );
2013 int minor_revision = 2;
2014
2015 if( major * 10 + minor < 10 + minor_revision )
2016 {
2017 // If the language version did not match, check to see if OPENCL_1_0_DEVICE is set.
2018 if( getenv("OPENCL_1_0_DEVICE"))
2019 {
2020 log_info( "WARNING: This test was run with OPENCL_1_0_DEVICE defined! This is not a OpenCL 1.1 or OpenCL 1.2 compatible device!!!\n" );
2021 }
2022 else if( getenv("OPENCL_1_1_DEVICE"))
2023 {
2024 log_info( "WARNING: This test was run with OPENCL_1_1_DEVICE defined! This is not a OpenCL 1.2 compatible device!!!\n" );
2025 }
2026 else
2027 {
2028 log_error( "ERROR: OpenCL device language version returned is less than 1.%d! (Returned: %s)\n", minor_revision, (char *)buffer );
2029 return -1;
2030 }
2031 }
2032
2033 // Sanity checks on the returned values
2034 if( length != (strlen( (char *)buffer ) + 1 ))
2035 {
2036 log_error( "ERROR: Returned length of version string does not match actual length (actual: %d, returned: %d)\n", (int)strlen( (char *)buffer ), (int)length );
2037 return -1;
2038 }
2039
2040 return 0;
2041 }
2042
2043