//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/conversions.h"
#ifndef _WIN32
#include <unistd.h>
#endif

#define INT_TEST_VALUE 402258822
#define LONG_TEST_VALUE 515154531254381446LL


const char *atomic_global_pattern[] = {
    "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    ,
    "\n"
    "}\n" };
34
35 const char *atomic_local_pattern[] = {
36 "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
37 "{\n"
38 " int tid = get_global_id(0);\n"
39 " int dstItemIdx;\n"
40 "\n"
41 " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
42 " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
43 " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
44 " barrier( CLK_LOCAL_MEM_FENCE );\n"
45 "\n"
46 ,
47 " barrier( CLK_LOCAL_MEM_FENCE );\n"
48 " // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
49 " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
50 " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
51 "}\n" };


#define TEST_COUNT (128 * 1024)


struct TestFns
{
    cl_int mIntStartValue;
    cl_long mLongStartValue;

    size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );

    // Integer versions
    cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
    void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
    bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );

    // Long versions
    cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
    void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
    bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );

    // Float versions
    cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
    void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
    bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
};
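// A sketch of how the tests below fill this in (taken from the add test
// further down): start values, no NumResultsFn (so a single destination
// value), expected-value callbacks for int and long, and no generate/verify
// callbacks:
//
//   TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL,
//                   test_atomic_add_result_long, NULL, NULL };
//
// The trailing float members are left zero-initialized; only the xchg test
// supplies them.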

bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
{
    const char *extensionNames[8] = {
        "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
        "cl_khr_local_int32_base_atomics", "cl_khr_local_int32_extended_atomics",
        "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics",
        "cl_khr_int64_base_atomics", "cl_khr_int64_extended_atomics" // intentionally repeated: 64-bit atomics have no separate local/global extensions
    };

    size_t index = 0;
    if( extended )
        index += 1;
    if( isLocal )
        index += 2;

    Version version = get_device_cl_version(device);

    switch (dataType)
    {
        case kInt:
        case kUInt:
            if( version >= Version(1,1) )
                return true;
            break;
        case kLong:
        case kULong:
            index += 4;
            break;
        case kFloat: // this has to stay separate since the float atomics aren't in the 1.0 extensions
            return version >= Version(1,1);
        default:
            log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType );
            return false;
    }

    return is_extension_available( device, extensionNames[index] );
}
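// Worked example of the index math above: extended=true (+1), isLocal=true
// (+2), dataType=kLong (+4) gives index 7, i.e. "cl_khr_int64_extended_atomics".
// The repeated entries at indices 6 and 7 are what make the isLocal offset
// harmless for 64-bit types.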

int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
                         TestFns testFns,
                         bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    size_t threads[1];
    clMemWrapper streams[2];
    void *refValues, *startRefValues;
    size_t threadSize, groupSize;
    const char *programLines[4];
    char pragma[ 512 ];
    char programHeader[ 512 ];
    MTdata d;
    size_t typeSize = get_explicit_type_size( dataType );


    // Verify we can run first
    bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
    if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
    {
        // Only print for the signed version (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
        if( dataType == kFloat )
            log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
        else if( !isUnsigned )
            log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
        // Since we don't support the operation, it implicitly passes
        return 0;
    }
    else
    {
        if( dataType == kFloat )
            log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
        else
            log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
                     (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
    }

    //// Set up the kernel code

    // Create the pragma line for this kernel
    bool isLong = ( dataType == kLong || dataType == kULong );
    sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
            isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
            extended ? "extended" : "base" );
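    // For example, the line built above comes out as
    //   #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable
    // for a local extended 32-bit test, or
    //   #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
    // for any base 64-bit test (the 64-bit extensions carry no local/global split).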

    // Now create the program header
    const char *typeName = get_explicit_type_name( dataType );
    if( isLocal )
        sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
    else
        sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );

    // Set up our entire program now
    programLines[ 0 ] = pragma;
    programLines[ 1 ] = programHeader;
    programLines[ 2 ] = programCore;
    programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];

    if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
    {
        return -1;
    }

    //// Set up to actually run
    threadSize = num_elements;

    error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
    test_error( error, "Unable to get thread group max size" );

    if( matchGroupSize )
        // HACK because xchg and cmpxchg apparently are limited by hardware
        threadSize = groupSize;

    if( isLocal )
    {
        size_t maxSizes[3] = {0, 0, 0};
        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3*sizeof(size_t), maxSizes, 0);
        test_error( error, "Unable to obtain max work item sizes for the device" );

        size_t workSize;
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
        test_error( error, "Unable to obtain max work group size for device and kernel combo" );

        // "workSize" is limited to that of the first dimension, as only a 1D NDRange is executed.
        if( maxSizes[0] < workSize )
        {
            workSize = maxSizes[0];
        }

        threadSize = groupSize = workSize;
    }


    log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );

    refValues = malloc( typeSize * threadSize );

    if( testFns.GenerateRefsIntFn != NULL )
    {
        // We have a ref generator provided
        d = init_genrand( gRandomSeed );
        startRefValues = malloc( typeSize * threadSize );
        if( typeSize == 4 )
            testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
        else
            testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
        free_mtdata(d);
        d = NULL;
    }
    else
        startRefValues = NULL;

    // If we're given a num_results function, we need to determine how many result objects we need.
    // If we don't have one, we assume it's just 1.
    size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;

    char * destItems = new char[ typeSize * numDestItems ];
    if( destItems == NULL )
    {
        log_error( "ERROR: Unable to allocate memory!\n" );
        return -1;
    }
    void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );

    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                typeSize * numDestItems, destItems, NULL);
    if (!streams[0])
    {
        log_error("ERROR: Creating output array failed!\n");
        return -1;
    }
    streams[1] = clCreateBuffer(
        context,
        ((startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
        typeSize * threadSize, startRefValues, NULL);
    if (!streams[1])
    {
        log_error("ERROR: Creating reference array failed!\n");
        return -1;
    }

    /* Set the arguments */
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    if( isLocal )
    {
        error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
        test_error( error, "Unable to set indexed local kernel argument" );

        cl_int numDestItemsInt = (cl_int)numDestItems;
        error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
        test_error( error, "Unable to set indexed kernel argument" );
    }

    /* Run the kernel */
    threads[0] = threadSize;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to read result value!" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    // If we have an expectedFn, then we need to generate a final value to compare against.
    // If we don't have one, it's because we're comparing ref values only.
    if( testFns.ExpectedValueIntFn != NULL )
    {
        for( size_t i = 0; i < numDestItems; i++ )
        {
            char expected[ 8 ];
            cl_int intVal;
            cl_long longVal;
            if( typeSize == 4 )
            {
                // Int version
                intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
                memcpy( expected, &intVal, sizeof( intVal ) );
            }
            else
            {
                // Long version
                longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
                memcpy( expected, &longVal, sizeof( longVal ) );
            }

            if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
            {
                if( typeSize == 4 )
                {
                    cl_int *outValue = (cl_int *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
                    cl_int *startRefs = (cl_int *)startRefValues;
                    cl_int *refs = (cl_int *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %d\n", i, refs[i] );
                    }
                }
                else
                {
                    cl_long *outValue = (cl_long *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
                    cl_long *startRefs = (cl_long *)startRefValues;
                    cl_long *refs = (cl_long *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %lld\n", i, refs[i] );
                    }
                }
                return -1;
            }
        }
    }

    if( testFns.VerifyRefsIntFn != NULL )
    {
        /* Use the verify function to also check the results */
        if( dataType == kFloat )
        {
            cl_float *outValue = (cl_float *)destItems;
            if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else if( typeSize == 4 )
        {
            cl_int *outValue = (cl_int *)destItems;
            if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else
        {
            cl_long *outValue = (cl_long *)destItems;
            if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
    }
    else if( testFns.ExpectedValueIntFn == NULL )
    {
        log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
        return -1;
    }


    /* Re-write the starting value */
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );
    error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to write starting values!" );

    /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
    threads[0] = 1;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    if( memcmp( refValues, destItems, typeSize ) != 0 )
    {
        if( typeSize == 4 )
        {
            cl_int *s = (cl_int *)destItems;
            cl_int *r = (cl_int *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                       "(should have been %d, returned %d)!\n", *s, *r );
        }
        else
        {
            cl_long *s = (cl_long *)destItems;
            cl_long *r = (cl_long *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                       "(should have been %lld, returned %lld)!\n", *s, *r );
        }
        return -1;
    }

    delete [] destItems;
    free( refValues );
    if( startRefValues != NULL )
        free( startRefValues );

    return 0;
}

int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
                             TestFns testFns,
                             bool extended, bool matchGroupSize, bool usingAtomicPrefix )
{
    log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");

    int errors = 0;
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize );

    // Only the 32-bit atomic functions use the "atomic_" prefix in OpenCL 1.1; the 64-bit functions still use the "atom_" prefix.
    // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic_" prefix.
    if (!usingAtomicPrefix) {
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize );
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize );
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize );
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize );
    }

    return errors;
}

#pragma mark ---- add

const char atom_add_core[] =
    " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n";

const char atomic_add_core[] =
    " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n";

cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = 0;
    for( size_t i = 0; i < size; i++ )
        total += ( (cl_int)i + 3 ) * 4;
    return total;
}

cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = 0;
    for( size_t i = 0; i < size; i++ )
        total += ( ( i + 3 ) * 4 );
    return total;
}
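// Sanity check for the two expected-value functions above: each of the `size`
// threads adds (tid + 3) four times, so the total is
//   sum_{i=0}^{size-1} 4*(i + 3) = 4*( size*(size-1)/2 + 3*size ) = 2*size*(size + 5),
// e.g. 8 threads starting from 0 should end at 2*8*13 = 208.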

int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- sub

const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";

const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";

cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = INT_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total -= (cl_int)i + 3;
    return total;
}

cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = LONG_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total -= i + 3;
    return total;
}

int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- xchg

const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";

const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";

bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
{
    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
    char *valids;
    size_t i;
    char originalValidCount = 0;

    valids = (char *)malloc( sizeof( char ) * size );
    memset( valids, 0, sizeof( char ) * size );

    for( i = 0; i < size; i++ )
    {
        if( refValues[ i ] == INT_TEST_VALUE )
        {
            // Special initial value
            originalValidCount++;
            continue;
        }
        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
        {
            log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
            free( valids );
            return false;
        }
        valids[ refValues[ i ] ] ++;
    }

    /* Note: ONE entry will have a zero count. It'll be the last one that executed, because that value
       should be the final value written out */
    if( valids[ finalValue ] > 0 )
    {
        log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
        free( valids );
        return false;
    }
    else
        valids[ finalValue ] = 1; // So the following loop will be okay

    /* Now check that every entry has one and only one count */
    if( originalValidCount != 1 )
    {
        log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", INT_TEST_VALUE, originalValidCount );
        free( valids );
        return false;
    }
    for( i = 0; i < size; i++ )
    {
        if( valids[ i ] != 1 )
        {
            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
            for( size_t j = 0; j < size; j++ )
                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
            free( valids );
            return false;
        }
    }

    free( valids );
    return true;
}
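// Worked example of the invariant checked above, with size = 4 and execution
// order tid0, tid1, tid3, tid2: refValues = { INT_TEST_VALUE, 0, 3, 1 } with
// finalValue = 2. The start value was handed out exactly once, every other
// swapped-in tid shows up exactly once, and the one tid missing from
// refValues (2) is the last swap, still sitting in destMemory.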

bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
{
    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
    char *valids;
    size_t i;
    char originalValidCount = 0;

    valids = (char *)malloc( sizeof( char ) * size );
    memset( valids, 0, sizeof( char ) * size );

    for( i = 0; i < size; i++ )
    {
        if( refValues[ i ] == LONG_TEST_VALUE )
        {
            // Special initial value
            originalValidCount++;
            continue;
        }
        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
        {
            log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
            free( valids );
            return false;
        }
        valids[ refValues[ i ] ] ++;
    }

    /* Note: ONE entry will have a zero count. It'll be the last one that executed, because that value
       should be the final value written out */
    if( valids[ finalValue ] > 0 )
    {
        log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
        free( valids );
        return false;
    }
    else
        valids[ finalValue ] = 1; // So the following loop will be okay

    /* Now check that every entry has one and only one count */
    if( originalValidCount != 1 )
    {
        log_error( "ERROR: Starting reference value %lld did not occur once-and-only-once (occurred %d)\n", LONG_TEST_VALUE, originalValidCount );
        free( valids );
        return false;
    }
    for( i = 0; i < size; i++ )
    {
        if( valids[ i ] != 1 )
        {
            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
            for( size_t j = 0; j < size; j++ )
                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
            free( valids );
            return false;
        }
    }

    free( valids );
    return true;
}

bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
{
    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
    char *valids;
    size_t i;
    char originalValidCount = 0;

    valids = (char *)malloc( sizeof( char ) * size );
    memset( valids, 0, sizeof( char ) * size );

    for( i = 0; i < size; i++ )
    {
        cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
        if( *intRefValue == INT_TEST_VALUE )
        {
            // Special initial value (compared by bit pattern: the buffer was seeded from mIntStartValue)
            originalValidCount++;
            continue;
        }
        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
        {
            log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
            free( valids );
            return false;
        }
        valids[ (int)refValues[ i ] ] ++;
    }

    /* Note: ONE entry will have a zero count. It'll be the last one that executed, because that value
       should be the final value written out */
    if( valids[ (int)finalValue ] > 0 )
    {
        log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
        free( valids );
        return false;
    }
    else
        valids[ (int)finalValue ] = 1; // So the following loop will be okay

    /* Now check that every entry has one and only one count */
    if( originalValidCount != 1 )
    {
        log_error( "ERROR: Starting reference value (bit pattern %d) did not occur once-and-only-once (occurred %d)\n", INT_TEST_VALUE, originalValidCount );
        free( valids );
        return false;
    }
    for( i = 0; i < size; i++ )
    {
        if( valids[ i ] != 1 )
        {
            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
            for( size_t j = 0; j < size; j++ )
                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
            free( valids );
            return false;
        }
    }

    free( valids );
    return true;
}

int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };

    int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false );
    errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true );

    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true );

    return errors;
}


#pragma mark ---- min

const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";

const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";

cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = 0x7fffffffL;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] < total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
}

cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = 0x7fffffffffffffffLL;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] < total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
}
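// Note on the generators above (the max test below reuses the same shape):
// the int generator draws from [0x3fffffff, 0x7ffffffd] and the long
// generator caps values below 2^47, so every candidate stays positive and
// strictly below the 0x7fff... start values, keeping the expected min (and
// max, against a start of 0) well defined.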

int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}


#pragma mark ---- max

const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";

const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";

cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = 0;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] > total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
}

cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = 0;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] > total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
}

int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}


#pragma mark ---- inc

const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n";

const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n";

cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    return INT_TEST_VALUE + (cl_int)size;
}

cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    return LONG_TEST_VALUE + (cl_long)size;
}

int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}


#pragma mark ---- dec

const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n";

const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n";

cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    return INT_TEST_VALUE - (cl_int)size;
}

cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    return LONG_TEST_VALUE - (cl_long)size;
}

int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}


#pragma mark ---- cmpxchg

/* We test cmpxchg by implementing (the long way) atom_add */
const char atom_cmpxchg_core[] =
    " int oldValue, origValue, newValue;\n"
    " do { \n"
    " origValue = destMemory[0];\n"
    " newValue = origValue + tid + 2;\n"
    " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
    " } while( oldValue != origValue );\n"
    " oldValues[tid] = oldValue;\n"
;

const char atom_cmpxchg64_core[] =
    " long oldValue, origValue, newValue;\n"
    " do { \n"
    " origValue = destMemory[0];\n"
    " newValue = origValue + tid + 2;\n"
    " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
    " } while( oldValue != origValue );\n"
    " oldValues[tid] = oldValue;\n"
;

const char atomic_cmpxchg_core[] =
    " int oldValue, origValue, newValue;\n"
    " do { \n"
    " origValue = destMemory[0];\n"
    " newValue = origValue + tid + 2;\n"
    " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
    " } while( oldValue != origValue );\n"
    " oldValues[tid] = oldValue;\n"
;
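// The loop in each core above is the standard compare-and-swap retry pattern:
// a thread re-reads the current value, computes current + (tid + 2), and the
// cmpxchg only succeeds if no other thread slipped a write in between the read
// and the swap. On failure it retries, so every thread's addend is applied
// exactly once and the kernel degenerates to an atomic add we can verify.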

cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = INT_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total += (cl_int)i + 2;
    return total;
}

cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = LONG_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total += i + 2;
    return total;
}

int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };

    int errors = 0;

    log_info(" Testing atom_ functions...\n");
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true );

    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true );

    log_info(" Testing atomic_ functions...\n");
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true );

    if( errors )
        return -1;

    return 0;
}

#pragma mark -------- Bitwise functions

size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
{
    size_t numBits = get_explicit_type_size( dataType ) * 8;

    return ( threadCount + numBits - 1 ) / numBits;
}
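// This is a ceiling division: each work-item owns one bit of one destination
// value, so e.g. 100 threads on a 32-bit type need ( 100 + 31 ) / 32 = 4
// destination values, the last of which only has its low 4 bits exercised.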

#pragma mark ---- and

const char atom_and_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
;

const char atomic_and_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
;


cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0;

    // The last item doesn't get and'ed on every bit, so we have to mask away the untouched bits
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = (cl_int)0xffffffffL;
    for( size_t i = 0; i < numBits; i++ )
        bits &= ~( 1 << i );

    return bits;
}
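// Example of the masking above with size = 100: results 0..2 had all 32 of
// their bits cleared, so they must read back as 0; result 3 only had bits
// 0..3 cleared (numBits = 100 - 3*32 = 4), so its expected value is 0xfffffff0.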

cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return 0;

    // The last item doesn't get and'ed on every bit, so we have to mask away the untouched bits
    size_t numBits = (size_t)size - whichResult * 64;
    cl_long bits = (cl_long)0xffffffffffffffffLL;
    for (size_t i = 0; i < numBits; i++)
        bits &= ~(1LL << i);

    return bits;
}

int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
                    test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}


#pragma mark ---- or

const char atom_or_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
;

const char atomic_or_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
;

cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0xffffffff;

    // The last item doesn't get or'ed on every bit, so we have to mask away the untouched bits
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = 0;
    for( size_t i = 0; i < numBits; i++ )
        bits |= ( 1 << i );

    return bits;
}

cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return (cl_long)0xffffffffffffffffLL;

    // The last item doesn't get or'ed on every bit, so we have to mask away the untouched bits
    size_t numBits = (size_t)size - whichResult * 64;
    cl_long bits = 0;
    for( size_t i = 0; i < numBits; i++ )
        bits |= ( 1LL << i );

    return bits;
}

int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}


#pragma mark ---- xor

const char atom_xor_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int bitIndex = tid & ( numBits - 1 );\n"
    "\n"
    " oldValues[tid] = atom_xor( &destMemory[0], 1L << bitIndex );\n";

const char atomic_xor_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int bitIndex = tid & ( numBits - 1 );\n"
    "\n"
    " oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";

cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    cl_int total = 0x2f08ab41;
    for( size_t i = 0; i < size; i++ )
        total ^= ( 1 << ( i & 31 ) );
    return total;
}

cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    cl_long total = 0x2f08ab418ba0541LL;
    for( size_t i = 0; i < size; i++ )
        total ^= ( 1LL << ( i & 63 ) );
    return total;
}
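// The expected-value functions above simply replay the toggles sequentially:
// xor is commutative and associative, so the order in which threads hit
// destMemory[0] doesn't matter -- only how many times each bit position
// (tid & 31, or tid & 63 for longs) gets flipped against the start value.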

int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}