1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 #include "harness/conversions.h"
25 #include "harness/ThreadPool.h"
26 
27 #define NUM_TESTS 23
28 
29 #define  LONG_MATH_SHIFT_SIZE 26
30 #define QUICK_MATH_SHIFT_SIZE 16
31 
32 static const char *kernel_code =
33 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
34 "{\n"
35 "    int  tid = get_global_id(0);\n"
36 "\n"
37 "    dst[tid] = srcA[tid] %s srcB[tid];\n"
38 "}\n";
39 
40 static const char *kernel_code_V3 =
41 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
42 "{\n"
43 "    int  tid = get_global_id(0);\n"
44 "\n"
45 "    vstore3( vload3( tid, srcA ) %s vload3( tid, srcB), tid, dst );\n"
46 "}\n";
47 
48 static const char *kernel_code_V3_scalar_vector =
49 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
50 "{\n"
51 "    int  tid = get_global_id(0);\n"
52 "\n"
53 "    vstore3( srcA[tid] %s vload3( tid, srcB), tid, dst );\n"
54 "}\n";
55 
56 static const char *kernel_code_V3_vector_scalar =
57 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
58 "{\n"
59 "    int  tid = get_global_id(0);\n"
60 "\n"
61 "    vstore3( vload3( tid, srcA ) %s srcB[tid], tid, dst );\n"
62 "}\n";
63 
64 
65 // Separate kernel here because it does not fit the pattern
66 static const char *not_kernel_code =
67 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
68 "{\n"
69 "    int  tid = get_global_id(0);\n"
70 "\n"
71 "    dst[tid] = %ssrcA[tid];\n"
72 "}\n";
73 
74 static const char *not_kernel_code_V3 =
75 "__kernel void test(__global %s /*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
76 "{\n"
77 "    int  tid = get_global_id(0);\n"
78 "\n"
79 "    vstore3( %s vload3( tid, srcA ), tid, dst );\n"
80 "}\n";
81 
82 static const char *kernel_code_scalar_shift =
83 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
84 "{\n"
85 "    int  tid = get_global_id(0);\n"
86 "\n"
87 "    dst[tid] = srcA[tid] %s srcB[tid]%s;\n"
88 "}\n";
89 
90 static const char *kernel_code_scalar_shift_V3 =
91 "__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
92 "{\n"
93 "    int  tid = get_global_id(0);\n"
94 "\n"
95 "    vstore3( vload3( tid, srcA) %s vload3( tid, srcB )%s, tid, dst );\n"
96 "}\n";
97 
98 static const char *kernel_code_question_colon =
99 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *dst)\n"
100 "{\n"
101 "    int  tid = get_global_id(0);\n"
102 "\n"
103 "    dst[tid] = (srcA[tid]%s < srcB[tid]%s) ? srcA[tid] : srcB[tid];\n"
104 "}\n";
105 
106 static const char *kernel_code_question_colon_V3 =
107 "__kernel void test(__global %s/*%s*/ *srcA, __global %s/*%s*/ *srcB, __global %s/*%s*/ *dst)\n"
108 "{\n"
109 "    int  tid = get_global_id(0);\n"
110 "\n"
111 "    vstore3( (vload3( tid, srcA)%s < vload3(tid, srcB)%s) ? vload3( tid, srcA) : vload3( tid, srcB), tid, dst );\n"
112 "}\n";
113 
114 
115 
116 
117 // External verification and data generation functions
118 extern const char *tests[];
119 extern const char *test_names[];
120 extern int verify_long(int test, size_t vector_size, cl_long *inptrA, cl_long *inptrB, cl_long *outptr, size_t n);
121 extern void init_long_data(uint64_t indx, int num_elements, cl_long *input_ptr[], MTdata d) ;
122 extern int verify_ulong(int test, size_t vector_size, cl_ulong *inptrA, cl_ulong *inptrB, cl_ulong *outptr, size_t n);
123 extern void init_ulong_data(uint64_t indx, int num_elements, cl_ulong *input_ptr[], MTdata d) ;
124 extern int verify_int(int test, size_t vector_size, cl_int *inptrA, cl_int *inptrB, cl_int *outptr, size_t n);
125 extern void init_int_data(uint64_t indx, int num_elements, cl_int *input_ptr[], MTdata d) ;
126 extern int verify_uint(int test, size_t vector_size, cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n);
127 extern void init_uint_data(uint64_t indx, int num_elements, cl_uint *input_ptr[], MTdata d) ;
128 extern int verify_short(int test, size_t vector_size, cl_short *inptrA, cl_short *inptrB, cl_short *outptr, size_t n);
129 extern void init_short_data(uint64_t indx, int num_elements, cl_short *input_ptr[], MTdata d) ;
130 extern int verify_ushort(int test, size_t vector_size, cl_ushort *inptrA, cl_ushort *inptrB, cl_ushort *outptr, size_t n);
131 extern void init_ushort_data(uint64_t indx, int num_elements, cl_ushort *input_ptr[], MTdata d) ;
132 extern int verify_char(int test, size_t vector_size, cl_char *inptrA, cl_char *inptrB, cl_char *outptr, size_t n);
133 extern void init_char_data(uint64_t indx, int num_elements, cl_char *input_ptr[], MTdata d) ;
134 extern int verify_uchar(int test, size_t vector_size, cl_uchar *inptrA, cl_uchar *inptrB, cl_uchar *outptr, size_t n);
135 extern void init_uchar_data(uint64_t indx, int num_elements, cl_uchar *input_ptr[], MTdata d) ;
136 
137 // Supported type list
138 const ExplicitType types[] = {
139     kChar,
140     kUChar,
141     kShort,
142     kUShort,
143     kInt,
144     kUInt,
145     kLong,
146     kULong,
147 };
148 
149 enum TestStyle
150 {
151     kDontCare=0,
152     kBothVectors,
153     kInputAScalar,
154     kInputBScalar,
155     kVectorScalarScalar,    // for the ?: operator only; indicates vector ? scalar : scalar.
156     kInputCAlsoScalar = 0x80    // Or'ed flag to indicate that the selector for the ?: operator is also scalar
157 };
158 
159 typedef struct _perThreadData
160 {
161     cl_mem            m_streams[3];
162     cl_int            *m_input_ptr[2], *m_output_ptr;
163     size_t                      m_type_size;
164     cl_program                m_program[NUM_TESTS];
165     cl_kernel                m_kernel[NUM_TESTS];
166 } perThreadData;
167 
168 
perThreadDataNew()169 perThreadData * perThreadDataNew()
170 {
171     perThreadData * pThis = (perThreadData *)malloc(sizeof(perThreadData));
172 
173 
174     memset(pThis->m_program, 0, sizeof(cl_program)*NUM_TESTS);
175     memset(pThis->m_kernel, 0, sizeof(cl_kernel)*NUM_TESTS);
176 
177     pThis->m_input_ptr[0] = pThis->m_input_ptr[1] = NULL;
178     pThis->m_output_ptr = NULL;
179 
180     return pThis;
181 }
182 
183 
perThreadDataDestroy(perThreadData * pThis)184 void perThreadDataDestroy(perThreadData * pThis)
185 {
186     int                i;
187     // cleanup
188     clReleaseMemObject(pThis->m_streams[0]);
189     clReleaseMemObject(pThis->m_streams[1]);
190     clReleaseMemObject(pThis->m_streams[2]);
191     for (i=0; i<NUM_TESTS; i++)
192     {
193         if (pThis->m_kernel[i] != NULL) clReleaseKernel(pThis->m_kernel[i]);
194         if (pThis->m_program[i] != NULL) clReleaseProgram(pThis->m_program[i]);
195     }
196     free(pThis->m_input_ptr[0]);
197     free(pThis->m_input_ptr[1]);
198     free(pThis->m_output_ptr);
199 
200     free(pThis);
201 }
202 
203 
perThreadDataInit(perThreadData * pThis,ExplicitType type,int num_elements,int vectorSize,int inputAVecSize,int inputBVecSize,cl_context context,int start_test_ID,int end_test_ID,int testID)204 cl_int perThreadDataInit(perThreadData * pThis, ExplicitType type,
205                          int num_elements, int vectorSize,
206                          int inputAVecSize, int inputBVecSize,
207                          cl_context context, int start_test_ID,
208                          int end_test_ID, int testID)
209 {
210     int i;
211     const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
212 
213     const char *type_name = get_explicit_type_name(type);
214     pThis->m_type_size = get_explicit_type_size(type);
215     int err;
216     // Used for the && and || tests where the vector case returns a signed value
217     const char *signed_type_name;
218     switch (type) {
219         case kChar:
220         case kUChar:
221             signed_type_name = get_explicit_type_name(kChar);
222             break;
223         case kShort:
224         case kUShort:
225             signed_type_name = get_explicit_type_name(kShort);
226             break;
227         case kInt:
228         case kUInt:
229             signed_type_name = get_explicit_type_name(kInt);
230             break;
231         case kLong:
232         case kULong:
233             signed_type_name = get_explicit_type_name(kLong);
234             break;
235         default:
236             log_error("Invalid type.\n");
237             return -1;
238             break;
239     }
240 
241     pThis->m_input_ptr[0] =
242     (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
243     pThis->m_input_ptr[1] =
244     (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
245     pThis->m_output_ptr =
246     (cl_int*)malloc(pThis->m_type_size * num_elements * vectorSize);
247     pThis->m_streams[0] = clCreateBuffer(
248         context, CL_MEM_READ_WRITE,
249         pThis->m_type_size * num_elements * inputAVecSize, NULL, &err);
250 
251     test_error(err, "clCreateBuffer failed");
252 
253     pThis->m_streams[1] = clCreateBuffer(
254         context, CL_MEM_READ_WRITE,
255         pThis->m_type_size * num_elements * inputBVecSize, NULL, &err);
256 
257     test_error(err, "clCreateBuffer failed");
258 
259     pThis->m_streams[2] = clCreateBuffer(
260         context, CL_MEM_READ_WRITE,
261         pThis->m_type_size * num_elements * vectorSize, NULL, &err);
262 
263     test_error(err, "clCreateBuffer failed");
264 
265     const char *vectorString = sizeNames[ vectorSize ];
266     const char *inputAVectorString = sizeNames[ inputAVecSize ];
267     const char *inputBVectorString = sizeNames[ inputBVecSize ];
268 
269     if (testID == -1)
270     {
271         log_info("\tTesting %s%s (%d bytes)...\n", type_name, vectorString, (int)(pThis->m_type_size*vectorSize));
272     }
273 
274     char programString[4096];
275     const char *ptr;
276 
277 
278     const char * kernel_code_base = ( vectorSize != 3 ) ? kernel_code : ( inputAVecSize == 1 ) ? kernel_code_V3_scalar_vector : ( inputBVecSize == 1 ) ? kernel_code_V3_vector_scalar : kernel_code_V3;
279 
280     for (i=start_test_ID; i<end_test_ID; i++) {
281         switch (i) {
282             case 10:
283             case 11:
284                 sprintf(programString, vectorSize == 3 ? kernel_code_scalar_shift_V3 : kernel_code_scalar_shift, type_name, inputAVectorString, type_name, inputBVectorString,
285                         type_name, vectorString, tests[i], ((vectorSize == 1) ? "":".s0"));
286                 break;
287             case 12:
288                 sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
289                         type_name, vectorString, tests[i]);
290                 break;
291             case 13:
292                 sprintf(programString, vectorSize == 3 ? kernel_code_question_colon_V3 : kernel_code_question_colon,
293                         type_name, inputAVectorString, type_name, inputBVectorString,
294                         type_name, vectorString, ((vectorSize == 1) ? "":".s0"), ((vectorSize == 1) ? "":".s0")) ;
295                 break;
296             case 14:
297             case 15:
298             case 16:
299             case 17:
300             case 18:
301             case 19:
302             case 20:
303             case 21:
304                 // Need an unsigned result here for vector sizes > 1
305                 sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
306                         ((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
307                 break;
308             case 22:
309                 // Need an unsigned result here for vector sizes > 1
310                 sprintf(programString, vectorSize == 3 ? not_kernel_code_V3 : not_kernel_code, type_name, inputAVectorString, type_name, inputBVectorString,
311                         ((vectorSize == 1) ? type_name : signed_type_name), vectorString, tests[i]);
312                 break;
313             default:
314                 sprintf(programString, kernel_code_base, type_name, inputAVectorString, type_name, inputBVectorString,
315                         type_name, vectorString, tests[i]);
316                 break;
317         }
318 
319         //printf("kernel: %s\n", programString);
320         ptr = programString;
321         err = create_single_kernel_helper( context,
322                                           &(pThis->m_program[ i ]),
323                                           &(pThis->m_kernel[ i ]), 1,
324                                           &ptr, "test" );
325         test_error( err, "Unable to create test kernel" );
326         err = clSetKernelArg(pThis->m_kernel[i], 0,
327                              sizeof pThis->m_streams[0],
328                              &(pThis->m_streams[0]) );
329         err |= clSetKernelArg(pThis->m_kernel[i], 1,
330                               sizeof pThis->m_streams[1],
331                               &(pThis->m_streams[1]) );
332         err |= clSetKernelArg(pThis->m_kernel[i], 2,
333                               sizeof pThis->m_streams[2],
334                               &(pThis->m_streams[2]) );
335         test_error(err, "clSetKernelArgs failed");
336     }
337 
338     return CL_SUCCESS;
339 }
340 
341 typedef struct _globalThreadData
342 {
343     cl_device_id     m_deviceID;
344     cl_context       m_context;
345     // cl_command_queue m_queue;
346     int              m_num_elements;
347     int              m_threadcount;
348     int              m_vectorSize;
349     int              m_num_runs_shift;
350     TestStyle        m_style;
351     ExplicitType     m_type;
352     MTdata *         m_pRandData;
353     uint64_t         m_offset;
354     int              m_testID;
355     perThreadData  **m_arrPerThreadData;
356 } globalThreadData;
357 
358 
359 
globalThreadDataNew(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,int num_runs_shift,ExplicitType type,int testID,int threadcount)360 globalThreadData * globalThreadDataNew(cl_device_id deviceID, cl_context context,
361                                        cl_command_queue queue, int num_elements,
362                                        int vectorSize, TestStyle style, int num_runs_shift,
363                                        ExplicitType type, int testID,
364                                        int threadcount)
365 {
366     int i;
367     globalThreadData * pThis = (globalThreadData *)malloc(sizeof(globalThreadData));
368     pThis->m_deviceID = deviceID;
369     pThis->m_context = context;
370     // pThis->m_queue = queue;
371     pThis->m_num_elements = num_elements;
372     pThis->m_num_runs_shift = num_runs_shift;
373     pThis->m_vectorSize = vectorSize;
374     pThis->m_style = style;
375     pThis->m_type = type;
376     pThis->m_offset = (uint64_t)0;
377     pThis->m_testID = testID;
378     pThis->m_arrPerThreadData = NULL;
379     pThis->m_threadcount = threadcount;
380 
381     pThis->m_pRandData = (MTdata *)malloc(threadcount*sizeof(MTdata));
382     pThis->m_arrPerThreadData = (perThreadData **)
383     malloc(threadcount*sizeof(perThreadData *));
384     for(i=0; i < threadcount; ++i)
385     {
386         pThis->m_pRandData[i] = init_genrand(i+1);
387         pThis->m_arrPerThreadData[i] = NULL;
388     }
389 
390     return pThis;
391 }
392 
globalThreadDataDestroy(globalThreadData * pThis)393 void globalThreadDataDestroy(globalThreadData * pThis)
394 {
395     int i;
396 
397     for(i=0; i < pThis->m_threadcount; ++i)
398     {
399         free_mtdata(pThis->m_pRandData[i]);
400         if(pThis->m_arrPerThreadData[i] != NULL)
401         {
402             perThreadDataDestroy(pThis->m_arrPerThreadData[i]);
403         }
404     }
405     free(pThis->m_arrPerThreadData);
406     free(pThis->m_pRandData);
407     free(pThis);
408 }
409 
410 int
411 test_integer_ops(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID, MTdata randIn, uint64_t startIndx, uint64_t endIndx,
412                  perThreadData ** ppThreadData);
413 
414 
test_integer_ops_do_thread(cl_uint job_id,cl_uint thread_id,void * userInfo)415 cl_int test_integer_ops_do_thread( cl_uint job_id, cl_uint thread_id, void *userInfo )
416 {
417     cl_int error; cl_int result;
418     globalThreadData * threadInfoGlobal = (globalThreadData *)userInfo;
419     cl_command_queue queue;
420 
421 #if THREAD_DEBUG
422     log_error("Thread %x (job %x) about to create command queue\n",
423               thread_id, job_id);
424 #endif
425 
426     queue =  clCreateCommandQueue (threadInfoGlobal->m_context,
427                                    threadInfoGlobal->m_deviceID,0,
428                                    &error);
429 
430     if(error != CL_SUCCESS)
431     {
432         log_error("Thread %x (job %x) could not create command queue\n",
433                   thread_id, job_id);
434         return error; // should we clean up the queue too?
435     }
436 
437 #if THREAD_DEBUG
438     log_error("Thread %x (job %x) created command queue\n",
439               thread_id, job_id);
440 #endif
441 
442     result = test_integer_ops(  threadInfoGlobal->m_deviceID,
443                               threadInfoGlobal->m_context,
444                               queue,
445                               threadInfoGlobal->m_num_elements,
446                               threadInfoGlobal->m_vectorSize, threadInfoGlobal->m_style,
447                               threadInfoGlobal->m_num_runs_shift,
448                               threadInfoGlobal->m_type, threadInfoGlobal->m_testID,
449                               threadInfoGlobal->m_pRandData[thread_id],
450                               threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*job_id,
451                               threadInfoGlobal->m_offset + threadInfoGlobal->m_num_elements*(job_id+1),
452                               &(threadInfoGlobal->m_arrPerThreadData[thread_id])
453                               );
454 
455     if(result != 0)
456     {
457         log_error("Thread %x (job %x) failed test_integer_ops with result %x\n",
458                   thread_id, job_id, result);
459         // return error;
460     }
461 
462 
463     error = clReleaseCommandQueue(queue);
464     if(error != CL_SUCCESS)
465     {
466         log_error("Thread %x (job %x) could not release command queue\n",
467                   thread_id, job_id);
468         return error;
469     }
470     return result;
471 }
472 
473 int
test_integer_ops_threaded(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,int num_runs_shift,ExplicitType type,int testID)474 test_integer_ops_threaded(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, int vectorSize, TestStyle style, int num_runs_shift, ExplicitType type, int testID)
475 {
476     globalThreadData * pThreadInfo = NULL;
477     cl_int result=0;
478     cl_uint threadcount = GetThreadCount();
479 
480   // Check to see if we are using single threaded mode on other than a 1.0 device
481   if (getenv( "CL_TEST_SINGLE_THREADED" )) {
482 
483     char device_version[1024] = { 0 };
484     result = clGetDeviceInfo( deviceID, CL_DEVICE_VERSION, sizeof(device_version), device_version, NULL );
485     if(result != CL_SUCCESS)
486     {
487       log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
488       return result;
489     }
490 
491     if (strcmp("OpenCL 1.0 ",device_version)) {
492       log_error("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. Running single threaded.\n");
493     }
494   }
495 
496     // This test will run threadcount threads concurrently; each thread will execute test_integer_ops()
497     // which will allocate 2 OpenCL buffers on the device; each buffer has size num_elements * type_size * vectorSize.
498     // We need to make sure that the total device memory allocated by all threads does not exceed the maximum
499     // memory on the device. If it does, we decrease num_elements until all threads combined will not
500     // over-subscribe device memory.
501     cl_ulong maxDeviceGlobalMem;
502     result = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(maxDeviceGlobalMem), &maxDeviceGlobalMem, NULL);
503     if(result != CL_SUCCESS)
504     {
505         log_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed: %d\n", result);
506         return result;
507     }
508 
509   if (maxDeviceGlobalMem > (cl_ulong)SIZE_MAX) {
510     maxDeviceGlobalMem = (cl_ulong)SIZE_MAX;
511   }
512 
513     // Let's not take all device memory - reduce by 75%
514     maxDeviceGlobalMem = (maxDeviceGlobalMem * 3) >> 2;
515     // Now reduce num_elements so that the total device memory usage does not exceed 75% of global device memory.
516     size_t type_size = get_explicit_type_size(type);
517     while ((cl_ulong)threadcount * 4 * num_elements * type_size * vectorSize > maxDeviceGlobalMem)
518     {
519         num_elements >>= 1;
520     }
521 
522     uint64_t startIndx = (uint64_t)0;
523     uint64_t endIndx = (1ULL<<num_runs_shift);
524     uint64_t jobcount = (endIndx-startIndx)/num_elements;
525 
526     if(jobcount==0)
527     {
528         jobcount = 1;
529     }
530 
531     pThreadInfo = globalThreadDataNew(deviceID, context, queue, num_elements,
532                                       vectorSize, style, num_runs_shift,
533                                       type, testID, threadcount);
534 
535 
536     pThreadInfo->m_offset = startIndx;
537 
538 #if THREAD_DEBUG
539     log_error("Launching %llx jobs\n",
540               jobcount);
541 #endif
542 
543     result = ThreadPool_Do(test_integer_ops_do_thread, (cl_uint)jobcount, (void *)pThreadInfo);
544 
545     if(result != 0)
546     {
547         // cleanup ??
548         log_error("ThreadPool_Do return non-success value %d\n", result);
549 
550     }
551     globalThreadDataDestroy(pThreadInfo);
552     return result;
553 }
554 
555 
556 
557 int
test_integer_ops(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,int num_runs_shift,ExplicitType type,int testID,MTdata randDataIn,uint64_t startIndx,uint64_t endIndx,perThreadData ** ppThreadData)558 test_integer_ops(cl_device_id deviceID, cl_context context,
559                  cl_command_queue queue, int num_elements,
560                  int vectorSize, TestStyle style, int num_runs_shift,
561                  ExplicitType type, int testID, MTdata randDataIn,
562                  uint64_t startIndx, uint64_t endIndx,
563                  perThreadData ** ppThreadData)
564 {
565     size_t    threads[1];
566     int                err;
567     int                i;
568     int inputAVecSize, inputBVecSize;
569 
570 
571 
572     inputAVecSize = inputBVecSize = vectorSize;
573     if( style == kInputAScalar )
574         inputAVecSize = 1;
575     else if( style == kInputBScalar )
576         inputBVecSize = 1;
577 
578     /*
579      if( inputAVecSize != inputBVecSize )
580      log_info("Testing \"%s\" on %s%d (%s-%s inputs) (range %llx - %llx of 0-%llx)\n",
581      test_names[testID],
582      get_explicit_type_name(type), vectorSize,
583      ( inputAVecSize == 1 ) ? "scalar" : "vector",
584      ( inputBVecSize == 1 ) ? "scalar" : "vector",
585      startIndx, endIndx, (1ULL<<num_runs_shift) );
586      else
587      log_info("Testing \"%s\" on %s%d (range %llx - %llx of 0-%llx)\n",
588      test_names[testID],
589      get_explicit_type_name(type), vectorSize,
590      startIndx, endIndx, (1ULL<<num_runs_shift));
591      */
592 
593 
594     // Figure out which sub-test to run, or all of them
595     int start_test_ID = 0;
596     int end_test_ID = NUM_TESTS;
597     if (testID != -1) {
598         start_test_ID = testID;
599         end_test_ID = testID+1;
600     }
601     if (testID > NUM_TESTS) {
602         log_error("Invalid test ID: %d\n", testID);
603         return -1;
604     }
605 
606     if(*ppThreadData == NULL)
607     {
608         *ppThreadData = perThreadDataNew();
609         err = perThreadDataInit(*ppThreadData,
610                                 type, num_elements, vectorSize,
611                                 inputAVecSize, inputBVecSize,
612                                 context, start_test_ID,
613                                 end_test_ID, testID);
614         test_error(err, "failed to init per thread data\n");
615     }
616 
617     perThreadData * pThreadData = *ppThreadData;
618 
619 
620 
621     threads[0] = (size_t)num_elements;
622     int error_count = 0;
623     for (i=start_test_ID; i<end_test_ID; i++)
624     {
625         uint64_t    indx;
626 
627 
628         if(startIndx >= endIndx)
629         {
630             startIndx = (uint64_t)0;
631             endIndx = (1ULL<<num_runs_shift);
632         }
633         for (indx=startIndx; indx < endIndx; indx+=num_elements)
634         {
635 
636             switch (type) {
637                 case     kChar:
638                     init_char_data(indx, num_elements * vectorSize, (cl_char**)(pThreadData->m_input_ptr), randDataIn);
639                     break;
640                 case     kUChar:
641                     init_uchar_data(indx, num_elements * vectorSize, (cl_uchar**)(pThreadData->m_input_ptr), randDataIn);
642                     break;
643                 case     kShort:
644                     init_short_data(indx, num_elements * vectorSize, (cl_short**)(pThreadData->m_input_ptr), randDataIn);
645                     break;
646                 case     kUShort:
647                     init_ushort_data(indx, num_elements * vectorSize, (cl_ushort**)(pThreadData->m_input_ptr), randDataIn);
648                     break;
649                 case     kInt:
650                     init_int_data(indx, num_elements * vectorSize, (cl_int**)(pThreadData->m_input_ptr), randDataIn);
651                     break;
652                 case     kUInt:
653                     init_uint_data(indx, num_elements * vectorSize, (cl_uint**)(pThreadData->m_input_ptr), randDataIn);
654                     break;
655                 case     kLong:
656                     init_long_data(indx, num_elements * vectorSize, (cl_long**)(pThreadData->m_input_ptr), randDataIn);
657                     break;
658                 case     kULong:
659                     init_ulong_data(indx, num_elements * vectorSize, (cl_ulong**)(pThreadData->m_input_ptr), randDataIn);
660                     break;
661                 default:
662                     err = 1;
663                     log_error("Invalid type.\n");
664                     break;
665             }
666 
667 
668             err = clEnqueueWriteBuffer(queue, pThreadData->m_streams[0], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputAVecSize, (void *)pThreadData->m_input_ptr[0], 0, NULL, NULL);
669             test_error(err, "clEnqueueWriteBuffer failed");
670             err = clEnqueueWriteBuffer( queue, pThreadData->m_streams[1], CL_FALSE, 0, pThreadData->m_type_size*num_elements * inputBVecSize, (void *)pThreadData->m_input_ptr[1], 0, NULL, NULL );
671             test_error(err, "clEnqueueWriteBuffer failed");
672 
673             err = clEnqueueNDRangeKernel( queue, pThreadData->m_kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL );
674             test_error(err, "clEnqueueNDRangeKernel failed");
675 
676             err = clEnqueueReadBuffer( queue, pThreadData->m_streams[2], CL_TRUE, 0, pThreadData->m_type_size*num_elements * vectorSize, (void *)pThreadData->m_output_ptr, 0, NULL, NULL );
677             test_error(err, "clEnqueueReadBuffer failed");
678 
679             // log_info("Performing verification\n");
680 
681             // If one of the inputs are scalar, we need to extend the input values to vectors
682             // to accommodate the verify functions
683             if( vectorSize > 1 )
684             {
685                 char * p = NULL;
686                 if( style == kInputAScalar )
687                     p = (char *)pThreadData->m_input_ptr[ 0 ];
688                 else if( style == kInputBScalar )
689                     p = (char *)pThreadData->m_input_ptr[ 1 ];
690                 if( p != NULL )
691                 {
692                     for( int element = num_elements - 1; element >= 0; element-- )
693                     {
694                         for( int vec = ( element == 0 ) ? 1 : 0; vec < vectorSize; vec++ )
695                             memcpy( p + ( element * vectorSize + vec ) * pThreadData->m_type_size, p + element * pThreadData->m_type_size, pThreadData->m_type_size );
696                     }
697                 }
698             }
699 
700             switch (type) {
701                 case     kChar:
702                     err = verify_char(i, vectorSize, (cl_char*)pThreadData->m_input_ptr[0], (cl_char*)pThreadData->m_input_ptr[1], (cl_char*)pThreadData->m_output_ptr, num_elements * vectorSize);
703                     break;
704                 case     kUChar:
705                     err = verify_uchar(i, vectorSize, (cl_uchar*)pThreadData->m_input_ptr[0], (cl_uchar*)pThreadData->m_input_ptr[1], (cl_uchar*)pThreadData->m_output_ptr, num_elements * vectorSize);
706                     break;
707                 case     kShort:
708                     err = verify_short(i, vectorSize, (cl_short*)pThreadData->m_input_ptr[0], (cl_short*)pThreadData->m_input_ptr[1], (cl_short*)pThreadData->m_output_ptr, num_elements * vectorSize);
709                     break;
710                 case     kUShort:
711                     err = verify_ushort(i, vectorSize, (cl_ushort*)pThreadData->m_input_ptr[0], (cl_ushort*)pThreadData->m_input_ptr[1], (cl_ushort*)pThreadData->m_output_ptr, num_elements * vectorSize);
712                     break;
713                 case     kInt:
714                     err = verify_int(i, vectorSize, (cl_int*)pThreadData->m_input_ptr[0], (cl_int*)pThreadData->m_input_ptr[1], (cl_int*)pThreadData->m_output_ptr, num_elements * vectorSize);
715                     break;
716                 case     kUInt:
717                     err = verify_uint(i, vectorSize, (cl_uint*)pThreadData->m_input_ptr[0], (cl_uint*)pThreadData->m_input_ptr[1], (cl_uint*)pThreadData->m_output_ptr, num_elements * vectorSize);
718                     break;
719                 case     kLong:
720                     err = verify_long(i, vectorSize, (cl_long*)pThreadData->m_input_ptr[0], (cl_long*)pThreadData->m_input_ptr[1], (cl_long*)pThreadData->m_output_ptr, num_elements * vectorSize);
721                     break;
722                 case     kULong:
723                     err = verify_ulong(i, vectorSize, (cl_ulong*)pThreadData->m_input_ptr[0], (cl_ulong*)pThreadData->m_input_ptr[1], (cl_ulong*)pThreadData->m_output_ptr, num_elements * vectorSize);
724                     break;
725                 default:
726                     err = 1;
727                     log_error("Invalid type.\n");
728                     break;
729             }
730 
731             if (err) {
732 #if 0
733                 log_error( "* inASize: %d inBSize: %d numElem: %d\n", inputAVecSize, inputBVecSize, num_elements );
734                 cl_char *inP = (cl_char *)pThreadData->m_input_ptr[0];
735                 log_error( "from 18:\n" );
736                 for( int q = 18; q < 64; q++ )
737                 {
738                     log_error( "%02x ", inP[ q ] );
739                 }
740                 log_error( "\n" );
741                 inP = (cl_char *)pThreadData->m_input_ptr[1];
742                 for( int q = 18; q < 64; q++ )
743                 {
744                     log_error( "%02x ", inP[ q ] );
745                 }
746                 log_error( "\n" );
747                 inP = (cl_char *)pThreadData->m_output_ptr;
748                 for( int q = 18; q < 64; q++ )
749                 {
750                     log_error( "%02x ", inP[ q ] );
751                 }
752                 log_error( "\n" );
753                 log_error( "from 36:\n" );
754                 inP = (cl_char *)pThreadData->m_input_ptr[0];
755                 for( int q = 36; q < 64; q++ )
756                 {
757                     log_error( "%02x ", inP[ q ] );
758                 }
759                 log_error( "\n" );
760                 inP = (cl_char *)pThreadData->m_input_ptr[1];
761                 for( int q = 36; q < 64; q++ )
762                 {
763                     log_error( "%02x ", inP[ q ] );
764                 }
765                 log_error( "\n" );
766                 inP = (cl_char *)pThreadData->m_output_ptr;
767                 for( int q = 36; q < 64; q++ )
768                 {
769                     log_error( "%02x ", inP[ q ] );
770                 }
771                 log_error( "\n" );
772 #endif
773                 error_count++;
774                 break;
775             }
776         }
777 
778         /*
779 
780          const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
781 
782          if (err) {
783          log_error("\t\t%s%s test %s failed (range %llx - %llx of 0-%llx)\n",
784          get_explicit_type_name(type), sizeNames[vectorSize],
785          test_names[i],
786          startIndx, endIndx,
787          (1ULL<<num_runs_shift));
788          } else {
789          log_info("\t\t%s%s test %s passed (range %llx - %llx of 0-%llx)\n",
790          get_explicit_type_name(type), sizeNames[vectorSize],
791          test_names[i],
792          startIndx, endIndx,
793          (1ULL<<num_runs_shift));
794          }
795          */
796     }
797 
798 
799 
800     return error_count;
801 }
802 
803 
804 
805 
806 
807 
808 
809 
810 
811 // Run all the vector sizes for a given test
run_specific_test(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num,int testID)812 int run_specific_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID) {
813     int errors = 0;
814     errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/1, 1, kBothVectors, num, type, testID);
815     errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/2, 2, kBothVectors, num, type, testID);
816     errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/3, 3, kBothVectors, num, type, testID);
817     errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/4, 4, kBothVectors, num, type, testID);
818     errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/8, 8, kBothVectors, num, type, testID);
819     errors += test_integer_ops_threaded(deviceID, context, queue, (1024*1024*2)/16, 16, kBothVectors, num, type, testID);
820     return errors;
821 }
822 
823 // Run multiple tests for a given type
run_multiple_tests(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num,int * tests,int total_tests)824 int run_multiple_tests(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int *tests, int total_tests) {
825     int errors = 0;
826 
827     if (getenv("CL_WIMPY_MODE") && num == LONG_MATH_SHIFT_SIZE) {
828       log_info("Detected CL_WIMPY_MODE env\n");
829       log_info("Skipping long test\n");
830       return 0;
831     }
832 
833     int i;
834     for (i=0; i<total_tests; i++)
835     {
836         int localErrors;
837         log_info("Testing \"%s\" ", test_names[tests[i]]);  fflush( stdout );
838         localErrors = run_specific_test(deviceID, context, queue, num_elements, type, num, tests[i]);
839         if( localErrors )
840             log_info( "FAILED\n" );
841         else
842             log_info( "passed\n" );
843 
844         errors += localErrors;
845     }
846 
847     return errors;
848 }
849 
850 // Run all the math tests for a given type
run_test_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)851 int run_test_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
852     int tests[] = {0, 1, 2, 3, 4};
853     return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
854 }
855 
856 // Run all the logic tests for a given type
run_test_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)857 int run_test_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
858     int tests[] = {5, 6, 7, 12, 14, 15, 22};
859     return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
860 }
861 
862 // Run all the shifting tests for a given type
run_test_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)863 int run_test_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
864     int tests[] = {8, 9, 10, 11};
865     return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
866 }
867 
868 // Run all the comparison tests for a given type
run_test_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)869 int run_test_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
870     int tests[] = {13, 16, 17, 18, 19, 20, 21};
871     return run_multiple_tests(deviceID, context, queue, num_elements, type, num, tests, (int)(sizeof(tests)/sizeof(int)));
872 }
873 
874 // Run all tests for a given type
run_test(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)875 int run_test(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num) {
876     int errors = 0;
877     errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 1, kBothVectors, num, type, -1);
878     errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 2, kBothVectors, num, type, -1);
879     errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 3, kBothVectors, num, type, -1);
880     errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 4, kBothVectors, num, type, -1);
881     errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 8, kBothVectors, num, type, -1);
882     errors += test_integer_ops_threaded(deviceID, context, queue, 1024*1024*2, 16, kBothVectors, num, type, -1);
883     return errors;
884 }
885 
886 
887 // -----------------
888 // Long tests
889 // -----------------
test_long_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)890 int test_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
891     if (!gHasLong)
892     {
893         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
894         return CL_SUCCESS;
895     }
896     return run_test_math(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
897 }
test_long_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)898 int test_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
899     if (!gHasLong)
900     {
901         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
902         return CL_SUCCESS;
903     }
904     return run_test_logic(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
905 }
test_long_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)906 int test_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
907     if (!gHasLong)
908     {
909         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
910         return CL_SUCCESS;
911     }
912     return run_test_shift(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
913 }
test_long_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)914 int test_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
915     if (!gHasLong)
916     {
917         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
918         return CL_SUCCESS;
919     }
920     return run_test_compare(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
921 }
test_quick_long_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)922 int test_quick_long_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
923     if (!gHasLong)
924     {
925         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
926         return CL_SUCCESS;
927     }
928     return run_test_math(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
929 }
test_quick_long_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)930 int test_quick_long_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
931     if (!gHasLong)
932     {
933         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
934         return CL_SUCCESS;
935     }
936     return run_test_logic(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
937 }
test_quick_long_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)938 int test_quick_long_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
939     if (!gHasLong)
940     {
941         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
942         return CL_SUCCESS;
943     }
944     return run_test_shift(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
945 }
test_quick_long_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)946 int test_quick_long_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
947     if (!gHasLong)
948     {
949         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
950         return CL_SUCCESS;
951     }
952     return run_test_compare(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
953 }
954 
955 
956 // -----------------
957 // ULong tests
958 // -----------------
test_ulong_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)959 int test_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
960     if (!gHasLong)
961     {
962         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
963         return CL_SUCCESS;
964     }
965     return run_test_math(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
966 }
test_ulong_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)967 int test_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
968     if (!gHasLong)
969     {
970         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
971         return CL_SUCCESS;
972     }
973     return run_test_logic(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
974 }
test_ulong_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)975 int test_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
976     if (!gHasLong)
977     {
978         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
979         return CL_SUCCESS;
980     }
981     return run_test_shift(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
982 }
test_ulong_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)983 int test_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
984     if (!gHasLong)
985     {
986         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
987         return CL_SUCCESS;
988     }
989     return run_test_compare(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
990 }
test_quick_ulong_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)991 int test_quick_ulong_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
992     if (!gHasLong)
993     {
994         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
995         return CL_SUCCESS;
996     }
997     return run_test_math(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
998 }
test_quick_ulong_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)999 int test_quick_ulong_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1000     if (!gHasLong)
1001     {
1002         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1003         return CL_SUCCESS;
1004     }
1005     return run_test_logic(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1006 }
test_quick_ulong_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1007 int test_quick_ulong_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1008     if (!gHasLong)
1009     {
1010         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1011         return CL_SUCCESS;
1012     }
1013     return run_test_shift(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1014 }
test_quick_ulong_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1015 int test_quick_ulong_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1016     if (!gHasLong)
1017     {
1018         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1019         return CL_SUCCESS;
1020     }
1021     return run_test_compare(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1022 }
1023 
1024 
1025 // -----------------
1026 // Int tests
1027 // -----------------
test_int_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1028 int test_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1029     return run_test_math(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1030 }
test_int_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1031 int test_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1032     return run_test_logic(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1033 }
test_int_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1034 int test_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1035     return run_test_shift(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1036 }
test_int_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1037 int test_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1038     return run_test_compare(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1039 }
test_quick_int_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1040 int test_quick_int_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1041     return run_test_math(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1042 }
test_quick_int_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1043 int test_quick_int_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1044     return run_test_logic(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1045 }
test_quick_int_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1046 int test_quick_int_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1047     return run_test_shift(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1048 }
test_quick_int_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1049 int test_quick_int_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1050     return run_test_compare(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1051 }
1052 
1053 
1054 // -----------------
1055 // UInt tests
1056 // -----------------
test_uint_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1057 int test_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1058     return run_test_math(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1059 }
test_uint_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1060 int test_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1061     return run_test_logic(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1062 }
test_uint_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1063 int test_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1064     return run_test_shift(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1065 }
test_uint_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1066 int test_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1067     return run_test_compare(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1068 }
test_quick_uint_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1069 int test_quick_uint_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1070     return run_test_math(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1071 }
test_quick_uint_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1072 int test_quick_uint_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1073     return run_test_logic(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1074 }
test_quick_uint_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1075 int test_quick_uint_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1076     return run_test_shift(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1077 }
test_quick_uint_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1078 int test_quick_uint_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1079     return run_test_compare(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1080 }
1081 
1082 
1083 // -----------------
1084 // Short tests
1085 // -----------------
test_short_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1086 int test_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1087     return run_test_math(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1088 }
test_short_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1089 int test_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1090     return run_test_logic(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1091 }
test_short_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1092 int test_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1093     return run_test_shift(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1094 }
test_short_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1095 int test_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1096     return run_test_compare(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1097 }
test_quick_short_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1098 int test_quick_short_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1099     return run_test_math(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1100 }
test_quick_short_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1101 int test_quick_short_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1102     return run_test_logic(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1103 }
test_quick_short_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1104 int test_quick_short_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1105     return run_test_shift(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1106 }
test_quick_short_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1107 int test_quick_short_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1108     return run_test_compare(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1109 }
1110 
1111 
1112 // -----------------
1113 // UShort tests
1114 // -----------------
test_ushort_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1115 int test_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1116     return run_test_math(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1117 }
test_ushort_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1118 int test_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1119     return run_test_logic(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1120 }
test_ushort_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1121 int test_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1122     return run_test_shift(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1123 }
test_ushort_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1124 int test_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1125     return run_test_compare(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1126 }
test_quick_ushort_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1127 int test_quick_ushort_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1128     return run_test_math(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1129 }
test_quick_ushort_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1130 int test_quick_ushort_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1131     return run_test_logic(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1132 }
test_quick_ushort_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1133 int test_quick_ushort_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1134     return run_test_shift(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1135 }
test_quick_ushort_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1136 int test_quick_ushort_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1137     return run_test_compare(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1138 }
1139 
1140 
1141 // -----------------
1142 // Char tests
1143 // -----------------
test_char_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1144 int test_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1145     return run_test_math(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1146 }
test_char_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1147 int test_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1148     return run_test_logic(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1149 }
test_char_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1150 int test_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1151     return run_test_shift(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1152 }
test_char_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1153 int test_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1154     return run_test_compare(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1155 }
test_quick_char_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1156 int test_quick_char_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1157     return run_test_math(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1158 }
test_quick_char_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1159 int test_quick_char_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1160     return run_test_logic(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1161 }
test_quick_char_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1162 int test_quick_char_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1163     return run_test_shift(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1164 }
test_quick_char_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1165 int test_quick_char_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1166     return run_test_compare(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1167 }
1168 
1169 
1170 // -----------------
1171 // UChar tests
1172 // -----------------
test_uchar_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1173 int test_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1174     return run_test_math(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1175 }
test_uchar_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1176 int test_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1177     return run_test_logic(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1178 }
test_uchar_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1179 int test_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1180     return run_test_shift(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1181 }
test_uchar_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1182 int test_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1183     return run_test_compare(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1184 }
test_quick_uchar_math(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1185 int test_quick_uchar_math(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1186     return run_test_math(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1187 }
test_quick_uchar_logic(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1188 int test_quick_uchar_logic(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1189     return run_test_logic(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1190 }
test_quick_uchar_shift(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1191 int test_quick_uchar_shift(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1192     return run_test_shift(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1193 }
test_quick_uchar_compare(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1194 int test_quick_uchar_compare(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1195     return run_test_compare(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1196 }
1197 
1198 
1199 
1200 // These are kept for debugging if you want to run all the tests together.
1201 
test_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1202 int test_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1203     if (!gHasLong)
1204     {
1205         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1206         return CL_SUCCESS;
1207     }
1208     return run_test(deviceID, context, queue, num_elements, kLong, LONG_MATH_SHIFT_SIZE);
1209 }
1210 
test_quick_long(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1211 int test_quick_long(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1212     if (!gHasLong)
1213     {
1214         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1215         return CL_SUCCESS;
1216     }
1217     return run_test(deviceID, context, queue, num_elements, kLong, QUICK_MATH_SHIFT_SIZE);
1218 }
1219 
test_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1220 int test_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1221     if (!gHasLong)
1222     {
1223         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1224         return CL_SUCCESS;
1225     }
1226     return run_test(deviceID, context, queue, num_elements, kULong, LONG_MATH_SHIFT_SIZE);
1227 }
1228 
test_quick_ulong(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1229 int test_quick_ulong(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1230     if (!gHasLong)
1231     {
1232         log_info( "WARNING: 64 bit integers are not supported on this device. Skipping\n" );
1233         return CL_SUCCESS;
1234     }
1235     return run_test(deviceID, context, queue, num_elements, kULong, QUICK_MATH_SHIFT_SIZE);
1236 }
1237 
test_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1238 int test_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1239     return run_test(deviceID, context, queue, num_elements, kInt, LONG_MATH_SHIFT_SIZE);
1240 }
1241 
test_quick_int(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1242 int test_quick_int(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1243     return run_test(deviceID, context, queue, num_elements, kInt, QUICK_MATH_SHIFT_SIZE);
1244 }
1245 
test_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1246 int test_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1247     return run_test(deviceID, context, queue, num_elements, kUInt, LONG_MATH_SHIFT_SIZE);
1248 }
1249 
test_quick_uint(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1250 int test_quick_uint(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1251     return run_test(deviceID, context, queue, num_elements, kUInt, QUICK_MATH_SHIFT_SIZE);
1252 }
1253 
test_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1254 int test_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1255     return run_test(deviceID, context, queue, num_elements, kShort, LONG_MATH_SHIFT_SIZE);
1256 }
1257 
test_quick_short(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1258 int test_quick_short(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1259     return run_test(deviceID, context, queue, num_elements, kShort, QUICK_MATH_SHIFT_SIZE);
1260 }
1261 
test_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1262 int test_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1263     return run_test(deviceID, context, queue, num_elements, kUShort, LONG_MATH_SHIFT_SIZE);
1264 }
1265 
test_quick_ushort(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1266 int test_quick_ushort(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1267     return run_test(deviceID, context, queue, num_elements, kUShort, QUICK_MATH_SHIFT_SIZE);
1268 }
1269 
test_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1270 int test_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1271     return run_test(deviceID, context, queue, num_elements, kChar, LONG_MATH_SHIFT_SIZE);
1272 }
1273 
test_quick_char(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1274 int test_quick_char(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1275     return run_test(deviceID, context, queue, num_elements, kChar, QUICK_MATH_SHIFT_SIZE);
1276 }
1277 
test_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1278 int test_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1279     return run_test(deviceID, context, queue, num_elements, kUChar, LONG_MATH_SHIFT_SIZE);
1280 }
1281 
test_quick_uchar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1282 int test_quick_uchar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements) {
1283     return run_test(deviceID, context, queue, num_elements, kUChar, QUICK_MATH_SHIFT_SIZE);
1284 }
1285 
1286 // Prototype for below
1287 int test_question_colon_op(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements,
1288                            int vectorSize, TestStyle style, ExplicitType type );
1289 
1290 // Run all the vector sizes for a given test in scalar-vector and vector-scalar modes
run_test_sizes(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num,int testID)1291 int run_test_sizes(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num, int testID)
1292 {
1293     int sizes[] = { 2, 3, 4, 8, 16, 0 };
1294     int errors = 0;
1295 
1296     for( int i = 0; sizes[ i ] != 0; i++ )
1297     {
1298         if( testID == 13 )
1299         {
1300             errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, type );
1301             errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, type );
1302             errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], kVectorScalarScalar, type );
1303 
1304             errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kBothVectors | kInputCAlsoScalar), type );
1305             errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputAScalar | kInputCAlsoScalar), type );
1306             errors += test_question_colon_op( deviceID, context, queue, num_elements / sizes[i], sizes[i], (TestStyle)(kInputBScalar | kInputCAlsoScalar), type );
1307         }
1308         else
1309         {
1310             errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputAScalar, num, type, testID);
1311             errors += test_integer_ops_threaded(deviceID, context, queue, num_elements / sizes[i], sizes[i], kInputBScalar, num, type, testID);
1312         }
1313     }
1314     return errors;
1315 }
1316 
1317 // Run all the tests for scalar-vector and vector-scalar for a given type
run_vector_scalar_tests(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,ExplicitType type,int num)1318 int run_vector_scalar_tests( cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, ExplicitType type, int num )
1319 {
1320     int errors = 0;
1321     size_t i;
1322 
1323     // Shift operators:
1324     // a) cannot take scalars as first parameter and vectors as second
1325     // b) have the vector >> scalar case tested by tests 10 and 11
1326     // so they get skipped entirely
1327 
1328     int testsToRun[] = { 0, 1, 2, 3, 4, 5, 6, 7,
1329         13, 14, 15, 16, 17, 18, 19, 20, 21 };
1330     for (i=0; i< sizeof(testsToRun)/sizeof(testsToRun[0]); i++)
1331     {
1332         errors += run_test_sizes(deviceID, context, queue, 2048, type, num, testsToRun[i]);
1333     }
1334     return errors;
1335 }
1336 
test_vector_scalar(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)1337 int test_vector_scalar(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
1338 {
1339     int errors = 0;
1340     int numTypes = sizeof( types ) / sizeof( types[ 0 ] );
1341 
1342     for( int t = 0; t < numTypes; t++ )
1343     {
1344         if ((types[ t ] == kLong || types[ t ] == kULong) && !gHasLong)
1345             continue;
1346 
1347         errors += run_vector_scalar_tests( deviceID, context, queue, num_elements, types[ t ], 1 );
1348         break;
1349     }
1350 
1351     return errors;
1352 }
1353 
generate_random_bool_data(size_t count,MTdata d,cl_char * outData,size_t outDataSize)1354 void generate_random_bool_data( size_t count, MTdata d, cl_char *outData, size_t outDataSize )
1355 {
1356     cl_uint bits = genrand_int32(d);
1357     cl_uint bitsLeft = 32;
1358 
1359     memset( outData, 0, outDataSize * count );
1360 
1361     for( size_t i = 0; i < count; i++ )
1362     {
1363         if( 0 == bitsLeft)
1364         {
1365             bits = genrand_int32(d);
1366             bitsLeft = 32;
1367         }
1368 
1369         // Note: we will be setting just any bit non-zero for the type, so we can easily skip past
1370         // and just write bytes (assuming the entire output buffer is already zeroed, which we did)
1371         *outData = ( bits & 1 ) ? 0xff : 0;
1372 
1373         bits >>= 1; bitsLeft -= 1;
1374 
1375         outData += outDataSize;
1376     }
1377 }
1378 
1379 static const char *kernel_question_colon_full =
1380 "__kernel void test(__global %s%s *srcA, __global %s%s *srcB, __global %s%s *srcC, __global %s%s *dst)\n"
1381 "{\n"
1382 "    int  tid = get_global_id(0);\n"
1383 "\n"
1384 "    %s%s valA = %ssrcA%s"
1385 "    %s%s valB = %ssrcB%s"
1386 "    %s%s valC = %ssrcC%s"
1387 "    %s%s destVal = valC ? valA : valB;\n"
1388 "    %s"
1389 "}\n";
1390 
1391 static const char *kernel_qc_load_plain_prefix = "";
1392 static const char *kernel_qc_load_plain_suffix = "[ tid ];\n";
1393 
1394 static const char *kernel_qc_load_vec3_prefix = "vload3( tid, ";
1395 static const char *kernel_qc_load_vec3_suffix = ");\n";
1396 
1397 static const char *kernel_qc_store_plain = "dst[ tid ] = destVal;\n";
1398 static const char *kernel_qc_store_vec3 = "vstore3( destVal, tid, dst );\n";
1399 
test_question_colon_op(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,int vectorSize,TestStyle style,ExplicitType type)1400 int test_question_colon_op(cl_device_id deviceID, cl_context context,
1401                            cl_command_queue queue, int num_elements,
1402                            int vectorSize, TestStyle style, ExplicitType type )
1403 {
1404     cl_mem              streams[4];
1405     cl_int              *input_ptr[3], *output_ptr;
1406     cl_program          program;
1407     cl_kernel           kernel;
1408     size_t              threads[1];
1409     int                 err;
1410     int inputAVecSize, inputBVecSize, inputCVecSize;
1411     const char * sizeNames[] = { "", "", "2", "3", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
1412     // Identical to sizeNames but with a blank for 3, since we use vload/store there
1413     const char * paramSizeNames[] = { "", "", "2", "", "4", "", "", "", "8", "", "", "", "", "", "", "", "16" };
1414     MTdata s_randStates;
1415 
1416     inputAVecSize = inputBVecSize = inputCVecSize = vectorSize;
1417     if( style & kInputCAlsoScalar )
1418     {
1419         style = (TestStyle)( style & ~kInputCAlsoScalar );
1420         inputCVecSize = 1;
1421     }
1422     if( style == kInputAScalar )
1423         inputAVecSize = 1;
1424     else if( style == kInputBScalar )
1425         inputBVecSize = 1;
1426     else if( style == kVectorScalarScalar )
1427         inputAVecSize = inputBVecSize = 1;
1428 
1429     log_info("Testing \"?:\" on %s%d (%s?%s:%s inputs)\n",
1430              get_explicit_type_name(type), vectorSize, ( inputCVecSize == 1 ) ? "scalar" : "vector",
1431              ( inputAVecSize == 1 ) ? "scalar" : "vector",
1432              ( inputBVecSize == 1 ) ? "scalar" : "vector" );
1433 
1434 
1435     const char *type_name = get_explicit_type_name(type);
1436     size_t type_size = get_explicit_type_size(type);
1437 
1438     // Create and initialize I/O buffers
1439 
1440     input_ptr[0] = (cl_int*)malloc(type_size * num_elements * vectorSize);
1441     input_ptr[1] = (cl_int*)malloc(type_size * num_elements * vectorSize);
1442     input_ptr[2] = (cl_int*)malloc(type_size * num_elements * vectorSize);
1443     output_ptr = (cl_int*)malloc(type_size * num_elements * vectorSize);
1444 
1445     s_randStates = init_genrand( gRandomSeed );
1446 
1447     generate_random_data( type, num_elements * inputAVecSize, s_randStates, input_ptr[ 0 ] );
1448     generate_random_data( type, num_elements * inputBVecSize, s_randStates, input_ptr[ 1 ] );
1449     generate_random_bool_data( num_elements * inputCVecSize, s_randStates, (cl_char *)input_ptr[ 2 ], type_size );
1450 
1451     streams[0] = clCreateBuffer(
1452         context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
1453         type_size * num_elements * inputAVecSize, input_ptr[0], &err);
1454     test_error(err, "clCreateBuffer failed");
1455     streams[1] = clCreateBuffer(
1456         context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
1457         type_size * num_elements * inputBVecSize, input_ptr[1], &err);
1458     test_error(err, "clCreateBuffer failed");
1459     streams[2] = clCreateBuffer(
1460         context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
1461         type_size * num_elements * inputCVecSize, input_ptr[2], &err);
1462     test_error(err, "clCreateBuffer failed");
1463     streams[3] =
1464         clCreateBuffer(context, CL_MEM_WRITE_ONLY,
1465                        type_size * num_elements * vectorSize, NULL, &err);
1466     test_error(err, "clCreateBuffer failed");
1467 
1468     const char *vectorString = sizeNames[ vectorSize ];
1469     const char *inputAVectorString = sizeNames[ inputAVecSize ];
1470     const char *inputBVectorString = sizeNames[ inputBVecSize ];
1471     const char *inputCVectorString = sizeNames[ inputCVecSize ];
1472 
1473     char programString[4096];
1474     const char *ptr;
1475 
1476     sprintf( programString, kernel_question_colon_full, type_name, paramSizeNames[ inputAVecSize ],
1477             type_name, paramSizeNames[ inputBVecSize ],
1478             type_name, paramSizeNames[ inputCVecSize ],
1479          type_name, paramSizeNames[ vectorSize ],
1480             // Loads
1481             type_name, inputAVectorString, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputAVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
1482             type_name, inputBVectorString, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputBVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
1483             type_name, inputCVectorString, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_prefix : kernel_qc_load_plain_prefix, ( inputCVecSize == 3 ) ? kernel_qc_load_vec3_suffix : kernel_qc_load_plain_suffix,
1484             // Dest type
1485             type_name, vectorString,
1486             // Store
1487             ( vectorSize == 3 ) ? kernel_qc_store_vec3 : kernel_qc_store_plain );
1488 
1489     ptr = programString;
1490     err = create_single_kernel_helper( context, &program, &kernel, 1, &ptr, "test" );
1491     test_error( err, "Unable to create test kernel" );
1492 
1493     err = clSetKernelArg( kernel, 0, sizeof streams[0], &streams[0] );
1494     err |= clSetKernelArg( kernel, 1, sizeof streams[1], &streams[1] );
1495     err |= clSetKernelArg( kernel, 2, sizeof streams[2], &streams[2] );
1496     err |= clSetKernelArg( kernel, 3, sizeof streams[3], &streams[3] );
1497     test_error(err, "clSetKernelArgs failed");
1498 
1499     // Run
1500     threads[0] = (size_t)num_elements;
1501 
1502     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, NULL, 0, NULL, NULL );
1503     test_error(err, "clEnqueueNDRangeKernel failed");
1504 
1505     // Read and verify results
1506     err = clEnqueueReadBuffer( queue, streams[3], CL_TRUE, 0, type_size*num_elements * vectorSize, (void *)output_ptr, 0, NULL, NULL );
1507     test_error(err, "clEnqueueReadBuffer failed");
1508 
1509     // log_info("Performing verification\n");
1510     int error_count = 0;
1511 
1512     char *inputAPtr = (char *)input_ptr[ 0 ];
1513     char *inputBPtr = (char *)input_ptr[ 1 ];
1514     cl_char *inputCPtr = (cl_char *)input_ptr[ 2 ];
1515     char *actualPtr = (char *)output_ptr;
1516 
1517     for( int i = 0; i < num_elements; i++ )
1518     {
1519         for( int j = 0; j < vectorSize; j++ )
1520         {
1521             char *expectedPtr = ( *inputCPtr ) ? inputAPtr : inputBPtr;
1522             if( memcmp( expectedPtr, actualPtr, type_size ) != 0 )
1523             {
1524 #if 0
1525                 char expectedStr[ 128 ], actualStr[ 128 ], inputAStr[ 128 ], inputBStr[ 128 ];
1526                 print_type_to_string( type, inputAPtr, inputAStr );
1527                 print_type_to_string( type, inputBPtr, inputBStr );
1528                 print_type_to_string( type, expectedPtr, expectedStr );
1529                 print_type_to_string( type, actualPtr, actualStr );
1530                 log_error( "cl_%s verification failed at element %d:%d (expected %s, got %s, inputs: %s, %s, %s)\n",
1531                           type_name, i, j, expectedStr, actualStr, inputAStr, inputBStr, ( *inputCPtr ) ? "true" : "false" );
1532 #endif
1533                 error_count++;
1534             }
1535             // Advance for each element member. Note if any of the vec sizes are 1, they don't advance here
1536             inputAPtr += ( inputAVecSize == 1 ) ? 0 : type_size;
1537             inputBPtr += ( inputBVecSize == 1 ) ? 0 : type_size;
1538             inputCPtr += ( inputCVecSize == 1 ) ? 0 : type_size;
1539             actualPtr += ( vectorSize == 1 ) ? 0 : type_size;
1540         }
1541         // Reverse for the member advance. If the vec sizes are 1, we need to advance, but otherwise they're already correct
1542         inputAPtr += ( inputAVecSize == 1 ) ? type_size : 0;
1543         inputBPtr += ( inputBVecSize == 1 ) ? type_size : 0;
1544         inputCPtr += ( inputCVecSize == 1 ) ? type_size : 0;
1545         actualPtr += ( vectorSize == 1 ) ? type_size : 0;
1546     }
1547 
1548     // cleanup
1549     clReleaseMemObject(streams[0]);
1550     clReleaseMemObject(streams[1]);
1551     clReleaseMemObject(streams[2]);
1552     clReleaseMemObject(streams[3]);
1553     clReleaseKernel(kernel);
1554     clReleaseProgram(program);
1555     free(input_ptr[0]);
1556     free(input_ptr[1]);
1557     free(input_ptr[2]);
1558     free(output_ptr);
1559     free_mtdata( s_randStates );
1560 
1561     return error_count;
1562 }
1563