1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 
24 
25 #include "procs.h"
26 
hi_offset(int index,int vectorSize)27 int hi_offset( int index, int vectorSize) { return index + vectorSize / 2; }
lo_offset(int index,int vectorSize)28 int lo_offset( int index, int vectorSize) { return index; }
even_offset(int index,int vectorSize)29 int even_offset( int index, int vectorSize ) { return index * 2; }
odd_offset(int index,int vectorSize)30 int odd_offset( int index, int vectorSize ) { return index * 2 + 1; }
31 
32 typedef int (*OffsetFunc)( int index, int vectorSize );
33 static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
34 typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
35 static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
36 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
37 
38 static const unsigned int vector_sizes[] =     { 1, 2, 3, 4, 8, 16};
39 static const unsigned int vector_aligns[] =    { 1, 2, 4, 4, 8, 16};
40 static const unsigned int out_vector_idx[] =   { 0, 0, 1, 1, 3, 4};
41 // if input is size vector_sizes[i], output is size
42 // vector_sizes[out_vector_idx[i]]
43 // input type name is strcat(gentype, vector_size_names[i]);
44 // and output type name is
45 // strcat(gentype, vector_size_names[out_vector_idx[i]]);
46 static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
47     -1,-1,-1,-1,-1,-1,-1,5};
48 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
49 
50 static const size_t  kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
51 static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
52 
test_hiloeo(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)53 int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
54 {
55     cl_int *input_ptr, *output_ptr, *p;
56     int err;
57     cl_uint i;
58     int hasDouble = is_extension_available( device, "cl_khr_fp64" );
59     cl_uint vectorSize, operatorToUse;
60     cl_uint type;
61     MTdata d;
62 
63     int expressionMode;
64     int numExpressionModes = 2;
65 
66     size_t length = sizeof(cl_int) * 4 * n_elems;
67 
68     input_ptr   = (cl_int*)malloc(length);
69     output_ptr  = (cl_int*)malloc(length);
70 
71     p = input_ptr;
72     d = init_genrand( gRandomSeed );
73     for (i=0; i<4 * (cl_uint) n_elems; i++)
74         p[i] = genrand_int32(d);
75     free_mtdata(d); d = NULL;
76 
77     for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
78     {
79         // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
80         size_t elementCount = length / kSizes[type];
81         cl_mem streams[2];
82 
83         // skip double if unavailable
84         if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
85             continue;
86 
87         if( !gHasLong &&
88             (( 0 == strcmp( test_str_names[type], "long" )) ||
89             ( 0 == strcmp( test_str_names[type], "ulong" ))))
90             continue;
91 
92         log_info( "%s", test_str_names[type] );
93         fflush( stdout );
94 
95         // Set up data streams for the type
96         streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
97         if (!streams[0])
98         {
99             log_error("clCreateBuffer failed\n");
100             return -1;
101         }
102         streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
103         if (!streams[1])
104         {
105             log_error("clCreateBuffer failed\n");
106             return -1;
107         }
108 
109         err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
110         if (err != CL_SUCCESS)
111         {
112             log_error("clEnqueueWriteBuffer failed\n");
113             return -1;
114         }
115 
116         for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
117         {
118             log_info( " %s", operatorToUse_names[ operatorToUse ] );
119             fflush( stdout );
120             for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
121                 for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {
122 
123                     cl_program program = NULL;
124                     cl_kernel kernel = NULL;
125                     cl_uint outVectorSize = out_vector_idx[vectorSize];
126                     char expression[1024];
127 
128                     const char *source[] = {
129                         "", // optional pragma string
130                         "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
131                         "(__global ", test_str_names[type], vector_size_names[vectorSize],
132                         " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
133                         " *dst)\n"
134                         "{\n"
135                         "    int  tid = get_global_id(0);\n"
136                         "\n"
137                         "    ", test_str_names[type],
138                         vector_size_names[out_vector_idx[vectorSize]],
139                         " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
140                         "    dst[tid] = tmp;\n"
141                         "}\n"
142                     };
143 
144                     if(expressionMode == 0) {
145                         sprintf(expression, "srcA[tid]");
146                     } else if(expressionMode == 1) {
147                         switch(vector_sizes[vectorSize]) {
148                             case 16:
149                                 sprintf(expression,
150                                         "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
151                                         test_str_names[type]
152                                         );
153                                 break;
154                             case 8:
155                                 sprintf(expression,
156                                         "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
157                                         test_str_names[type]
158                                         );
159                                 break;
160                             case 4:
161                                 sprintf(expression,
162                                         "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
163                                         test_str_names[type]
164                                         );
165                                 break;
166                             case 3:
167                                 sprintf(expression,
168                                         "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
169                                         test_str_names[type]
170                                         );
171                                 break;
172                             case 2:
173                                 sprintf(expression,
174                                         "((%s2)(srcA[tid].s0, srcA[tid].s1))",
175                                         test_str_names[type]
176                                         );
177                                 break;
178                             default :
179                                 sprintf(expression, "srcA[tid]");
180                                 log_info("Default\n");
181                         }
182                     } else {
183                         sprintf(expression, "srcA[tid]");
184                     }
185 
186                     if (0 == strcmp( test_str_names[type], "double" ))
187                         source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
188 
189                     char kernelName[128];
190                     snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
191                     err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
192                     if (err)
193                         return -1;
194 
195                     err  = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
196                     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
197                     if (err != CL_SUCCESS)
198                     {
199                         log_error("clSetKernelArgs failed\n");
200                         return -1;
201                     }
202 
203                     //Wipe the output buffer clean
204                     uint32_t pattern = 0xdeadbeef;
205                     memset_pattern4( output_ptr, &pattern, length );
206                     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
207                     if (err != CL_SUCCESS)
208                     {
209                         log_error("clEnqueueWriteBuffer failed\n");
210                         return -1;
211                     }
212 
213                     size_t size = elementCount / (vector_aligns[vectorSize]);
214                     err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
215                     if (err != CL_SUCCESS)
216                     {
217                         log_error("clEnqueueNDRangeKernel failed\n");
218                         return -1;
219                     }
220 
221                     err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
222                     if (err != CL_SUCCESS)
223                     {
224                         log_error("clEnqueueReadBuffer failed\n");
225                         return -1;
226                     }
227 
228                     char *inP = (char *)input_ptr;
229                     char *outP = (char *)output_ptr;
230                     outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
231                                             ( vector_sizes[ out_vector_idx[vectorSize] ] ) );
232                     // was                outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
233                     for( size_t e = 0; e < size; e++ )
234                     {
235                         if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
236 
237                             log_info("e is %d\n", (int)e);
238                             fflush(stdout);
239                             // break;
240                             return -1;
241                         }
242                         inP += kSizes[type] * ( vector_aligns[vectorSize] );
243                         outP += kSizes[type] * ( vector_aligns[outVectorSize] );
244                     }
245 
246                     clReleaseKernel( kernel );
247                     clReleaseProgram( program );
248                     log_info( "." );
249                     fflush( stdout );
250                 }
251             }
252         }
253 
254         clReleaseMemObject( streams[0] );
255         clReleaseMemObject( streams[1] );
256         log_info( "done\n" );
257     }
258 
259     log_info("HiLoEO test passed\n");
260 
261     free(input_ptr);
262     free(output_ptr);
263 
264     return err;
265 }
266 
CheckResults(void * in,void * out,size_t elementCount,int type,int vectorSize,int operatorToUse)267 static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
268 {
269     cl_ulong  array[8];
270     void *p = array;
271     size_t halfVectorSize  = vector_sizes[out_vector_idx[vectorSize]];
272     size_t cmpVectorSize =  vector_sizes[out_vector_idx[vectorSize]];
273     // was 1 << (vectorSize-1);
274     OffsetFunc f = offsetFuncs[ operatorToUse ];
275     size_t elementSize =  kSizes[type];
276 
277     if(vector_size_names[vectorSize][0] == '3') {
278         if(operatorToUse_names[operatorToUse][0] == 'h' ||
279            operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
280         {
281             cmpVectorSize = 1; // special case for vec3 ignored values
282         }
283     }
284 
285     switch( elementSize )
286     {
287         case 1:
288         {
289             char *i = (char*)in;
290             char *o = (char*)out;
291             size_t j;
292             cl_uint k;
293             OffsetFunc f = offsetFuncs[ operatorToUse ];
294 
295             for( k = 0; k  < elementCount; k++ )
296             {
297                 char *o2 = (char*)p;
298                 for( j = 0; j < halfVectorSize; j++ )
299                     o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
300 
301                 if( memcmp( o, o2, elementSize * cmpVectorSize ) )
302                 {
303                     log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
304                     for( j = 1; j < halfVectorSize * 2; j++ )
305                         log_info( ", %d", i[j] );
306                     log_info( " } --> { %d", o[0] );
307                     for( j = 1; j < halfVectorSize; j++ )
308                         log_info( ", %d", o[j] );
309                     log_info( " }\n" );
310                     return -1;
311                 }
312                 i += 2 * halfVectorSize;
313                 o += halfVectorSize;
314             }
315         }
316             break;
317 
318         case 2:
319         {
320             short *i = (short*)in;
321             short *o = (short*)out;
322             size_t j;
323             cl_uint k;
324 
325             for( k = 0; k  < elementCount; k++ )
326             {
327                 short *o2 = (short*)p;
328                 for( j = 0; j < halfVectorSize; j++ )
329                     o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
330 
331                 if( memcmp( o, o2, elementSize * cmpVectorSize ) )
332                 {
333                     log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
334                     for( j = 1; j < halfVectorSize * 2; j++ )
335                         log_info( ", %d", i[j] );
336                     log_info( " } --> { %d", o[0] );
337                     for( j = 1; j < halfVectorSize; j++ )
338                         log_info( ", %d", o[j] );
339                     log_info( " }\n" );
340                     return -1;
341                 }
342                 i += 2 * halfVectorSize;
343                 o += halfVectorSize;
344             }
345         }
346             break;
347 
348         case 4:
349         {
350             int *i = (int*)in;
351             int *o = (int*)out;
352             size_t j;
353             cl_uint k;
354 
355             for( k = 0; k  < elementCount; k++ )
356             {
357                 int *o2 = (int *)p;
358                 for( j = 0; j < halfVectorSize; j++ )
359                     o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
360 
361                 for( j = 0; j < cmpVectorSize; j++ )
362         {
363             /* Allow float nans to be binary different */
364             if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
365             {
366                 log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
367             for( j = 1; j < halfVectorSize * 2; j++ )
368                 log_info( ", 0x%8.8x", i[j] );
369             log_info( " } --> { 0x%8.8x", o[0] );
370             for( j = 1; j < halfVectorSize; j++ )
371                 log_info( ", 0x%8.8x", o[j] );
372             log_info( " }\n" );
373             return -1;
374             }
375         }
376         i += 2 * halfVectorSize;
377         o += halfVectorSize;
378             }
379         }
380             break;
381 
382         case 8:
383         {
384             cl_ulong *i = (cl_ulong*)in;
385             cl_ulong *o = (cl_ulong*)out;
386             size_t j;
387             cl_uint k;
388 
389             for( k = 0; k  < elementCount; k++ )
390             {
391                 cl_ulong *o2 = (cl_ulong*)p;
392                 for( j = 0; j < halfVectorSize; j++ )
393                     o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
394 
395                 if( memcmp( o, o2, elementSize * cmpVectorSize ) )
396                 {
397                     log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
398                     for( j = 1; j < halfVectorSize * 2; j++ )
399                         log_info( ", 0x%16.16llx", i[j] );
400                     log_info( " } --> { 0x%16.16llx", o[0] );
401                     for( j = 1; j < halfVectorSize; j++ )
402                         log_info( ", 0x%16.16llx", o[j] );
403                     log_info( " }\n" );
404                     return -1;
405                 }
406                 i += 2 * halfVectorSize;
407                 o += halfVectorSize;
408             }
409         }
410             break;
411 
412         default:
413             log_info( "Internal error. Unknown data type\n" );
414             return -2;
415     }
416 
417     return 0;
418 }
419 
420 
421 
422