1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <limits.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23
24
25 #include "procs.h"
26
hi_offset(int index,int vectorSize)27 int hi_offset( int index, int vectorSize) { return index + vectorSize / 2; }
lo_offset(int index,int vectorSize)28 int lo_offset( int index, int vectorSize) { return index; }
even_offset(int index,int vectorSize)29 int even_offset( int index, int vectorSize ) { return index * 2; }
odd_offset(int index,int vectorSize)30 int odd_offset( int index, int vectorSize ) { return index * 2 + 1; }
31
32 typedef int (*OffsetFunc)( int index, int vectorSize );
33 static const OffsetFunc offsetFuncs[4] = { hi_offset, lo_offset, even_offset, odd_offset };
34 typedef int (*verifyFunc)( const void *, const void *, const void *, int n, const char *sizeName );
35 static const char *operatorToUse_names[] = { "hi", "lo", "even", "odd" };
36 static const char *test_str_names[] = { "char", "uchar", "short", "ushort", "int", "uint", "long", "ulong", "float", "double" };
37
38 static const unsigned int vector_sizes[] = { 1, 2, 3, 4, 8, 16};
39 static const unsigned int vector_aligns[] = { 1, 2, 4, 4, 8, 16};
40 static const unsigned int out_vector_idx[] = { 0, 0, 1, 1, 3, 4};
41 // if input is size vector_sizes[i], output is size
42 // vector_sizes[out_vector_idx[i]]
43 // input type name is strcat(gentype, vector_size_names[i]);
44 // and output type name is
45 // strcat(gentype, vector_size_names[out_vector_idx[i]]);
46 static const int size_to_idx[] = {-1,0,1,2,3,-1,-1,-1,4,
47 -1,-1,-1,-1,-1,-1,-1,5};
48 static const char *vector_size_names[] = { "", "2", "3", "4", "8", "16"};
49
50 static const size_t kSizes[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 };
51 static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse );
52
test_hiloeo(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)53 int test_hiloeo(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
54 {
55 cl_int *input_ptr, *output_ptr, *p;
56 int err;
57 cl_uint i;
58 int hasDouble = is_extension_available( device, "cl_khr_fp64" );
59 cl_uint vectorSize, operatorToUse;
60 cl_uint type;
61 MTdata d;
62
63 int expressionMode;
64 int numExpressionModes = 2;
65
66 size_t length = sizeof(cl_int) * 4 * n_elems;
67
68 input_ptr = (cl_int*)malloc(length);
69 output_ptr = (cl_int*)malloc(length);
70
71 p = input_ptr;
72 d = init_genrand( gRandomSeed );
73 for (i=0; i<4 * (cl_uint) n_elems; i++)
74 p[i] = genrand_int32(d);
75 free_mtdata(d); d = NULL;
76
77 for( type = 0; type < sizeof( test_str_names ) / sizeof( test_str_names[0] ); type++ )
78 {
79 // Note: restrict the element count here so we don't end up overrunning the output buffer if we're compensating for 32-bit writes
80 size_t elementCount = length / kSizes[type];
81 cl_mem streams[2];
82
83 // skip double if unavailable
84 if( !hasDouble && ( 0 == strcmp( test_str_names[type], "double" )))
85 continue;
86
87 if( !gHasLong &&
88 (( 0 == strcmp( test_str_names[type], "long" )) ||
89 ( 0 == strcmp( test_str_names[type], "ulong" ))))
90 continue;
91
92 log_info( "%s", test_str_names[type] );
93 fflush( stdout );
94
95 // Set up data streams for the type
96 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
97 if (!streams[0])
98 {
99 log_error("clCreateBuffer failed\n");
100 return -1;
101 }
102 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, NULL);
103 if (!streams[1])
104 {
105 log_error("clCreateBuffer failed\n");
106 return -1;
107 }
108
109 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr, 0, NULL, NULL);
110 if (err != CL_SUCCESS)
111 {
112 log_error("clEnqueueWriteBuffer failed\n");
113 return -1;
114 }
115
116 for( operatorToUse = 0; operatorToUse < sizeof( operatorToUse_names ) / sizeof( operatorToUse_names[0] ); operatorToUse++ )
117 {
118 log_info( " %s", operatorToUse_names[ operatorToUse ] );
119 fflush( stdout );
120 for( vectorSize = 1; vectorSize < sizeof( vector_size_names ) / sizeof( vector_size_names[0] ); vectorSize++ ) {
121 for(expressionMode = 0; expressionMode < numExpressionModes; ++expressionMode) {
122
123 cl_program program = NULL;
124 cl_kernel kernel = NULL;
125 cl_uint outVectorSize = out_vector_idx[vectorSize];
126 char expression[1024];
127
128 const char *source[] = {
129 "", // optional pragma string
130 "__kernel void test_", operatorToUse_names[ operatorToUse ], "_", test_str_names[type], vector_size_names[vectorSize],
131 "(__global ", test_str_names[type], vector_size_names[vectorSize],
132 " *srcA, __global ", test_str_names[type], vector_size_names[outVectorSize],
133 " *dst)\n"
134 "{\n"
135 " int tid = get_global_id(0);\n"
136 "\n"
137 " ", test_str_names[type],
138 vector_size_names[out_vector_idx[vectorSize]],
139 " tmp = ", expression, ".", operatorToUse_names[ operatorToUse ], ";\n"
140 " dst[tid] = tmp;\n"
141 "}\n"
142 };
143
144 if(expressionMode == 0) {
145 sprintf(expression, "srcA[tid]");
146 } else if(expressionMode == 1) {
147 switch(vector_sizes[vectorSize]) {
148 case 16:
149 sprintf(expression,
150 "((%s16)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7, srcA[tid].s8, srcA[tid].s9, srcA[tid].sA, srcA[tid].sB, srcA[tid].sC, srcA[tid].sD, srcA[tid].sE, srcA[tid].sf))",
151 test_str_names[type]
152 );
153 break;
154 case 8:
155 sprintf(expression,
156 "((%s8)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3, srcA[tid].s4, srcA[tid].s5, srcA[tid].s6, srcA[tid].s7))",
157 test_str_names[type]
158 );
159 break;
160 case 4:
161 sprintf(expression,
162 "((%s4)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2, srcA[tid].s3))",
163 test_str_names[type]
164 );
165 break;
166 case 3:
167 sprintf(expression,
168 "((%s3)(srcA[tid].s0, srcA[tid].s1, srcA[tid].s2))",
169 test_str_names[type]
170 );
171 break;
172 case 2:
173 sprintf(expression,
174 "((%s2)(srcA[tid].s0, srcA[tid].s1))",
175 test_str_names[type]
176 );
177 break;
178 default :
179 sprintf(expression, "srcA[tid]");
180 log_info("Default\n");
181 }
182 } else {
183 sprintf(expression, "srcA[tid]");
184 }
185
186 if (0 == strcmp( test_str_names[type], "double" ))
187 source[0] = "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n";
188
189 char kernelName[128];
190 snprintf( kernelName, sizeof( kernelName ), "test_%s_%s%s", operatorToUse_names[ operatorToUse ], test_str_names[type], vector_size_names[vectorSize] );
191 err = create_single_kernel_helper(context, &program, &kernel, sizeof( source ) / sizeof( source[0] ), source, kernelName );
192 if (err)
193 return -1;
194
195 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0]);
196 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1]);
197 if (err != CL_SUCCESS)
198 {
199 log_error("clSetKernelArgs failed\n");
200 return -1;
201 }
202
203 //Wipe the output buffer clean
204 uint32_t pattern = 0xdeadbeef;
205 memset_pattern4( output_ptr, &pattern, length );
206 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
207 if (err != CL_SUCCESS)
208 {
209 log_error("clEnqueueWriteBuffer failed\n");
210 return -1;
211 }
212
213 size_t size = elementCount / (vector_aligns[vectorSize]);
214 err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &size, NULL, 0, NULL, NULL);
215 if (err != CL_SUCCESS)
216 {
217 log_error("clEnqueueNDRangeKernel failed\n");
218 return -1;
219 }
220
221 err = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
222 if (err != CL_SUCCESS)
223 {
224 log_error("clEnqueueReadBuffer failed\n");
225 return -1;
226 }
227
228 char *inP = (char *)input_ptr;
229 char *outP = (char *)output_ptr;
230 outP += kSizes[type] * ( ( vector_sizes[outVectorSize] ) -
231 ( vector_sizes[ out_vector_idx[vectorSize] ] ) );
232 // was outP += kSizes[type] * ( ( 1 << outVectorSize ) - ( 1 << ( vectorSize - 1 ) ) );
233 for( size_t e = 0; e < size; e++ )
234 {
235 if( CheckResults( inP, outP, 1, type, vectorSize, operatorToUse ) ) {
236
237 log_info("e is %d\n", (int)e);
238 fflush(stdout);
239 // break;
240 return -1;
241 }
242 inP += kSizes[type] * ( vector_aligns[vectorSize] );
243 outP += kSizes[type] * ( vector_aligns[outVectorSize] );
244 }
245
246 clReleaseKernel( kernel );
247 clReleaseProgram( program );
248 log_info( "." );
249 fflush( stdout );
250 }
251 }
252 }
253
254 clReleaseMemObject( streams[0] );
255 clReleaseMemObject( streams[1] );
256 log_info( "done\n" );
257 }
258
259 log_info("HiLoEO test passed\n");
260
261 free(input_ptr);
262 free(output_ptr);
263
264 return err;
265 }
266
CheckResults(void * in,void * out,size_t elementCount,int type,int vectorSize,int operatorToUse)267 static int CheckResults( void *in, void *out, size_t elementCount, int type, int vectorSize, int operatorToUse )
268 {
269 cl_ulong array[8];
270 void *p = array;
271 size_t halfVectorSize = vector_sizes[out_vector_idx[vectorSize]];
272 size_t cmpVectorSize = vector_sizes[out_vector_idx[vectorSize]];
273 // was 1 << (vectorSize-1);
274 OffsetFunc f = offsetFuncs[ operatorToUse ];
275 size_t elementSize = kSizes[type];
276
277 if(vector_size_names[vectorSize][0] == '3') {
278 if(operatorToUse_names[operatorToUse][0] == 'h' ||
279 operatorToUse_names[operatorToUse][0] == 'o') // hi or odd
280 {
281 cmpVectorSize = 1; // special case for vec3 ignored values
282 }
283 }
284
285 switch( elementSize )
286 {
287 case 1:
288 {
289 char *i = (char*)in;
290 char *o = (char*)out;
291 size_t j;
292 cl_uint k;
293 OffsetFunc f = offsetFuncs[ operatorToUse ];
294
295 for( k = 0; k < elementCount; k++ )
296 {
297 char *o2 = (char*)p;
298 for( j = 0; j < halfVectorSize; j++ )
299 o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
300
301 if( memcmp( o, o2, elementSize * cmpVectorSize ) )
302 {
303 log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
304 for( j = 1; j < halfVectorSize * 2; j++ )
305 log_info( ", %d", i[j] );
306 log_info( " } --> { %d", o[0] );
307 for( j = 1; j < halfVectorSize; j++ )
308 log_info( ", %d", o[j] );
309 log_info( " }\n" );
310 return -1;
311 }
312 i += 2 * halfVectorSize;
313 o += halfVectorSize;
314 }
315 }
316 break;
317
318 case 2:
319 {
320 short *i = (short*)in;
321 short *o = (short*)out;
322 size_t j;
323 cl_uint k;
324
325 for( k = 0; k < elementCount; k++ )
326 {
327 short *o2 = (short*)p;
328 for( j = 0; j < halfVectorSize; j++ )
329 o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
330
331 if( memcmp( o, o2, elementSize * cmpVectorSize ) )
332 {
333 log_info( "\n%d) Failure for %s%s.%s { %d", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
334 for( j = 1; j < halfVectorSize * 2; j++ )
335 log_info( ", %d", i[j] );
336 log_info( " } --> { %d", o[0] );
337 for( j = 1; j < halfVectorSize; j++ )
338 log_info( ", %d", o[j] );
339 log_info( " }\n" );
340 return -1;
341 }
342 i += 2 * halfVectorSize;
343 o += halfVectorSize;
344 }
345 }
346 break;
347
348 case 4:
349 {
350 int *i = (int*)in;
351 int *o = (int*)out;
352 size_t j;
353 cl_uint k;
354
355 for( k = 0; k < elementCount; k++ )
356 {
357 int *o2 = (int *)p;
358 for( j = 0; j < halfVectorSize; j++ )
359 o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
360
361 for( j = 0; j < cmpVectorSize; j++ )
362 {
363 /* Allow float nans to be binary different */
364 if( memcmp( &o[j], &o2[j], elementSize ) && !((strcmp(test_str_names[type], "float") == 0) && isnan(((float *)o)[j]) && isnan(((float *)o2)[j])))
365 {
366 log_info( "\n%d) Failure for %s%s.%s { 0x%8.8x", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
367 for( j = 1; j < halfVectorSize * 2; j++ )
368 log_info( ", 0x%8.8x", i[j] );
369 log_info( " } --> { 0x%8.8x", o[0] );
370 for( j = 1; j < halfVectorSize; j++ )
371 log_info( ", 0x%8.8x", o[j] );
372 log_info( " }\n" );
373 return -1;
374 }
375 }
376 i += 2 * halfVectorSize;
377 o += halfVectorSize;
378 }
379 }
380 break;
381
382 case 8:
383 {
384 cl_ulong *i = (cl_ulong*)in;
385 cl_ulong *o = (cl_ulong*)out;
386 size_t j;
387 cl_uint k;
388
389 for( k = 0; k < elementCount; k++ )
390 {
391 cl_ulong *o2 = (cl_ulong*)p;
392 for( j = 0; j < halfVectorSize; j++ )
393 o2[j] = i[ f((int)j, (int)halfVectorSize*2) ];
394
395 if( memcmp( o, o2, elementSize * cmpVectorSize ) )
396 {
397 log_info( "\n%d) Failure for %s%s.%s { 0x%16.16llx", k, test_str_names[type], vector_size_names[ vectorSize ], operatorToUse_names[ operatorToUse ], i[0] );
398 for( j = 1; j < halfVectorSize * 2; j++ )
399 log_info( ", 0x%16.16llx", i[j] );
400 log_info( " } --> { 0x%16.16llx", o[0] );
401 for( j = 1; j < halfVectorSize; j++ )
402 log_info( ", 0x%16.16llx", o[j] );
403 log_info( " }\n" );
404 return -1;
405 }
406 i += 2 * halfVectorSize;
407 o += halfVectorSize;
408 }
409 }
410 break;
411
412 default:
413 log_info( "Internal error. Unknown data type\n" );
414 return -2;
415 }
416
417 return 0;
418 }
419
420
421
422