1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/conversions.h"
18 #include "harness/typeWrappers.h"
19 #include "harness/testHarness.h"
20 
21 // #define USE_NEW_SYNTAX    1
22 // The number of shuffles to test per test
23 #define NUM_TESTS 32
24 // The number of times to run each combination of shuffles
25 #define NUM_ITERATIONS_PER_TEST 2
26 #define MAX_PROGRAM_SIZE NUM_TESTS*1024
27 #define PRINT_SHUFFLE_KERNEL_SOURCE 0
28 #define SPEW_ORDER_DETAILS 0
29 
30 enum ShuffleMode
31 {
32     kNormalMode = 0,
33     kFunctionCallMode,
34     kArrayAccessMode,
35     kBuiltInFnMode,
36     kBuiltInDualInputFnMode
37 };
38 
39 static const char *shuffleKernelPattern[3] =  {
40     "__kernel void sample_test( __global %s%s *source, __global %s%s *dest )\n"
41     "{\n"
42     "    if (get_global_id(0) != 0) return;\n"
43     "     //%s%s src1 %s, src2%s;\n",// Here's a comma...
44                                     // Above code is commented out for now, but keeping around for testing local storage options
45     "}\n" };
46 
47 static const char *shuffleTempPattern = "  %s%s tmp;\n";
48 
49 static const char *clearTempPattern = "        tmp = (%s%s)((%s)0);\n";
50 
51 static const char *shuffleSinglePattern =
52 "        tmp%s%s = source[%d]%s%s;\n"
53 "        dest[%d] = tmp;\n"
54 ;
55 
56 static const char * shuffleSinglePatternV3src =
57 "           tmp%s%s = vload3(%d, source)%s%s;\n"
58 "        dest[%d] = tmp;\n";
59 
60 static const char * shuffleSinglePatternV3dst =
61 "        tmp%s%s = source[%d]%s%s;\n"
62 "           vstore3(tmp, %d, dest);\n";
63 
64 
65 static const char * shuffleSinglePatternV3srcV3dst =
66 "tmp%s%s = vload3(%d, source)%s%s;\n"
67 "vstore3(tmp, %d, dest);\n";
68 
69 static const char *shuffleFnLinePattern = "%s%s shuffle_fn( %s%s source );\n%s%s shuffle_fn( %s%s source ) { return source; }\n\n";
70 
71 static const char *shuffleFnPattern =
72 "        tmp%s%s = shuffle_fn( source[%d] )%s%s;\n"
73 "        dest[%d] = tmp;\n"
74 ;
75 
76 
77 static const char *shuffleFnPatternV3src =
78 "        tmp%s%s = shuffle_fn( vload3(%d, source) )%s%s;\n"
79 "        dest[%d] = tmp;\n"
80 ;
81 
82 
83 static const char *shuffleFnPatternV3dst =
84 "        tmp%s%s = shuffle_fn( source[%d] )%s%s;\n"
85 "               vstore3(tmp, %d, dest);\n"
86 ;
87 
88 
89 static const char *shuffleFnPatternV3srcV3dst =
90 "        tmp%s%s = shuffle_fn(vload3(%d, source) )%s%s;\n"
91 "               vstore3(tmp, %d, dest);\n"
92 ;
93 
94 // shuffle() built-in function patterns
95 static const char *shuffleBuiltInPattern =
96 "        {\n"
97 "            %s%s src1 = %s;\n"
98 "            %s%s%s mask = (%s%s%s)( %s );\n"
99 "            tmp = shuffle( src1, mask );\n"
100 "            %s;\n"
101 "        }\n"
102 ;
103 
104 // shuffle() built-in dual-input function patterns
105 static const char *shuffleBuiltInDualPattern =
106 "        {\n"
107 "            %s%s src1 = %s;\n"
108 "            %s%s src2 = %s;\n"
109 "            %s%s%s mask = (%s%s%s)( %s );\n"
110 "            tmp = shuffle2( src1, src2, mask );\n"
111 "            %s;\n"
112 "        }\n"
113 ;
114 
115 
116 typedef unsigned char ShuffleOrder[ 16 ];
117 
incrementShuffleOrder(ShuffleOrder & order,size_t orderSize,size_t orderRange)118 void incrementShuffleOrder( ShuffleOrder &order, size_t orderSize, size_t orderRange )
119 {
120     for( size_t i = 0; i < orderSize; i++ )
121     {
122         order[ i ]++;
123         if( order[ i ] < orderRange )
124             return;
125         order[ i ] = 0;
126     }
127 }
128 
shuffleOrderContainsDuplicates(ShuffleOrder & order,size_t orderSize)129 bool shuffleOrderContainsDuplicates( ShuffleOrder &order, size_t orderSize )
130 {
131     bool flags[ 16 ] = { false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false };
132     for( size_t i = 0; i < orderSize; i++ )
133     {
134         if( flags[ order[ i ] ] )
135             return true;
136         flags[ order[ i ] ] = true;
137     }
138     return false;
139 }
140 
shuffleVector(unsigned char * inVector,unsigned char * outVector,ShuffleOrder order,size_t vecSize,size_t typeSize,cl_uint lengthToUse)141 static void shuffleVector( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
142 {
143     for(size_t i = 0; i < lengthToUse; i++ )
144     {
145         unsigned char *inPtr = inVector + typeSize *order[ i ];
146         memcpy( outVector, inPtr, typeSize );
147         outVector += typeSize;
148     }
149 }
150 
shuffleVector2(unsigned char * inVector,unsigned char * outVector,ShuffleOrder order,size_t vecSize,size_t typeSize,cl_uint lengthToUse)151 static void shuffleVector2( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
152 {
153     for(size_t i = 0; i < lengthToUse; i++ )
154     {
155         unsigned char *outPtr = outVector + typeSize *order[ i ];
156         memcpy( outPtr, inVector, typeSize );
157         inVector += typeSize;
158     }
159 }
160 
shuffleVectorDual(unsigned char * inVector,unsigned char * inSecondVector,unsigned char * outVector,ShuffleOrder order,size_t vecSize,size_t typeSize,cl_uint lengthToUse)161 static void shuffleVectorDual( unsigned char *inVector, unsigned char *inSecondVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse )
162 {
163     // This is tricky: the indices of each shuffle are in a range (0-srcVecSize * 2-1),
164     // where (srcVecSize-srcVecSize*2-1) refers to the second input.
165     size_t uphalfMask = (size_t)vecSize;
166     size_t lowerBits = (size_t)( vecSize - 1 );
167 
168     for(size_t i = 0; i < lengthToUse; i++ )
169     {
170         unsigned char *inPtr;
171 #if SPEW_ORDER_DETAILS
172         log_info("order[%d] is %d, or %d of %s\n", (int)i,
173                  (int)(order[i]),
174                  (int)(order[i] & lowerBits),
175                  ((order[i]&uphalfMask) == 0)?"lower num":"upper num");
176 #endif
177         if( order[ i ] & uphalfMask )
178             inPtr = inSecondVector + typeSize * ( order[ i ] & lowerBits );
179         else
180             inPtr = inVector + typeSize * ( order[ i ] & lowerBits );
181         memcpy( outVector, inPtr, typeSize );
182         outVector += typeSize;
183     }
184 }
185 
186 
187 static ShuffleOrder sNaturalOrder = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
188 
189 static int useNumbersFlip = 0;
get_order_string(ShuffleOrder & order,size_t vecSize,cl_uint lengthToUse,bool byNumber,MTdata d)190 const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengthToUse, bool byNumber, MTdata d )
191 {
192     // NOTE: names are only valid for hex characters (up to F) but for debugging, we use
193     // this to print out orders for dual inputs, which actually can be valid up to position 31 (two 16-element vectors)
194     // so we go ahead and fake the rest of the alphabet for those other 16 positions, so we have
195     // some (indirectly) meaningful output
196     char names[] = "0123456789abcdefghijklmnopqrstuv";
197     char namesUpperCase[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV";
198     char names2[] = "xyzw!!!!!!!!!!!!";
199 
200     static char orderString[ 18 ];
201 
202     size_t j, idx;
203 
204     // Assume we don't have to use numbers
205     byNumber = 0;
206     // Check to see
207     for( j = 0; j < lengthToUse; j++ )
208     {
209         if (order[j] > 3) {
210             // An index is > xyzw so we need to use numbers
211             byNumber = 1;
212             break;
213         }
214     }
215     // If we can use numbers, do so half the time.
216     if (!byNumber) {
217         byNumber = (useNumbersFlip++)%2;
218     }
219     // Do not use xyzw for vectors whose length is not 2 or 4 per the spec.
220     if (vecSize != 2 || vecSize != 4 || vecSize != 3)
221         byNumber = 1;
222 
223     if( byNumber || vecSize > 4 )
224     {
225         idx = 0;
226         // Randomly chose upper and lower case S
227         orderString[ idx++ ] = random_in_range(0, 1, d) ? 's' : 'S';
228         for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
229             // Randomly choose upper and lower case.
230             orderString[ idx++ ] = random_in_range(0, 1, d) ? names[ (int)order[ j ] ] : namesUpperCase[ (int)order[ j ] ];
231         }
232         orderString[ idx++ ] = 0;
233     }
234     else
235     {
236         for( j = 0; j < vecSize && j < lengthToUse; j++ ) {
237             // Randomly choose upper and lower case.
238             orderString[ j ] = names2[ (int)order[ j ] ];
239         }
240         orderString[ j ] = 0;
241     }
242 
243     return orderString;
244 }
245 
get_order_name(ExplicitType vecType,size_t inVecSize,size_t outVecSize,ShuffleOrder & inOrder,ShuffleOrder & outOrder,cl_uint lengthToUse,MTdata d,bool inUseNumerics,bool outUseNumerics)246 char * get_order_name( ExplicitType vecType, size_t inVecSize, size_t outVecSize, ShuffleOrder &inOrder, ShuffleOrder &outOrder, cl_uint lengthToUse, MTdata d, bool inUseNumerics, bool outUseNumerics )
247 {
248     static char orderName[ 512 ] = "";
249     char inOrderStr[ 512 ], outOrderStr[ 512 ];
250 
251     if( inVecSize == 1 )
252         inOrderStr[ 0 ] = 0;
253     else
254         sprintf( inOrderStr, "%d.%s", (int)inVecSize, get_order_string( inOrder, outVecSize, lengthToUse, inUseNumerics, d ) );
255     if( outVecSize == 1 )
256         outOrderStr[ 0 ] = 0;
257     else
258         sprintf( outOrderStr, "%d.%s", (int)outVecSize, get_order_string( outOrder, outVecSize, lengthToUse, outUseNumerics, d ) );
259 
260     sprintf( orderName, "order %s%s -> %s%s",
261             get_explicit_type_name( vecType ), inOrderStr, get_explicit_type_name( vecType ), outOrderStr );
262     return orderName;
263 }
264 
print_hex_mem_dump(const unsigned char * inDataPtr,const unsigned char * inDataPtr2,const unsigned char * expected,const unsigned char * outDataPtr,size_t inVecSize,size_t outVecSize,size_t typeSize)265 void    print_hex_mem_dump( const unsigned char *inDataPtr, const unsigned char * inDataPtr2, const unsigned char *expected, const unsigned char *outDataPtr, size_t inVecSize, size_t outVecSize, size_t typeSize )
266 {
267     char error [4096] = "";
268     strcat(error, "      Source: ");
269     for( unsigned int j = 0; j < inVecSize * typeSize; j++ )
270     {
271         sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr[ j ] );
272     }
273     if( inDataPtr2 != NULL )
274     {
275         strcat(error, "\n    Source 2: ");
276         for( unsigned int j = 0; j < inVecSize * typeSize; j++ )
277         {
278             sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr2[ j ] );
279         }
280     }
281     strcat(error, "\n    Expected: " );
282     for( unsigned int j = 0; j < outVecSize * typeSize; j++ )
283     {
284         sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)expected[ j ] );
285     }
286     strcat(error, "\n      Actual: " );
287     for( unsigned int j = 0; j < outVecSize * typeSize; j++ )
288     {
289         sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)outDataPtr[ j ] );
290     }
291     log_info("%s\n", error);
292 }
293 
generate_shuffle_mask(char * outMaskString,size_t maskSize,const ShuffleOrder * order)294 void generate_shuffle_mask( char *outMaskString, size_t maskSize, const ShuffleOrder *order )
295 {
296     outMaskString[ 0 ] = 0;
297     if( order != NULL )
298     {
299         for( size_t jj = 0; jj < maskSize; jj++ )
300         {
301             char thisMask[ 16 ];
302             sprintf( thisMask, "%s%d", ( jj == 0 ) ? "" : ", ", (*order)[ jj ] );
303             strcat( outMaskString, thisMask );
304         }
305     }
306     else
307     {
308         for( size_t jj = 0; jj < maskSize; jj++ )
309         {
310             char thisMask[ 16 ];
311             sprintf( thisMask, "%s%ld", ( jj == 0 ) ? "" : ", ", jj );
312             strcat( outMaskString, thisMask );
313         }
314     }
315 }
316 
create_shuffle_kernel(cl_context context,cl_program * outProgram,cl_kernel * outKernel,size_t * outRealVecSize,ExplicitType vecType,size_t inVecSize,size_t outVecSize,cl_uint * lengthToUse,bool inUseNumerics,bool outUseNumerics,size_t numOrders,ShuffleOrder * inOrders,ShuffleOrder * outOrders,MTdata d,ShuffleMode shuffleMode=kNormalMode)317 static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl_kernel *outKernel,
318                                  size_t *outRealVecSize,
319                                  ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, bool inUseNumerics, bool outUseNumerics,
320                                  size_t numOrders, ShuffleOrder *inOrders, ShuffleOrder *outOrders,
321                                  MTdata d, ShuffleMode shuffleMode = kNormalMode )
322 {
323     char inOrder[18], shuffledOrder[18];
324     size_t typeSize;
325     char kernelSource[MAX_PROGRAM_SIZE], progLine[ 10240 ];
326     char *programPtr;
327     char inSizeName[4], outSizeName[4], outRealSizeName[4], inSizeArgName[4];
328     char outSizeNameTmpVar[4];
329 
330 
331     /* Create the source; note vec size is the vector length we are testing */
332     if( inVecSize == 1 ) //|| (inVecSize == 3)) // just have arrays if we go with size 3
333         inSizeName[ 0 ] = 0;
334     else
335         sprintf( inSizeName, "%ld", inVecSize );
336     if( inVecSize == 3 )
337         inSizeArgName[ 0 ] = 0;
338     else
339         strcpy( inSizeArgName, inSizeName );
340 
341 
342     typeSize = get_explicit_type_size( vecType );
343 
344     *outRealVecSize = outVecSize;
345 
346     if( outVecSize == 1 ||  (outVecSize == 3))
347         outSizeName[ 0 ] = 0;
348     else
349         sprintf( outSizeName, "%d", (int)outVecSize );
350 
351     if(outVecSize == 1) {
352         outSizeNameTmpVar[0] = 0;
353     } else {
354         sprintf(outSizeNameTmpVar, "%d", (int)outVecSize);
355     }
356 
357     if( *outRealVecSize == 1 || ( *outRealVecSize == 3))
358         outRealSizeName[ 0 ] = 0;
359     else
360         sprintf( outRealSizeName, "%d", (int)*outRealVecSize );
361 
362 
363     // Loop through and create the source for all order strings
364     kernelSource[ 0 ] = 0;
365     if (vecType == kDouble) {
366         strcat(kernelSource, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n");
367     }
368 
369     if( shuffleMode == kFunctionCallMode )
370     {
371         sprintf( progLine, shuffleFnLinePattern, get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName,
372                 get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName );
373         strcat(kernelSource, progLine);
374     }
375 
376     // We're going to play a REALLY NASTY trick here. We're going to use the inSize insert point
377     // to put in an entire third parameter if we need it
378     char inParamSizeString[ 1024 ];
379     if( shuffleMode == kBuiltInDualInputFnMode )
380         sprintf( inParamSizeString, "%s *secondSource, __global %s%s", inSizeArgName, get_explicit_type_name( vecType ), inSizeArgName );
381     else
382         strcpy( inParamSizeString, inSizeArgName );
383 
384     // These two take care of unused variable warnings
385     const char * src2EnableA = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "/*";
386     const char * src2EnableB = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "*/";
387 
388     sprintf( progLine, shuffleKernelPattern[ 0 ], get_explicit_type_name( vecType ), inParamSizeString,
389             get_explicit_type_name( vecType ), outRealSizeName, get_explicit_type_name( vecType ), inSizeName,
390             src2EnableA, src2EnableB );
391     strcat(kernelSource, progLine);
392     if( inOrders == NULL )
393         strcpy( inOrder, get_order_string( sNaturalOrder, outVecSize, (cl_uint)outVecSize, inUseNumerics, d ) );
394 
395     sprintf( progLine, shuffleTempPattern, get_explicit_type_name( vecType ), outSizeNameTmpVar);
396     strcat(kernelSource, progLine);
397 
398     for( unsigned int i = 0; i < numOrders; i++ )
399     {
400         if( inOrders != NULL )
401             strcpy( inOrder, get_order_string( inOrders[ i ], outVecSize, lengthToUse[i], inUseNumerics, d ) );
402         strcpy( shuffledOrder, get_order_string( outOrders[ i ], outVecSize, lengthToUse[i], outUseNumerics, d ) );
403 
404 
405         sprintf( progLine, clearTempPattern, get_explicit_type_name( vecType ), outSizeName,get_explicit_type_name( vecType ));
406         strcat(kernelSource, progLine);
407 
408 
409         if( shuffleMode == kNormalMode )
410         {
411             if(outVecSize == 3 && inVecSize == 3) {
412                 // shuffleSinglePatternV3srcV3dst
413                 sprintf( progLine, shuffleSinglePatternV3srcV3dst,
414                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
415                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
416             } else if(inVecSize == 3) {
417                 // shuffleSinglePatternV3src
418                 sprintf( progLine, shuffleSinglePatternV3src,
419                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
420                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
421             } else if(outVecSize == 3) {
422                 // shuffleSinglePatternV3dst
423                 sprintf( progLine, shuffleSinglePatternV3dst,
424                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
425                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "",
426                         (int)i );
427             } else {
428                 sprintf( progLine, shuffleSinglePattern,
429                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
430                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
431             }
432         }
433         else if( shuffleMode == kFunctionCallMode )
434         {
435             // log_info("About to make a shuffle line\n");
436             // fflush(stdout);
437             if(inVecSize == 3 && outVecSize == 3) { // swap last two
438                 sprintf( progLine, shuffleFnPatternV3srcV3dst,
439                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
440                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "",
441                         (int)i );
442             } else if(outVecSize == 3)  { // swap last two
443                                           // log_info("Here\n\n");
444                                           // fflush(stdout);
445                 sprintf( progLine, shuffleFnPatternV3dst,
446                         outVecSize > 1 ? "." : "",
447                         outVecSize > 1 ? shuffledOrder : "",
448                         (int)i,
449                         inVecSize > 1 ? "." : "",
450                         inVecSize > 1 ? inOrder : "",
451                         (int)i );
452                 // log_info("\n%s\n", progLine);
453                 // fflush(stdout);
454             } else if(inVecSize == 3) {
455                 sprintf( progLine, shuffleFnPatternV3src,
456                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
457                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
458             } else  {
459                 sprintf( progLine, shuffleFnPattern,
460                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i,
461                         inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i );
462             }
463         }
464         else if( shuffleMode == kArrayAccessMode )
465         { // now we want to replace inSizeName with inSizeNameShuffleFn
466             int vectorSizeToCastTo = 16;
467             cl_uint item;
468             for (item =0; item<lengthToUse[i]; item++) {
469                 int absoluteIndex = i*(int)inVecSize+(int)inOrders[i][item];
470                 int castVectorIndex = absoluteIndex/vectorSizeToCastTo;
471                 size_t castElementIndex = absoluteIndex % vectorSizeToCastTo;
472                 ShuffleOrder myOutOrders, myInOrders;
473                 myOutOrders[0]  = outOrders[i][item];
474                 myInOrders[0] = castElementIndex;
475 
476                 strcpy( inOrder, get_order_string( myInOrders, 1, 1, 0, d ) );
477                 strcpy( shuffledOrder, get_order_string( myOutOrders, 1, 1, 0, d ) );
478 
479                 sprintf(progLine, "     tmp%s%s = ((__global %s%d *)source)[%d]%s%s;\n",
480                         outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "",
481                         get_explicit_type_name( vecType ), vectorSizeToCastTo,
482                         castVectorIndex,
483                         vectorSizeToCastTo > 1 ? "." : "", vectorSizeToCastTo > 1 ? inOrder : "");
484                 strcat(kernelSource, progLine);
485             }
486             if(outVecSize == 3) {
487                 sprintf(progLine,"     vstore3(tmp, %d, (__global %s *)dest);\n",
488                         i, get_explicit_type_name( vecType ));
489                 // probably don't need that last
490                 // cast to (__global %s *) where %s is get_explicit_type_name( vecType)
491             } else {
492                 sprintf(progLine,"     dest[%d] = tmp;\n", i );
493             }
494         }
495         else // shuffleMode == kBuiltInFnMode or kBuiltInDualInputFnMode
496         {
497             if(inVecSize == 3 || outVecSize == 3 ||
498                inVecSize == 1 || outVecSize == 1) {
499                 // log_info("Skipping test for size 3\n");
500                 continue;
501             }
502             ExplicitType maskType = vecType;
503             if( maskType == kFloat )
504                 maskType = kUInt;
505             if( maskType == kDouble) {
506                 maskType = kULong;
507             }
508 
509             char maskString[ 1024 ] = "";
510             size_t maskSize = outVecSize;// ( shuffleMode == kBuiltInDualInputFnMode ) ? ( outVecSize << 1 ) : outVecSize;
511             generate_shuffle_mask( maskString, maskSize, ( outOrders != NULL ) ? &outOrders[ i ] : NULL );
512 
513             // Set up a quick prefix, so mask gets unsigned type regardless of the input/output type
514             char maskPrefix[ 2 ] = "u";
515             if( get_explicit_type_name( maskType )[ 0 ] == 'u' )
516                 maskPrefix[ 0 ] = 0;
517 
518             char progLine2[ 10240 ];
519             if( shuffleMode == kBuiltInDualInputFnMode )
520             {
521                 sprintf( progLine2, shuffleBuiltInDualPattern, get_explicit_type_name( vecType ), inSizeName,
522                         ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]",
523                         get_explicit_type_name( vecType ), inSizeName,
524                         ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)secondSource )" : "secondSource[ %ld ]",
525                         maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName,
526                         maskString,
527                         ( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" );
528 
529                 if( outVecSize == 3 )
530                 {
531                     if( inVecSize == 3 )
532                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) );
533                     else
534                         sprintf( progLine, progLine2, i, i, i, get_explicit_type_name( vecType ) );
535                 }
536                 else
537                 {
538                     if( inVecSize == 3 )
539                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i );
540                     else
541                         sprintf( progLine, progLine2, i, i, i );
542                 }
543             }
544             else
545             {
546                 sprintf( progLine2, shuffleBuiltInPattern, get_explicit_type_name( vecType ), inSizeName,
547                         ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]",
548                         maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName,
549                         maskString,
550                         ( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" );
551 
552                 if( outVecSize == 3 )
553                 {
554                     if( inVecSize == 3 )
555                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) );
556                     else
557                         sprintf( progLine, progLine2, i, i, get_explicit_type_name( vecType ) );
558                 }
559                 else
560                 {
561                     if( inVecSize == 3 )
562                         sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i );
563                     else
564                         sprintf( progLine, progLine2, i, i );
565                 }
566             }
567         }
568 
569         strcat( kernelSource, progLine );
570         if (strlen(kernelSource) > 0.9*MAX_PROGRAM_SIZE)
571             log_info("WARNING: Program has grown to 90%% (%d) of the defined max program size of %d\n", (int)strlen(kernelSource), (int)MAX_PROGRAM_SIZE);
572     }
573     strcat( kernelSource, shuffleKernelPattern[ 1 ] );
574 
575     // Print the kernel source
576     if (PRINT_SHUFFLE_KERNEL_SOURCE)
577         log_info( "Kernel:%s\n", kernelSource );
578 
579     /* Create kernel */
580     programPtr = kernelSource;
581     if( create_single_kernel_helper( context, outProgram, outKernel, 1, (const char **)&programPtr, "sample_test" ) )
582     {
583         return -1;
584     }
585     return 0;
586 }
587 
test_shuffle_dual_kernel(cl_context context,cl_command_queue queue,ExplicitType vecType,size_t inVecSize,size_t outVecSize,cl_uint * lengthToUse,size_t numOrders,ShuffleOrder * inOrderIdx,ShuffleOrder * outOrderIdx,bool inUseNumerics,bool outUseNumerics,MTdata d,ShuffleMode shuffleMode=kNormalMode)588 int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue,
589                              ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, size_t numOrders,
590                              ShuffleOrder *inOrderIdx, ShuffleOrder *outOrderIdx, bool inUseNumerics, bool outUseNumerics, MTdata d,
591                              ShuffleMode shuffleMode = kNormalMode )
592 {
593     clProgramWrapper program;
594     clKernelWrapper kernel;
595     int error;
596     size_t threads[1], localThreads[1];
597     size_t typeSize, outRealVecSize;
598     clMemWrapper streams[ 3 ];
599 
600     /* Create the source */
601     error = create_shuffle_kernel( context, &program, &kernel, &outRealVecSize, vecType,
602                                   inVecSize, outVecSize, lengthToUse, inUseNumerics, outUseNumerics, numOrders, inOrderIdx, outOrderIdx,
603                                   d, shuffleMode );
604     if( error != 0 )
605         return error;
606 
607     typeSize = get_explicit_type_size( vecType );
608 
609 #if !(defined(_WIN32) && defined (_MSC_VER))
610     cl_long inData[ inVecSize * numOrders ];
611     cl_long inSecondData[ inVecSize * numOrders ];
612     cl_long outData[ outRealVecSize * numOrders ];
613 #else
614     cl_long* inData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
615     cl_long* inSecondData  = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long));
616     cl_long* outData = (cl_long*)_malloca(outRealVecSize * numOrders * sizeof(cl_long));
617 #endif
618     memset(outData, 0, outRealVecSize * numOrders * sizeof(cl_long) );
619 
620     generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inData );
621     if( shuffleMode == kBuiltInDualInputFnMode )
622         generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData );
623 
624     streams[0] =
625         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
626                        typeSize * inVecSize * numOrders, inData, &error);
627     test_error( error, "Unable to create input stream" );
628 
629     streams[1] =
630         clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
631                        typeSize * outRealVecSize * numOrders, outData, &error);
632     test_error( error, "Unable to create output stream" );
633 
634     int argIndex = 0;
635     if( shuffleMode == kBuiltInDualInputFnMode )
636     {
637         streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
638                                     typeSize * inVecSize * numOrders,
639                                     inSecondData, &error);
640         test_error( error, "Unable to create second input stream" );
641 
642         error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] );
643         test_error( error, "Unable to set kernel argument" );
644     }
645 
646     // Set kernel arguments
647     error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 0 ] ), &streams[ 0 ] );
648     test_error( error, "Unable to set kernel argument" );
649     error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 1 ] ), &streams[ 1 ] );
650     test_error( error, "Unable to set kernel argument" );
651 
652 
653     /* Run the kernel */
654     threads[0] = numOrders;
655 
656     error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] );
657     test_error( error, "Unable to get work group size to use" );
658 
659     error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL );
660     test_error( error, "Unable to execute test kernel" );
661 
662 
663     // Read the results back
664     error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, typeSize * numOrders * outRealVecSize, outData, 0, NULL, NULL );
665     test_error( error, "Unable to read results" );
666 
667     unsigned char *inDataPtr = (unsigned char *)inData;
668     unsigned char *inSecondDataPtr = (unsigned char *)inSecondData;
669     unsigned char *outDataPtr = (unsigned char *)outData;
670     int ret = 0;
671     int errors_printed = 0;
672     for( size_t i = 0; i < numOrders; i++ )
673     {
674         unsigned char expected[ 1024 ];
675         unsigned char temp[ 1024 ];
676         memset(expected, 0, sizeof(expected));
677         memset(temp, 0, sizeof(temp));
678         if( shuffleMode == kBuiltInFnMode )
679             shuffleVector( inDataPtr, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] );
680         else if( shuffleMode == kBuiltInDualInputFnMode )
681             shuffleVectorDual( inDataPtr, inSecondDataPtr, expected, outOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] );
682         else
683         {
684             shuffleVector( inDataPtr, temp, inOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] );
685             shuffleVector2( temp, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] );
686         }
687 
688         if( memcmp( expected, outDataPtr, outVecSize * typeSize ) != 0 )
689         {
690             log_error( " ERROR: Shuffle test %d FAILED for %s (memory hex dump follows)\n", (int)i,
691                       get_order_name( vecType, inVecSize, outVecSize, inOrderIdx[ i ], outOrderIdx[ i ], lengthToUse[i], d, inUseNumerics, outUseNumerics ) );
692 
693             print_hex_mem_dump( inDataPtr, ( shuffleMode == kBuiltInDualInputFnMode ) ? inSecondDataPtr : NULL, expected, outDataPtr, inVecSize, outVecSize, typeSize );
694 
695             if( ( shuffleMode == kBuiltInFnMode ) || ( shuffleMode == kBuiltInDualInputFnMode ) )
696             {
697                 // Mask would've been different for every shuffle done, so we have to regen it to print it
698                 char maskString[ 1024 ];
699                 generate_shuffle_mask( maskString, outVecSize, ( outOrderIdx != NULL ) ? &outOrderIdx[ i ] : NULL );
700                 log_error( "        Mask:  %s\n", maskString );
701             }
702 
703             ret++;
704             errors_printed++;
705             if (errors_printed > MAX_ERRORS_TO_PRINT)
706             {
707                 log_info("Further errors suppressed.\n");
708                 return ret;
709             }
710         }
711         inDataPtr += inVecSize * typeSize;
712         inSecondDataPtr += inVecSize * typeSize;
713         outDataPtr += outRealVecSize * typeSize;
714     }
715 
716     return ret;
717 }
718 
build_random_shuffle_order(ShuffleOrder & outIndices,unsigned int length,unsigned int selectLength,bool allowRepeats,MTdata d)719 void    build_random_shuffle_order( ShuffleOrder &outIndices, unsigned int length, unsigned int selectLength, bool allowRepeats, MTdata d )
720 {
721     char flags[ 16 ];
722 
723     memset( flags, 0, sizeof( flags ) );
724 
725     for( unsigned int i = 0; i < length; i++ )
726     {
727         char selector = (char)random_in_range( 0, selectLength - 1, d );
728         if( !allowRepeats )
729         {
730             while( flags[ (int)selector ] )
731                 selector = (char)random_in_range( 0, selectLength - 1, d );
732             flags[ (int)selector ] = true;
733         }
734         outIndices[ i ] = selector;
735     }
736 }
737 
738 class shuffleBuffer
739 {
740 public:
741 
shuffleBuffer(cl_context ctx,cl_command_queue queue,ExplicitType type,size_t inSize,size_t outSize,ShuffleMode mode)742     shuffleBuffer( cl_context ctx, cl_command_queue queue, ExplicitType type, size_t inSize, size_t outSize, ShuffleMode mode )
743     {
744         mContext = ctx;
745         mQueue = queue;
746         mVecType = type;
747         mInVecSize = inSize;
748         mOutVecSize = outSize;
749         mShuffleMode = mode;
750 
751         mCount = 0;
752 
753         // Here's the deal with mLengthToUse[i].
754         // if you have, for instance
755         // uchar4 dst;
756         // uchar8 src;
757         // you can do
758         // src.s0213 = dst.s1045;
759         // but you can also do
760         // src.s02 = dst.s10;
761         // which has a different effect
762         // The intent with these "sub lengths" is to test all such
763         // possibilities
764         // Calculate a range of sub-lengths within the vector to copy.
765         int i;
766         size_t maxSize = (mInVecSize < mOutVecSize) ? mInVecSize : mOutVecSize;
767         for(i=0; i<NUM_TESTS; i++)
768         {
769             // Built-in fns can't select sub-lengths (the mask must be the length of the dest vector).
770             // Well, at least for these tests...
771             if( ( mode == kBuiltInFnMode ) || ( mode == kBuiltInDualInputFnMode ) )
772                 mLengthToUse[i]    = (cl_int)mOutVecSize;
773             else
774             {
775                 mLengthToUse[i] = (cl_uint)(((double)i/NUM_TESTS)*maxSize) + 1;
776                 // Force the length to be a valid vector length.
777                 if( ( mLengthToUse[i] == 1 ) && ( mode != kBuiltInFnMode ) )
778                     mLengthToUse[i] = 1;
779                 else if (mLengthToUse[i] < 4)
780                     mLengthToUse[i] = 2;
781                 else if (mLengthToUse[i] < 8)
782                     mLengthToUse[i] = 4;
783                 else if (mLengthToUse[i] < 16)
784                     mLengthToUse[i] = 8;
785                 else
786                     mLengthToUse[i] = 16;
787             }
788         }
789     }
790 
AddRun(ShuffleOrder & inOrder,ShuffleOrder & outOrder,MTdata d)791     int    AddRun( ShuffleOrder &inOrder, ShuffleOrder &outOrder, MTdata d )
792     {
793         memcpy( &mInOrders[ mCount ], &inOrder, sizeof( inOrder ) );
794         memcpy( &mOutOrders[ mCount ], &outOrder, sizeof( outOrder ) );
795         mCount++;
796 
797         if( mCount == NUM_TESTS )
798             return Flush(d);
799 
800         return CL_SUCCESS;
801     }
802 
Flush(MTdata d)803     int Flush( MTdata d )
804     {
805         int err = CL_SUCCESS;
806         if( mCount > 0 )
807         {
808             err = test_shuffle_dual_kernel( mContext, mQueue, mVecType, mInVecSize, mOutVecSize, mLengthToUse,
809                                            mCount, mInOrders, mOutOrders, true, true, d, mShuffleMode );
810             mCount = 0;
811         }
812         return err;
813     }
814 
815 protected:
816     cl_context            mContext;
817     cl_command_queue    mQueue;
818     ExplicitType        mVecType;
819     size_t                mInVecSize, mOutVecSize, mCount;
820     ShuffleMode            mShuffleMode;
821     cl_uint             mLengthToUse[ NUM_TESTS ];
822 
823     ShuffleOrder        mInOrders[ NUM_TESTS ], mOutOrders[ NUM_TESTS ];
824 };
825 
826 
test_shuffle_random(cl_device_id device,cl_context context,cl_command_queue queue,ShuffleMode shuffleMode,MTdata d)827 int test_shuffle_random(cl_device_id device, cl_context context, cl_command_queue queue, ShuffleMode shuffleMode, MTdata d )
828 {
829     ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble };
830     unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
831     unsigned int srcIdx, dstIdx, typeIndex;
832     int error = 0, totalError = 0, prevTotalError = 0;
833     RandomSeed seed(gRandomSeed);
834 
835     for( typeIndex = 0; typeIndex < 10; typeIndex++ )
836     {
837         //log_info( "\n\t%s... ", get_explicit_type_name( vecType[ typeIndex ] ) );
838         //fflush( stdout );
839         if (vecType[typeIndex] == kDouble) {
840             if (!is_extension_available(device, "cl_khr_fp64")) {
841                 log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n");
842                 continue;
843             }
844             log_info("Testing doubles.\n");
845         }
846 
847         if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong )
848         {
849             log_info("Long types are unsupported, skipping.");
850             continue;
851         }
852 
853         error = 0;
854         for( srcIdx = 0; vecSizes[ srcIdx ] != 0 /*&& error == 0*/; srcIdx++ )
855         {
856             for( dstIdx = 0; vecSizes[ dstIdx ] != 0 /*&& error == 0*/; dstIdx++ )
857             {
858                 if( ( ( shuffleMode == kBuiltInDualInputFnMode ) || ( shuffleMode == kBuiltInFnMode ) ) &&
859                    ( ( vecSizes[ dstIdx ] & 1 ) || ( vecSizes[ srcIdx ] & 1 ) ) )
860                 {
861                     // Built-in shuffle functions don't work on size 1 (scalars) or size 3 (vec3s)
862                     continue;
863                 }
864 
865                 log_info("Testing [%s%d to %s%d]... ", get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[srcIdx], get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[dstIdx]);
866                 shuffleBuffer buffer( context, queue, vecType[ typeIndex ], vecSizes[ srcIdx ], vecSizes[ dstIdx ], shuffleMode );
867 
868                 int numTests = NUM_TESTS*NUM_ITERATIONS_PER_TEST;
869                 for( int i = 0; i < numTests /*&& error == 0*/; i++ )
870                 {
871                     ShuffleOrder src, dst;
872                     if( shuffleMode == kBuiltInFnMode )
873                     {
874                         build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d );
875                     }
876                     else if(shuffleMode == kBuiltInDualInputFnMode)
877                     {
878                         build_random_shuffle_order(dst, vecSizes[dstIdx], 2*vecSizes[srcIdx], true, d);
879                     }
880                     else
881                     {
882                         build_random_shuffle_order( src, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d );
883                         build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ dstIdx ], false, d );
884                     }
885 
886                     error = buffer.AddRun( src, dst, seed );
887                     if (error)
888                         totalError++;
889                 }
890                 int test_error = buffer.Flush(seed);
891                 if (test_error)
892                     totalError++;
893 
894                 if (totalError == prevTotalError)
895                     log_info("\tPassed.\n");
896                 else
897                 {
898                     log_error("\tFAILED.\n");
899                     prevTotalError = totalError;
900                 }
901             }
902         }
903     }
904     return totalError;
905 }
906 
test_shuffle_copy(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)907 int test_shuffle_copy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
908 {
909     RandomSeed seed(gRandomSeed);
910     return test_shuffle_random( device, context, queue, kNormalMode, seed );
911 }
912 
test_shuffle_function_call(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)913 int test_shuffle_function_call(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
914 {
915     RandomSeed seed(gRandomSeed);
916     return test_shuffle_random( device, context, queue, kFunctionCallMode, seed );
917 }
918 
test_shuffle_array_cast(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)919 int test_shuffle_array_cast(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
920 {
921     RandomSeed seed(gRandomSeed);
922     return test_shuffle_random( device, context, queue, kArrayAccessMode, seed );
923 }
924 
test_shuffle_built_in(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)925 int test_shuffle_built_in(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
926 {
927     RandomSeed seed(gRandomSeed);
928     return test_shuffle_random( device, context, queue, kBuiltInFnMode, seed );
929 }
930 
test_shuffle_built_in_dual_input(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)931 int test_shuffle_built_in_dual_input(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
932 {
933     RandomSeed seed(gRandomSeed);
934     return test_shuffle_random( device, context, queue, kBuiltInDualInputFnMode, seed );
935 }
936 
937