1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/typeWrappers.h"
18 #include "harness/testHarness.h"
19 #include "harness/conversions.h"
20 
21 #include <vector>
22 
23 typedef long long int lld;
24 typedef long long unsigned llu;
25 
26 const char *test_kernels[] = {
27 "__kernel void kernelA(__global int *dst)\n"
28 "{\n"
29 "\n"
30 " dst[get_global_id(0)]*=3;\n"
31 "\n"
32 "}\n"
33 "__kernel void kernelB(__global int *dst)\n"
34 "{\n"
35 "\n"
36 " dst[get_global_id(0)]++;\n"
37 "\n"
38 "}\n"
39 };
40 
41 #define TEST_SIZE 512
42 #define MAX_QUEUES 1000
43 
printPartition(cl_device_partition_property partition)44 const char *printPartition(cl_device_partition_property partition)
45 {
46   switch (partition) {
47     case (0):                                      return "<NONE>";
48     case (CL_DEVICE_PARTITION_EQUALLY):            return "CL_DEVICE_PARTITION_EQUALLY";
49     case (CL_DEVICE_PARTITION_BY_COUNTS):          return "CL_DEVICE_PARTITION_BY_COUNTS";
50     case (CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN): return "CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN";
51     default:                                       return "<unknown>";
52   } // switch
53 }
54 
printAffinity(cl_device_affinity_domain affinity)55 const char *printAffinity(cl_device_affinity_domain affinity)
56 {
57   switch (affinity) {
58     case (0):                                            return "<NONE>";
59     case (CL_DEVICE_AFFINITY_DOMAIN_NUMA):               return "CL_DEVICE_AFFINITY_DOMAIN_NUMA";
60     case (CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE";
61     case (CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE";
62     case (CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE";
63     case (CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE):           return "CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE";
64     case (CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE): return "CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE";
65     default:                                             return "<unknown>";
66   } // switch
67 }
create_single_kernel_helper(cl_context context,cl_program * outProgram,cl_kernel * outKernel,unsigned int numKernelLines,const char ** kernelProgram,const char * kernelName,const cl_device_id * parentDevice)68 int create_single_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const cl_device_id *parentDevice )
69 {
70     int error = CL_SUCCESS;
71 
72     /* Create the program object from source */
73     error = create_single_kernel_helper_create_program(context, outProgram, numKernelLines, kernelProgram);
74     if( *outProgram == NULL || error != CL_SUCCESS)
75     {
76         print_error( error, "clCreateProgramWithSource failed" );
77         return error;
78     }
79 
80     /* Compile the program */
81     int buildProgramFailed = 0;
82     int printedSource = 0;
83     error = clBuildProgram( *outProgram, ((parentDevice == NULL) ? 0 : 1), parentDevice, NULL, NULL, NULL );
84     if (error != CL_SUCCESS)
85     {
86         unsigned int i;
87         print_error(error, "clBuildProgram failed");
88         buildProgramFailed = 1;
89         printedSource = 1;
90         log_error( "Original source is: ------------\n" );
91         for( i = 0; i < numKernelLines; i++ )
92             log_error( "%s", kernelProgram[ i ] );
93     }
94 
95     // Verify the build status on all devices
96     cl_uint deviceCount = 0;
97     error = clGetProgramInfo( *outProgram, CL_PROGRAM_NUM_DEVICES, sizeof( deviceCount ), &deviceCount, NULL );
98     if (error != CL_SUCCESS) {
99         print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
100         return error;
101     }
102 
103     if (deviceCount == 0) {
104         log_error("No devices found for program.\n");
105         return -1;
106     }
107 
108     cl_device_id    *devices = (cl_device_id*) malloc( deviceCount * sizeof( cl_device_id ) );
109     if( NULL == devices )
110         return -1;
111     memset( devices, 0, deviceCount * sizeof( cl_device_id ));
112     error = clGetProgramInfo( *outProgram, CL_PROGRAM_DEVICES, sizeof( cl_device_id ) * deviceCount, devices, NULL );
113     if (error != CL_SUCCESS) {
114         print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
115         free( devices );
116         return error;
117     }
118 
119     cl_uint z;
120     for( z = 0; z < deviceCount; z++ )
121     {
122         char deviceName[4096] = "";
123         error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof( deviceName), deviceName, NULL);
124         if (error != CL_SUCCESS || deviceName[0] == '\0') {
125             log_error("Device \"%d\" failed to return a name\n", z);
126             print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
127         }
128 
129         cl_build_status buildStatus;
130         error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL);
131         if (error != CL_SUCCESS) {
132             print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
133             free( devices );
134             return error;
135         }
136 
137         if (buildStatus != CL_BUILD_SUCCESS || buildProgramFailed) {
138             char log[10240] = "";
139             if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed) log_error("clBuildProgram returned an error, but buildStatus is marked as CL_BUILD_SUCCESS.\n");
140 
141             char statusString[64] = "";
142             if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
143                 sprintf(statusString, "CL_BUILD_SUCCESS");
144             else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
145                 sprintf(statusString, "CL_BUILD_NONE");
146             else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
147                 sprintf(statusString, "CL_BUILD_ERROR");
148             else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
149                 sprintf(statusString, "CL_BUILD_IN_PROGRESS");
150             else
151                 sprintf(statusString, "UNKNOWN (%d)", buildStatus);
152 
153             if (buildStatus != CL_BUILD_SUCCESS) log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString);
154             error = clGetProgramBuildInfo( *outProgram, devices[z], CL_PROGRAM_BUILD_LOG, sizeof(log), log, NULL );
155             if (error != CL_SUCCESS || log[0]=='\0'){
156                 log_error("Device %d (%s) failed to return a build log\n", z, deviceName);
157                 if (error) {
158                     print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
159                     free( devices );
160                     return error;
161                 } else {
162                     log_error("clGetProgramBuildInfo returned an empty log.\n");
163                     free( devices );
164                     return -1;
165                 }
166             }
167             // In this case we've already printed out the code above.
168             if (!printedSource)
169             {
170                 unsigned int i;
171                 log_error( "Original source is: ------------\n" );
172                 for( i = 0; i < numKernelLines; i++ )
173                     log_error( "%s", kernelProgram[ i ] );
174                 printedSource = 1;
175             }
176             log_error( "Build log for device \"%s\" is: ------------\n", deviceName );
177             log_error( "%s\n", log );
178             log_error( "\n----------\n" );
179             free( devices );
180             return -1;
181         }
182     }
183 
184     /* And create a kernel from it */
185     *outKernel = clCreateKernel( *outProgram, kernelName, &error );
186     if( *outKernel == NULL || error != CL_SUCCESS)
187     {
188         print_error( error, "Unable to create kernel" );
189         free( devices );
190         return error;
191     }
192 
193     free( devices );
194     return 0;
195 }
196 
197 template<class T>
198 class AutoDestructArray
199 {
200 public:
AutoDestructArray(T * arr)201     AutoDestructArray(T* arr) : m_arr(arr) {}
~AutoDestructArray()202     ~AutoDestructArray() { if (m_arr) delete [] m_arr; }
203 
204 private:
205     T* m_arr;
206 };
207 
test_device_set(size_t deviceCount,size_t queueCount,cl_device_id * devices,int num_elements,cl_device_id * parentDevice=NULL)208 int test_device_set(size_t deviceCount, size_t queueCount, cl_device_id *devices, int num_elements, cl_device_id *parentDevice = NULL)
209 {
210     int error;
211     clContextWrapper context;
212     clProgramWrapper program;
213     clKernelWrapper kernels[2];
214     clMemWrapper  stream;
215     clCommandQueueWrapper queues[MAX_QUEUES];
216     size_t threads[1], localThreads[1];
217     int data[TEST_SIZE];
218     int outputData[TEST_SIZE];
219     int expectedResults[TEST_SIZE];
220     int *expectedResultsOneDeviceArray = new int[deviceCount * TEST_SIZE];
221     int **expectedResultsOneDevice = (int**)alloca(sizeof(int**) * deviceCount);
222     size_t i;
223     AutoDestructArray<int> autoDestruct(expectedResultsOneDeviceArray);
224 
225     for (i=0; i<deviceCount; i++) {
226         expectedResultsOneDevice[i] = expectedResultsOneDeviceArray + (i * TEST_SIZE);
227     }
228 
229     memset(queues, 0, sizeof(queues));
230 
231     RandomSeed seed( gRandomSeed );
232 
233     if (queueCount > MAX_QUEUES) {
234         log_error("Number of queues (%ld) is greater than the number for which the test was written (%d).", queueCount, MAX_QUEUES);
235         return -1;
236     }
237 
238     log_info("Testing with %ld queues on %ld devices, %ld kernel executions.\n", queueCount, deviceCount, queueCount*num_elements/TEST_SIZE);
239 
240     for (i=0; i<deviceCount; i++) {
241         size_t deviceNameSize;
242         error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 0, NULL, &deviceNameSize);
243         test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
244         char *deviceName = (char *)alloca(deviceNameSize * (sizeof(char)));
245         error = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, deviceNameSize, deviceName, NULL);
246         test_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
247         log_info("Device %ld is \"%s\".\n", i, deviceName);
248     }
249 
250     /* Create a context */
251     context = clCreateContext( NULL, (cl_uint)deviceCount, devices, notify_callback, NULL, &error );
252     test_error( error, "Unable to create testing context" );
253 
254     /* Create our kernels (they all have the same arguments so we don't need multiple ones for each device) */
255     if( create_single_kernel_helper( context, &program, &kernels[0], 1, test_kernels, "kernelA", parentDevice ) != 0 )
256     {
257         return -1;
258     }
259 
260     kernels[1] = clCreateKernel(program, "kernelB", &error);
261     test_error(error, "clCreateKernel failed");
262 
263 
264     /* Now create I/O streams */
265     for( i = 0; i < TEST_SIZE; i++ )
266         data[i] = genrand_int32(seed);
267 
268     stream = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
269                             sizeof(cl_int) * TEST_SIZE, data, &error);
270     test_error( error, "Unable to create test array" );
271 
272     // Update the expected results
273     for( i = 0; i < TEST_SIZE; i++ ) {
274         expectedResults[i] = data[i];
275         for (size_t j=0; j<deviceCount; j++)
276             expectedResultsOneDevice[j][i] = data[i];
277     }
278 
279 
280     // Set the arguments
281     error = clSetKernelArg( kernels[0], 0, sizeof( stream ), &stream);
282     test_error( error, "Unable to set kernel arguments" );
283     error = clSetKernelArg( kernels[1], 0, sizeof( stream ), &stream);
284     test_error( error, "Unable to set kernel arguments" );
285 
286     /* Run the test */
287     threads[0] = (size_t)TEST_SIZE;
288 
289     error = get_max_common_work_group_size( context, kernels[0], threads[0], &localThreads[ 0 ] );
290     test_error( error, "Unable to calc work group size" );
291 
292     /* Create work queues */
293     for( i = 0; i < queueCount; i++ )
294     {
295         queues[i] = clCreateCommandQueueWithProperties( context, devices[ i % deviceCount ], 0, &error );
296         if (error != CL_SUCCESS || queues[i] == NULL) {
297             log_info("Could not create queue[%d].\n", (int)i);
298             queueCount = i;
299             break;
300         }
301     }
302     log_info("Testing with %d queues.\n", (int)queueCount);
303 
304     /* Enqueue executions */
305     for( int z = 0; z<num_elements/TEST_SIZE; z++) {
306         for( i = 0; i < queueCount; i++ )
307         {
308             // Randomly choose a kernel to execute.
309             int kernel_selection = (int)get_random_float(0, 2, seed);
310             error = clEnqueueNDRangeKernel( queues[ i ], kernels[ kernel_selection ], 1, NULL, threads, localThreads, 0, NULL, NULL );
311             test_error( error, "Kernel execution failed" );
312 
313             // Update the expected results
314             for( int j = 0; j < TEST_SIZE; j++ ) {
315                 expectedResults[j] = (kernel_selection) ? expectedResults[j]+1 : expectedResults[j]*3;
316                 expectedResultsOneDevice[i % deviceCount][j] = (kernel_selection) ? expectedResultsOneDevice[i % deviceCount][j]+1 : expectedResultsOneDevice[i % deviceCount][j]*3;
317             }
318 
319             // Force the queue to finish so the next one will be in sync
320             error = clFinish(queues[i]);
321             test_error( error, "clFinish failed");
322         }
323     }
324 
325     /* Read results */
326     int errors = 0;
327     for (int q = 0; q<(int)queueCount; q++) {
328         error = clEnqueueReadBuffer( queues[ q ], stream, CL_TRUE, 0, sizeof(cl_int)*TEST_SIZE, (char *)outputData, 0, NULL, NULL );
329         test_error( error, "Unable to get result data set" );
330 
331         int errorsThisTime = 0;
332         /* Verify all of the data now */
333         for( i = 0; i < TEST_SIZE; i++ )
334         {
335             if( expectedResults[ i ] != outputData[ i ] )
336             {
337                 log_error( "ERROR: Sample data did not verify for queue %d on device %ld (sample %d, expected %d, got %d)\n",
338                     q, q % deviceCount, (int)i, expectedResults[ i ], outputData[ i ] );
339                 for (size_t j=0; j<deviceCount; j++) {
340                     if (expectedResultsOneDevice[j][i] == outputData[i])
341                         log_info("Sample consistent with only device %ld having modified the data.\n", j);
342                 }
343                 errorsThisTime++;
344                 break;
345             }
346         }
347         if (errorsThisTime)
348             errors++;
349     }
350 
351     /* All done now! */
352     if (errors)
353         return -1;
354     return 0;
355 }
356 
357 
init_device_partition_test(cl_device_id parentDevice,cl_uint & maxComputeUnits,cl_uint & maxSubDevices)358 int init_device_partition_test(cl_device_id parentDevice, cl_uint &maxComputeUnits, cl_uint &maxSubDevices)
359 {
360     int err = clGetDeviceInfo(parentDevice, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(maxComputeUnits), &maxComputeUnits, NULL);
361     test_error( err, "Unable to get maximal number of compute units" );
362     err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(maxSubDevices), &maxSubDevices, NULL);
363     test_error( err, "Unable to get maximal number of sub-devices" );
364 
365     log_info("Maximal number of sub-devices on device %p is %d.\n", parentDevice, maxSubDevices );
366     return 0;
367 }
368 
test_device_partition_type_support(cl_device_id parentDevice,const cl_device_partition_property partitionType,const cl_device_affinity_domain affinityDomain)369 int test_device_partition_type_support(cl_device_id parentDevice, const cl_device_partition_property partitionType, const cl_device_affinity_domain affinityDomain)
370 {
371     typedef std::vector< cl_device_partition_property > properties_t;
372     properties_t supportedProps( 3 ); // only 3 types defined in the spec (but implementation can define more)
373     size_t const propSize = sizeof( cl_device_partition_property ); // Size of one property in bytes.
374     size_t size;    // size of all properties in bytes.
375     cl_int err;
376     size = 0;
377     err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, 0, NULL, & size );
378     if ( err == CL_SUCCESS ) {
379         if ( size % propSize != 0 ) {
380             log_error( "ERROR: clGetDeviceInfo: Bad size of returned partition properties (%llu), it must me a multiply of partition property size (%llu)\n", llu( size ), llu( propSize ) );
381             return -1;
382         }
383         supportedProps.resize( size / propSize );
384         size = 0;
385         err = clGetDeviceInfo( parentDevice, CL_DEVICE_PARTITION_PROPERTIES, supportedProps.size() * propSize, & supportedProps.front(), & size );
386         test_error_ret( err, "Unable to get device partition properties (2)", -1 );
387     } else if ( err == CL_INVALID_VALUE ) {
388         log_error( "ERROR: clGetDeviceInfo: CL_DEVICE_PARTITION_PROPERTIES is not supported.\n" );
389         return -1;
390     } else {
391         test_error_ret( err, "Unable to get device partition properties (1)", -1 );
392     };
393     for ( int i = 0; i < supportedProps.size(); i++)
394     {
395         if (supportedProps[i] == partitionType)
396         {
397            if (partitionType == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
398            {
399               cl_device_affinity_domain supportedAffinityDomain;
400               err = clGetDeviceInfo(parentDevice, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, sizeof(supportedAffinityDomain), &supportedAffinityDomain, NULL);
401               test_error( err, "Unable to get supported affinity domains" );
402               if (supportedAffinityDomain & affinityDomain)
403                 return 0;
404            }
405            else
406             return 0;
407         }
408     }
409 
410     return -1;
411 }
412 
test_partition_of_device(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements,cl_device_partition_property * partition_type,cl_uint starting_property,cl_uint ending_property)413 int test_partition_of_device(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, cl_device_partition_property *partition_type,
414                              cl_uint starting_property, cl_uint ending_property)
415 {
416     cl_uint maxComputeUnits;
417     cl_uint maxSubDevices;    // maximal number of sub-devices that can be created in one call to clCreateSubDevices
418     int err = 0;
419 
420     if (init_device_partition_test(deviceID, maxComputeUnits, maxSubDevices) != 0)
421         return -1;
422 
423     if (maxComputeUnits <= 1)
424         return 0;
425     // confirm that this devices reports how it was partitioned
426     if (partition_type != NULL)
427     { // if we're not the root device
428       size_t psize;
429       err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, 0,  NULL, &psize);
430       test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
431       cl_device_partition_property *properties_returned = (cl_device_partition_property *)alloca(psize);
432       err = clGetDeviceInfo(deviceID, CL_DEVICE_PARTITION_TYPE, psize, (void *) properties_returned, NULL);
433       test_error( err, "Unable to get CL_DEVICE_PARTITION_TYPE" );
434 
435       // test returned type
436       for (cl_uint i = 0;i < psize / sizeof(cl_device_partition_property);i++) {
437         if (properties_returned[i] != partition_type[i]) {
438           if (!(partition_type[0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN &&
439               i == 1 && partition_type[1] == CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE &&
440               (properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_NUMA     ||
441                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE ||
442                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE ||
443                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE ||
444                properties_returned[1] == CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE))) {
445             log_error("properties_returned[%d] 0x%x != 0x%x partition_type[%d].", i, properties_returned[i], partition_type[i], i);
446             return -1;
447               }
448         }
449       } // for
450     }
451 
452 #define PROPERTY_TYPES 8
453     cl_device_partition_property partitionProp[PROPERTY_TYPES][5] = {
454         { CL_DEVICE_PARTITION_EQUALLY, maxComputeUnits / 2, 0, 0, 0 } ,
455         { CL_DEVICE_PARTITION_BY_COUNTS, 1, maxComputeUnits - 1, CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 } ,
456         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NUMA, 0, 0, 0 } ,
457         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE, 0, 0, 0 } ,
458         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE, 0, 0, 0 } ,
459         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE, 0, 0, 0 } ,
460         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE, 0, 0, 0 } ,
461         { CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE, 0, 0, 0 }
462     };
463 
464     // loop thru each type, creating sub-devices for each type
465     for (cl_uint i = starting_property;i < ending_property;i++) {
466 
467       if (test_device_partition_type_support(deviceID, partitionProp[i][0], partitionProp[i][1]) != 0)
468       {
469         if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
470         {
471           log_info( "Device partition type \"%s\" \"%s\" is not supported on device %p. Skipping test...\n",
472                       printPartition(partitionProp[i][0]),
473                       printAffinity(partitionProp[i][1]), deviceID);
474         }
475         else
476         {
477           log_info( "Device partition type \"%s\" is not supported on device %p. Skipping test...\n",
478                       printPartition(partitionProp[i][0]), deviceID);
479         }
480         continue;
481       }
482 
483       if (partitionProp[i][0] == CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN)
484       {
485         log_info("Testing on device %p partition type \"%s\" \"%s\"\n", deviceID, printPartition(partitionProp[i][0]),
486                   printAffinity(partitionProp[i][1]));
487       }
488       else
489       {
490         log_info("Testing on device %p partition type \"%s\" (%d,%d)\n", deviceID, printPartition(partitionProp[i][0]),
491                   partitionProp[i][1], partitionProp[i][2]);
492       }
493 
494       cl_uint deviceCount;
495 
496       // how many sub-devices can we create?
497       err = clCreateSubDevices(deviceID, partitionProp[i], 0, NULL, &deviceCount);
498       if ( err == CL_DEVICE_PARTITION_FAILED ) {
499           log_info( "The device %p could not be further partitioned.\n", deviceID );
500           continue;
501       }
502       test_error( err, "Failed to get number of sub-devices" );
503 
504       // get the list of subDevices
505       //  create room for 1 more device_id, so that we can put the parent device in there.
506       cl_device_id *subDevices = (cl_device_id*)alloca(sizeof(cl_device_id) * (deviceCount + 1));
507       err = clCreateSubDevices(deviceID, partitionProp[i], deviceCount, subDevices, &deviceCount);
508       test_error( err, "Actual creation of sub-devices failed" );
509 
510       log_info("Testing on all devices in context\n");
511       err = test_device_set(deviceCount, deviceCount, subDevices, num_elements);
512       if (err == 0)
513       {
514           log_info("Testing on a parent device for context\n");
515 
516           // add the parent device
517           subDevices[deviceCount] = deviceID;
518           err = test_device_set(deviceCount + 1, deviceCount, subDevices, num_elements, &deviceID);
519       }
520       if (err != 0)
521       {
522           printf("error! returning %d\n",err);
523           return err;
524       }
525 
526       // now, recurse and test the FIRST of these sub-devices, to make sure it can be further partitioned
527       err = test_partition_of_device(subDevices[0], context, queue, num_elements, partitionProp[i], starting_property, ending_property);
528       if (err != 0)
529       {
530           printf("error! returning %d\n",err);
531           return err;
532       }
533 
534       for (cl_uint j=0;j < deviceCount;j++)
535       {
536         err = clReleaseDevice(subDevices[j]);
537         test_error( err, "\n Releasing sub-device failed \n" );
538       }
539 
540     } // for
541 
542     log_info("Testing on all device %p finished\n", deviceID);
543     return 0;
544 }
545 
546 
test_partition_equally(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)547 int test_partition_equally(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
548 {
549   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 1);
550 }
551 
test_partition_by_counts(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)552 int test_partition_by_counts(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
553 {
554   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 1, 2);
555 }
556 
test_partition_by_affinity_domain_numa(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)557 int test_partition_by_affinity_domain_numa(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
558 {
559   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 2, 3);
560 }
561 
test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)562 int test_partition_by_affinity_domain_l4_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
563 {
564   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 3, 4);
565 }
566 
test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)567 int test_partition_by_affinity_domain_l3_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
568 {
569   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 4, 5);
570 }
571 
test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)572 int test_partition_by_affinity_domain_l2_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
573 {
574   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 5, 6);
575 }
576 
test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)577 int test_partition_by_affinity_domain_l1_cache(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
578 {
579   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 6, 7);
580 }
581 
test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)582 int test_partition_by_affinity_domain_next_partitionable(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
583 {
584   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 7, 8);
585 }
586 
test_partition_all(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)587 int test_partition_all(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
588 {
589   return test_partition_of_device(deviceID, context, queue, num_elements, NULL, 0, 8);
590 }
591