1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 
18 
19 #include "harness/conversions.h"
20 #include "harness/typeWrappers.h"
21 #include "harness/testHarness.h"
22 
23 #include "structs.h"
24 
25 #include "defines.h"
26 
27 #include "type_replacer.h"
28 
29 
get_align(size_t vecSize)30 size_t get_align(size_t vecSize)
31 {
32     if (vecSize == 3)
33     {
34         return 4;
35     }
36     return vecSize;
37 }
38 
39 /* // Lots of conditionals means this is not gonna be an optimal min on intel.
40  */
41 /* // That's okay, make sure we only call a few times per test, not for every */
42 /* // element */
43 /* size_t min_of_nonzero(size_t a, size_t b) */
44 /* { */
45 /*     if(a != 0 && (a<=b || b==0)) */
46 /*     { */
47 /*     return a; */
48 /*     } */
49 /*     if(b != 0 && (b<a || a==0)) */
50 /*     { */
51 /*     return b; */
52 /*     } */
53 /*     return 0; */
54 /* } */
55 
56 
57 /* size_t get_min_packed_alignment(size_t preSize, size_t typeMultiplePreSize,
58  */
59 /*                 size_t postSize, size_t typeMultiplePostSize, */
60 /*                 ExplicitType kType, size_t vecSize) */
61 /* { */
62 /*     size_t pre_min = min_of_nonzero(preSize,  */
63 /*                     typeMultiplePreSize* */
64 /*                     get_explicit_type_size(kType)); */
65 /*     size_t post_min = min_of_nonzero(postSize,  */
66 /*                     typeMultiplePostSize* */
67 /*                     get_explicit_type_size(kType)); */
68 /*     size_t struct_min = min_of_nonzero(pre_min, post_min); */
69 /*     size_t result =  min_of_nonzero(struct_min, get_align(vecSize) */
70 /*                     *get_explicit_type_size(kType)); */
71 /*     return result; */
72 
73 /* } */
74 
75 
test_vec_internal(cl_device_id deviceID,cl_context context,cl_command_queue queue,const char * pattern,const char * testName,size_t bufSize,size_t preSize,size_t typeMultiplePreSize,size_t postSize,size_t typeMultiplePostSize)76 int test_vec_internal(cl_device_id deviceID, cl_context context,
77                       cl_command_queue queue, const char* pattern,
78                       const char* testName, size_t bufSize, size_t preSize,
79                       size_t typeMultiplePreSize, size_t postSize,
80                       size_t typeMultiplePostSize)
81 {
82     int err;
83     int typeIdx, vecSizeIdx;
84 
85     char tmpBuffer[2048];
86     char srcBuffer[2048];
87 
88     size_t preSizeBytes, postSizeBytes, typeSize, totSize;
89 
90     clState* pClState = newClState(deviceID, context, queue);
91     bufferStruct* pBuffers = newBufferStruct(
92         bufSize, bufSize * sizeof(cl_uint) / sizeof(cl_char), pClState);
93 
94     if (pBuffers == NULL)
95     {
96         destroyClState(pClState);
97         vlog_error("%s : Could not create buffer\n", testName);
98         return -1;
99     }
100 
101     for (typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
102     {
103 
104         // Skip doubles if it is not supported otherwise enable pragma
105         if (types[typeIdx] == kDouble)
106         {
107             if (!is_extension_available(deviceID, "cl_khr_fp64"))
108             {
109                 continue;
110             }
111             else
112             {
113                 doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.",
114                           "#pragma OPENCL EXTENSION cl_khr_fp64: ", ".STATE.",
115                           "enable");
116             }
117         }
118         else
119         {
120             if (types[typeIdx] == kLong || types[typeIdx] == kULong)
121             {
122                 if (gIsEmbedded) continue;
123             }
124 
125             doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.", " ", ".STATE.",
126                       " ");
127         }
128 
129         typeSize = get_explicit_type_size(types[typeIdx]);
130         preSizeBytes = preSize + typeSize * typeMultiplePreSize;
131         postSizeBytes = postSize + typeSize * typeMultiplePostSize;
132 
133 
134         for (vecSizeIdx = 1; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
135         {
136 
137             totSize = preSizeBytes + postSizeBytes
138                 + typeSize * get_align(g_arrVecSizes[vecSizeIdx]);
139 
140             doReplace(srcBuffer, 2048, tmpBuffer, ".TYPE.",
141                       g_arrTypeNames[typeIdx], ".NUM.",
142                       g_arrVecSizeNames[vecSizeIdx]);
143 
144             if (srcBuffer[0] == '\0')
145             {
146                 vlog_error("%s: failed to fill source buf for type %s%s\n",
147                            testName, g_arrTypeNames[typeIdx],
148                            g_arrVecSizeNames[vecSizeIdx]);
149                 destroyBufferStruct(pBuffers, pClState);
150                 destroyClState(pClState);
151                 return -1;
152             }
153 
154             // log_info("Buffer is \"\n%s\n\"\n", srcBuffer);
155             // fflush(stdout);
156 
157             err = clStateMakeProgram(pClState, srcBuffer, testName);
158             if (err)
159             {
160                 vlog_error("%s: Error compiling \"\n%s\n\"", testName,
161                            srcBuffer);
162                 destroyBufferStruct(pBuffers, pClState);
163                 destroyClState(pClState);
164                 return -1;
165             }
166 
167             err = pushArgs(pBuffers, pClState);
168             if (err != 0)
169             {
170                 vlog_error("%s: failed to push args %s%s\n", testName,
171                            g_arrTypeNames[typeIdx],
172                            g_arrVecSizeNames[vecSizeIdx]);
173                 destroyBufferStruct(pBuffers, pClState);
174                 destroyClState(pClState);
175                 return -1;
176             }
177 
178             // log_info("About to Run kernel\n"); fflush(stdout);
179             // now we run the kernel
180             err = runKernel(
181                 pClState,
182                 bufSize
183                     / (g_arrVecSizes[vecSizeIdx] * g_arrTypeSizes[typeIdx]));
184             if (err != 0)
185             {
186                 vlog_error("%s: runKernel fail (%ld threads) %s%s\n", testName,
187                            pClState->m_numThreads, g_arrTypeNames[typeIdx],
188                            g_arrVecSizeNames[vecSizeIdx]);
189                 destroyBufferStruct(pBuffers, pClState);
190                 destroyClState(pClState);
191                 return -1;
192             }
193 
194             // log_info("About to retrieve results\n"); fflush(stdout);
195             err = retrieveResults(pBuffers, pClState);
196             if (err != 0)
197             {
198                 vlog_error("%s: failed to retrieve results %s%s\n", testName,
199                            g_arrTypeNames[typeIdx],
200                            g_arrVecSizeNames[vecSizeIdx]);
201                 destroyBufferStruct(pBuffers, pClState);
202                 destroyClState(pClState);
203                 return -1;
204             }
205 
206 
207             if (preSizeBytes + postSizeBytes == 0)
208             {
209                 // log_info("About to Check Correctness\n"); fflush(stdout);
210                 err = checkCorrectnessAlign(pBuffers, pClState,
211                                             get_align(g_arrVecSizes[vecSizeIdx])
212                                                 * typeSize);
213             }
214             else
215             {
216                 // we're checking for an aligned struct
217                 err = checkPackedCorrectness(pBuffers, pClState, totSize,
218                                              preSizeBytes);
219             }
220 
221             if (err != 0)
222             {
223                 vlog_error("%s: incorrect results %s%s\n", testName,
224                            g_arrTypeNames[typeIdx],
225                            g_arrVecSizeNames[vecSizeIdx]);
226                 vlog_error("%s: Source was \"\n%s\n\"", testName, srcBuffer);
227                 destroyBufferStruct(pBuffers, pClState);
228                 destroyClState(pClState);
229                 return -1;
230             }
231 
232             clStateDestroyProgramAndKernel(pClState);
233         }
234     }
235 
236     destroyBufferStruct(pBuffers, pClState);
237 
238     destroyClState(pClState);
239 
240 
241     // vlog_error("%s : implementation incomplete : FAIL\n", testName);
242     return 0; // -1; // fails on account of not being written.
243 }
244 
245 
246 static const char* patterns[] = {
247     ".PRAGMA..STATE.\n"
248     "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, "
249     ".DST_SCOPE. uint *dest)\n"
250     "{\n"
251     "    int  tid = get_global_id(0);\n"
252     "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)(source+tid));\n"
253     "}\n",
254     ".PRAGMA..STATE.\n"
255     "typedef struct myUnpackedStruct { \n"
256     ".PRE."
257     "    .TYPE..NUM. vec;\n"
258     ".POST."
259     "} testStruct;\n"
260     "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, "
261     ".DST_SCOPE. uint *dest)\n"
262     "{\n"
263     "    .SRC_SCOPE. testStruct test;\n"
264     "    int  tid = get_global_id(0);\n"
265     "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n"
266     "}\n",
267     ".PRAGMA..STATE.\n"
268     "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
269     ".PRE."
270     "    .TYPE..NUM. vec;\n"
271     ".POST."
272     "} testStruct;\n"
273     "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. "
274     "*source, .DST_SCOPE. uint *dest)\n"
275     "{\n"
276     "    .SRC_SCOPE. testStruct test;\n"
277     "    int  tid = get_global_id(0);\n"
278     "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. "
279     "uchar *)&test);\n"
280     "}\n",
281     ".PRAGMA..STATE.\n"
282     "typedef struct myStruct { \n"
283     ".PRE."
284     "    .TYPE..NUM. vec;\n"
285     ".POST."
286     "} testStruct;\n"
287     "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, "
288     ".DST_SCOPE. uint *dest)\n"
289     "{\n"
290     "    int  tid = get_global_id(0);\n"
291     "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n"
292     "}\n",
293     ".PRAGMA..STATE.\n"
294     "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
295     ".PRE."
296     "    .TYPE..NUM. vec;\n"
297     ".POST."
298     "} testStruct;\n"
299     "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE.  testStruct "
300     "*source, .DST_SCOPE. uint *dest)\n"
301     "{\n"
302     "    int  tid = get_global_id(0);\n"
303     "    dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - "
304     "(.SRC_SCOPE. uchar *)&(source[0]));\n"
305     "}\n",
306     // __attribute__ ((packed))
307 };
308 
309 
310 const char* pre_substitution_arr[] = { "",
311                                        "char c;\n",
312                                        "short3 s;",
313                                        ".TYPE.3 tPre;\n",
314                                        ".TYPE. arrPre[5];\n",
315                                        ".TYPE. arrPre[12];\n",
316                                        NULL };
317 
318 
319 // alignments of everything in pre_substitution_arr as raw alignments
320 // 0 if such a thing is meaningless
321 size_t pre_align_arr[] = { 0,
322                            sizeof(cl_char),
323                            4 * sizeof(cl_short),
324                            0, // taken care of in type_multiple_pre_align_arr
325                            0,
326                            0 };
327 
328 // alignments of everything in pre_substitution_arr as multiples of
329 // sizeof(.TYPE.)
330 // 0 if such a thing is meaningless
331 size_t type_multiple_pre_align_arr[] = { 0, 0, 0, 4, 5, 12 };
332 
333 const char* post_substitution_arr[] = { "",
334                                         "char cPost;\n",
335                                         ".TYPE. arrPost[3];\n",
336                                         ".TYPE. arrPost[5];\n",
337                                         ".TYPE.3 arrPost;\n",
338                                         ".TYPE. arrPost[12];\n",
339                                         NULL };
340 
341 
342 // alignments of everything in post_substitution_arr as raw alignments
343 // 0 if such a thing is meaningless
344 size_t post_align_arr[] = { 0, sizeof(cl_char),
345                             0, // taken care of in type_multiple_post_align_arr
346                             0, 0,
347                             0 };
348 
349 // alignments of everything in post_substitution_arr as multiples of
350 // sizeof(.TYPE.)
351 // 0 if such a thing is meaningless
352 size_t type_multiple_post_align_arr[] = { 0, 0, 3, 5, 4, 12 };
353 
354 // there hsould be a packed version of this?
test_vec_align_array(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)355 int test_vec_align_array(cl_device_id deviceID, cl_context context,
356                          cl_command_queue queue, int num_elements)
357 {
358     char tmp[2048];
359     int result;
360 
361     log_info("Testing global\n");
362     doReplace(tmp, (size_t)2048, patterns[0], ".SRC_SCOPE.", "__global",
363               ".DST_SCOPE.", "__global"); //
364     result = test_vec_internal(deviceID, context, queue, tmp,
365                                "test_vec_align_array", BUFFER_SIZE, 0, 0, 0, 0);
366     return result;
367 }
368 
369 
test_vec_align_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)370 int test_vec_align_struct(cl_device_id deviceID, cl_context context,
371                           cl_command_queue queue, int num_elements)
372 {
373     char tmp1[2048], tmp2[2048];
374     int result = 0;
375     int preIdx, postIdx;
376 
377     log_info("testing __private\n");
378     doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__private",
379               ".DST_SCOPE.", "__global"); //
380 
381     for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
382     {
383         for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
384         {
385             doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
386                       pre_substitution_arr[preIdx], ".POST.",
387                       post_substitution_arr[postIdx]);
388 
389             result =
390                 test_vec_internal(deviceID, context, queue, tmp1,
391                                   "test_vec_align_struct", 512, 0, 0, 0, 0);
392             if (result != 0)
393             {
394                 return result;
395             }
396         }
397     }
398 
399     log_info("testing __local\n");
400     doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__local",
401               ".DST_SCOPE.", "__global"); //
402 
403     for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
404     {
405         for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
406         {
407             doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
408                       pre_substitution_arr[preIdx], ".POST.",
409                       post_substitution_arr[postIdx]);
410 
411             result =
412                 test_vec_internal(deviceID, context, queue, tmp1,
413                                   "test_vec_align_struct", 512, 0, 0, 0, 0);
414             if (result != 0)
415             {
416                 return result;
417             }
418         }
419     }
420     return 0;
421 }
422 
test_vec_align_packed_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)423 int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context,
424                                  cl_command_queue queue, int num_elements)
425 {
426     char tmp1[2048], tmp2[2048];
427     int result = 0;
428     int preIdx, postIdx;
429 
430 
431     log_info("Testing __private\n");
432     doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__private",
433               ".DST_SCOPE.", "__global"); //
434 
435     for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
436     {
437         for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
438         {
439             doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
440                       pre_substitution_arr[preIdx], ".POST.",
441                       post_substitution_arr[postIdx]);
442 
443             result = test_vec_internal(
444                 deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
445                 512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
446                 post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
447             if (result != 0)
448             {
449                 return result;
450             }
451         }
452     }
453 
454     log_info("testing __local\n");
455     doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__local",
456               ".DST_SCOPE.", "__global"); //
457 
458     for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
459     {
460         for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
461         {
462             doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
463                       pre_substitution_arr[preIdx], ".POST.",
464                       post_substitution_arr[postIdx]);
465 
466             result = test_vec_internal(
467                 deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
468                 512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
469                 post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
470             if (result != 0)
471             {
472                 return result;
473             }
474         }
475     }
476     return 0;
477 }
478 
test_vec_align_struct_arr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)479 int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context,
480                               cl_command_queue queue, int num_elements)
481 {
482     char tmp1[2048], tmp2[2048];
483     int result = 0;
484     int preIdx, postIdx;
485 
486 
487     log_info("testing __global\n");
488     doReplace(tmp2, (size_t)2048, patterns[3], ".SRC_SCOPE.", "__global",
489               ".DST_SCOPE.", "__global"); //
490 
491     for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
492     {
493         for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
494         {
495             doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
496                       pre_substitution_arr[preIdx], ".POST.",
497                       post_substitution_arr[postIdx]);
498 
499             result = test_vec_internal(deviceID, context, queue, tmp1,
500                                        "test_vec_align_struct_arr", BUFFER_SIZE,
501                                        0, 0, 0, 0);
502             if (result != 0)
503             {
504                 return result;
505             }
506         }
507     }
508     return 0;
509 }
510 
test_vec_align_packed_struct_arr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)511 int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context,
512                                      cl_command_queue queue, int num_elements)
513 {
514     char tmp1[2048], tmp2[2048];
515     int result = 0;
516     int preIdx, postIdx;
517 
518 
519     log_info("Testing __global\n");
520     doReplace(tmp2, (size_t)2048, patterns[4], ".SRC_SCOPE.", "__global",
521               ".DST_SCOPE.", "__global"); //
522 
523     for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
524     {
525         for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
526         {
527             doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
528                       pre_substitution_arr[preIdx], ".POST.",
529                       post_substitution_arr[postIdx]);
530 
531             result = test_vec_internal(
532                 deviceID, context, queue, tmp1,
533                 "test_vec_align_packed_struct_arr", BUFFER_SIZE,
534                 pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
535                 post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
536             if (result != 0) return result;
537         }
538     }
539     return 0;
540 }
541