1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17
18
19 #include "harness/conversions.h"
20 #include "harness/typeWrappers.h"
21 #include "harness/testHarness.h"
22
23 #include "structs.h"
24
25 #include "defines.h"
26
27 #include "type_replacer.h"
28
29
get_align(size_t vecSize)30 size_t get_align(size_t vecSize)
31 {
32 if (vecSize == 3)
33 {
34 return 4;
35 }
36 return vecSize;
37 }
38
39 /* // Lots of conditionals means this is not gonna be an optimal min on intel.
40 */
41 /* // That's okay, make sure we only call a few times per test, not for every */
42 /* // element */
43 /* size_t min_of_nonzero(size_t a, size_t b) */
44 /* { */
45 /* if(a != 0 && (a<=b || b==0)) */
46 /* { */
47 /* return a; */
48 /* } */
49 /* if(b != 0 && (b<a || a==0)) */
50 /* { */
51 /* return b; */
52 /* } */
53 /* return 0; */
54 /* } */
55
56
57 /* size_t get_min_packed_alignment(size_t preSize, size_t typeMultiplePreSize,
58 */
59 /* size_t postSize, size_t typeMultiplePostSize, */
60 /* ExplicitType kType, size_t vecSize) */
61 /* { */
62 /* size_t pre_min = min_of_nonzero(preSize, */
63 /* typeMultiplePreSize* */
64 /* get_explicit_type_size(kType)); */
65 /* size_t post_min = min_of_nonzero(postSize, */
66 /* typeMultiplePostSize* */
67 /* get_explicit_type_size(kType)); */
68 /* size_t struct_min = min_of_nonzero(pre_min, post_min); */
69 /* size_t result = min_of_nonzero(struct_min, get_align(vecSize) */
70 /* *get_explicit_type_size(kType)); */
71 /* return result; */
72
73 /* } */
74
75
test_vec_internal(cl_device_id deviceID,cl_context context,cl_command_queue queue,const char * pattern,const char * testName,size_t bufSize,size_t preSize,size_t typeMultiplePreSize,size_t postSize,size_t typeMultiplePostSize)76 int test_vec_internal(cl_device_id deviceID, cl_context context,
77 cl_command_queue queue, const char* pattern,
78 const char* testName, size_t bufSize, size_t preSize,
79 size_t typeMultiplePreSize, size_t postSize,
80 size_t typeMultiplePostSize)
81 {
82 int err;
83 int typeIdx, vecSizeIdx;
84
85 char tmpBuffer[2048];
86 char srcBuffer[2048];
87
88 size_t preSizeBytes, postSizeBytes, typeSize, totSize;
89
90 clState* pClState = newClState(deviceID, context, queue);
91 bufferStruct* pBuffers = newBufferStruct(
92 bufSize, bufSize * sizeof(cl_uint) / sizeof(cl_char), pClState);
93
94 if (pBuffers == NULL)
95 {
96 destroyClState(pClState);
97 vlog_error("%s : Could not create buffer\n", testName);
98 return -1;
99 }
100
101 for (typeIdx = 0; types[typeIdx] != kNumExplicitTypes; ++typeIdx)
102 {
103
104 // Skip doubles if it is not supported otherwise enable pragma
105 if (types[typeIdx] == kDouble)
106 {
107 if (!is_extension_available(deviceID, "cl_khr_fp64"))
108 {
109 continue;
110 }
111 else
112 {
113 doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.",
114 "#pragma OPENCL EXTENSION cl_khr_fp64: ", ".STATE.",
115 "enable");
116 }
117 }
118 else
119 {
120 if (types[typeIdx] == kLong || types[typeIdx] == kULong)
121 {
122 if (gIsEmbedded) continue;
123 }
124
125 doReplace(tmpBuffer, 2048, pattern, ".PRAGMA.", " ", ".STATE.",
126 " ");
127 }
128
129 typeSize = get_explicit_type_size(types[typeIdx]);
130 preSizeBytes = preSize + typeSize * typeMultiplePreSize;
131 postSizeBytes = postSize + typeSize * typeMultiplePostSize;
132
133
134 for (vecSizeIdx = 1; vecSizeIdx < NUM_VECTOR_SIZES; ++vecSizeIdx)
135 {
136
137 totSize = preSizeBytes + postSizeBytes
138 + typeSize * get_align(g_arrVecSizes[vecSizeIdx]);
139
140 doReplace(srcBuffer, 2048, tmpBuffer, ".TYPE.",
141 g_arrTypeNames[typeIdx], ".NUM.",
142 g_arrVecSizeNames[vecSizeIdx]);
143
144 if (srcBuffer[0] == '\0')
145 {
146 vlog_error("%s: failed to fill source buf for type %s%s\n",
147 testName, g_arrTypeNames[typeIdx],
148 g_arrVecSizeNames[vecSizeIdx]);
149 destroyBufferStruct(pBuffers, pClState);
150 destroyClState(pClState);
151 return -1;
152 }
153
154 // log_info("Buffer is \"\n%s\n\"\n", srcBuffer);
155 // fflush(stdout);
156
157 err = clStateMakeProgram(pClState, srcBuffer, testName);
158 if (err)
159 {
160 vlog_error("%s: Error compiling \"\n%s\n\"", testName,
161 srcBuffer);
162 destroyBufferStruct(pBuffers, pClState);
163 destroyClState(pClState);
164 return -1;
165 }
166
167 err = pushArgs(pBuffers, pClState);
168 if (err != 0)
169 {
170 vlog_error("%s: failed to push args %s%s\n", testName,
171 g_arrTypeNames[typeIdx],
172 g_arrVecSizeNames[vecSizeIdx]);
173 destroyBufferStruct(pBuffers, pClState);
174 destroyClState(pClState);
175 return -1;
176 }
177
178 // log_info("About to Run kernel\n"); fflush(stdout);
179 // now we run the kernel
180 err = runKernel(
181 pClState,
182 bufSize
183 / (g_arrVecSizes[vecSizeIdx] * g_arrTypeSizes[typeIdx]));
184 if (err != 0)
185 {
186 vlog_error("%s: runKernel fail (%ld threads) %s%s\n", testName,
187 pClState->m_numThreads, g_arrTypeNames[typeIdx],
188 g_arrVecSizeNames[vecSizeIdx]);
189 destroyBufferStruct(pBuffers, pClState);
190 destroyClState(pClState);
191 return -1;
192 }
193
194 // log_info("About to retrieve results\n"); fflush(stdout);
195 err = retrieveResults(pBuffers, pClState);
196 if (err != 0)
197 {
198 vlog_error("%s: failed to retrieve results %s%s\n", testName,
199 g_arrTypeNames[typeIdx],
200 g_arrVecSizeNames[vecSizeIdx]);
201 destroyBufferStruct(pBuffers, pClState);
202 destroyClState(pClState);
203 return -1;
204 }
205
206
207 if (preSizeBytes + postSizeBytes == 0)
208 {
209 // log_info("About to Check Correctness\n"); fflush(stdout);
210 err = checkCorrectnessAlign(pBuffers, pClState,
211 get_align(g_arrVecSizes[vecSizeIdx])
212 * typeSize);
213 }
214 else
215 {
216 // we're checking for an aligned struct
217 err = checkPackedCorrectness(pBuffers, pClState, totSize,
218 preSizeBytes);
219 }
220
221 if (err != 0)
222 {
223 vlog_error("%s: incorrect results %s%s\n", testName,
224 g_arrTypeNames[typeIdx],
225 g_arrVecSizeNames[vecSizeIdx]);
226 vlog_error("%s: Source was \"\n%s\n\"", testName, srcBuffer);
227 destroyBufferStruct(pBuffers, pClState);
228 destroyClState(pClState);
229 return -1;
230 }
231
232 clStateDestroyProgramAndKernel(pClState);
233 }
234 }
235
236 destroyBufferStruct(pBuffers, pClState);
237
238 destroyClState(pClState);
239
240
241 // vlog_error("%s : implementation incomplete : FAIL\n", testName);
242 return 0; // -1; // fails on account of not being written.
243 }
244
245
246 static const char* patterns[] = {
247 ".PRAGMA..STATE.\n"
248 "__kernel void test_vec_align_array(.SRC_SCOPE. .TYPE..NUM. *source, "
249 ".DST_SCOPE. uint *dest)\n"
250 "{\n"
251 " int tid = get_global_id(0);\n"
252 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)(source+tid));\n"
253 "}\n",
254 ".PRAGMA..STATE.\n"
255 "typedef struct myUnpackedStruct { \n"
256 ".PRE."
257 " .TYPE..NUM. vec;\n"
258 ".POST."
259 "} testStruct;\n"
260 "__kernel void test_vec_align_struct(__constant .TYPE..NUM. *source, "
261 ".DST_SCOPE. uint *dest)\n"
262 "{\n"
263 " .SRC_SCOPE. testStruct test;\n"
264 " int tid = get_global_id(0);\n"
265 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec));\n"
266 "}\n",
267 ".PRAGMA..STATE.\n"
268 "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
269 ".PRE."
270 " .TYPE..NUM. vec;\n"
271 ".POST."
272 "} testStruct;\n"
273 "__kernel void test_vec_align_packed_struct(__constant .TYPE..NUM. "
274 "*source, .DST_SCOPE. uint *dest)\n"
275 "{\n"
276 " .SRC_SCOPE. testStruct test;\n"
277 " int tid = get_global_id(0);\n"
278 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(test.vec) - (.SRC_SCOPE. "
279 "uchar *)&test);\n"
280 "}\n",
281 ".PRAGMA..STATE.\n"
282 "typedef struct myStruct { \n"
283 ".PRE."
284 " .TYPE..NUM. vec;\n"
285 ".POST."
286 "} testStruct;\n"
287 "__kernel void test_vec_align_struct_arr(.SRC_SCOPE. testStruct *source, "
288 ".DST_SCOPE. uint *dest)\n"
289 "{\n"
290 " int tid = get_global_id(0);\n"
291 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec));\n"
292 "}\n",
293 ".PRAGMA..STATE.\n"
294 "typedef struct __attribute__ ((packed)) myPackedStruct { \n"
295 ".PRE."
296 " .TYPE..NUM. vec;\n"
297 ".POST."
298 "} testStruct;\n"
299 "__kernel void test_vec_align_packed_struct_arr(.SRC_SCOPE. testStruct "
300 "*source, .DST_SCOPE. uint *dest)\n"
301 "{\n"
302 " int tid = get_global_id(0);\n"
303 " dest[tid] = (uint)((.SRC_SCOPE. uchar *)&(source[tid].vec) - "
304 "(.SRC_SCOPE. uchar *)&(source[0]));\n"
305 "}\n",
306 // __attribute__ ((packed))
307 };
308
309
310 const char* pre_substitution_arr[] = { "",
311 "char c;\n",
312 "short3 s;",
313 ".TYPE.3 tPre;\n",
314 ".TYPE. arrPre[5];\n",
315 ".TYPE. arrPre[12];\n",
316 NULL };
317
318
319 // alignments of everything in pre_substitution_arr as raw alignments
320 // 0 if such a thing is meaningless
321 size_t pre_align_arr[] = { 0,
322 sizeof(cl_char),
323 4 * sizeof(cl_short),
324 0, // taken care of in type_multiple_pre_align_arr
325 0,
326 0 };
327
328 // alignments of everything in pre_substitution_arr as multiples of
329 // sizeof(.TYPE.)
330 // 0 if such a thing is meaningless
331 size_t type_multiple_pre_align_arr[] = { 0, 0, 0, 4, 5, 12 };
332
333 const char* post_substitution_arr[] = { "",
334 "char cPost;\n",
335 ".TYPE. arrPost[3];\n",
336 ".TYPE. arrPost[5];\n",
337 ".TYPE.3 arrPost;\n",
338 ".TYPE. arrPost[12];\n",
339 NULL };
340
341
342 // alignments of everything in post_substitution_arr as raw alignments
343 // 0 if such a thing is meaningless
344 size_t post_align_arr[] = { 0, sizeof(cl_char),
345 0, // taken care of in type_multiple_post_align_arr
346 0, 0,
347 0 };
348
349 // alignments of everything in post_substitution_arr as multiples of
350 // sizeof(.TYPE.)
351 // 0 if such a thing is meaningless
352 size_t type_multiple_post_align_arr[] = { 0, 0, 3, 5, 4, 12 };
353
354 // there hsould be a packed version of this?
test_vec_align_array(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)355 int test_vec_align_array(cl_device_id deviceID, cl_context context,
356 cl_command_queue queue, int num_elements)
357 {
358 char tmp[2048];
359 int result;
360
361 log_info("Testing global\n");
362 doReplace(tmp, (size_t)2048, patterns[0], ".SRC_SCOPE.", "__global",
363 ".DST_SCOPE.", "__global"); //
364 result = test_vec_internal(deviceID, context, queue, tmp,
365 "test_vec_align_array", BUFFER_SIZE, 0, 0, 0, 0);
366 return result;
367 }
368
369
test_vec_align_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)370 int test_vec_align_struct(cl_device_id deviceID, cl_context context,
371 cl_command_queue queue, int num_elements)
372 {
373 char tmp1[2048], tmp2[2048];
374 int result = 0;
375 int preIdx, postIdx;
376
377 log_info("testing __private\n");
378 doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__private",
379 ".DST_SCOPE.", "__global"); //
380
381 for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
382 {
383 for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
384 {
385 doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
386 pre_substitution_arr[preIdx], ".POST.",
387 post_substitution_arr[postIdx]);
388
389 result =
390 test_vec_internal(deviceID, context, queue, tmp1,
391 "test_vec_align_struct", 512, 0, 0, 0, 0);
392 if (result != 0)
393 {
394 return result;
395 }
396 }
397 }
398
399 log_info("testing __local\n");
400 doReplace(tmp2, (size_t)2048, patterns[1], ".SRC_SCOPE.", "__local",
401 ".DST_SCOPE.", "__global"); //
402
403 for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
404 {
405 for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
406 {
407 doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
408 pre_substitution_arr[preIdx], ".POST.",
409 post_substitution_arr[postIdx]);
410
411 result =
412 test_vec_internal(deviceID, context, queue, tmp1,
413 "test_vec_align_struct", 512, 0, 0, 0, 0);
414 if (result != 0)
415 {
416 return result;
417 }
418 }
419 }
420 return 0;
421 }
422
test_vec_align_packed_struct(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)423 int test_vec_align_packed_struct(cl_device_id deviceID, cl_context context,
424 cl_command_queue queue, int num_elements)
425 {
426 char tmp1[2048], tmp2[2048];
427 int result = 0;
428 int preIdx, postIdx;
429
430
431 log_info("Testing __private\n");
432 doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__private",
433 ".DST_SCOPE.", "__global"); //
434
435 for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
436 {
437 for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
438 {
439 doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
440 pre_substitution_arr[preIdx], ".POST.",
441 post_substitution_arr[postIdx]);
442
443 result = test_vec_internal(
444 deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
445 512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
446 post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
447 if (result != 0)
448 {
449 return result;
450 }
451 }
452 }
453
454 log_info("testing __local\n");
455 doReplace(tmp2, (size_t)2048, patterns[2], ".SRC_SCOPE.", "__local",
456 ".DST_SCOPE.", "__global"); //
457
458 for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
459 {
460 for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
461 {
462 doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
463 pre_substitution_arr[preIdx], ".POST.",
464 post_substitution_arr[postIdx]);
465
466 result = test_vec_internal(
467 deviceID, context, queue, tmp1, "test_vec_align_packed_struct",
468 512, pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
469 post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
470 if (result != 0)
471 {
472 return result;
473 }
474 }
475 }
476 return 0;
477 }
478
test_vec_align_struct_arr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)479 int test_vec_align_struct_arr(cl_device_id deviceID, cl_context context,
480 cl_command_queue queue, int num_elements)
481 {
482 char tmp1[2048], tmp2[2048];
483 int result = 0;
484 int preIdx, postIdx;
485
486
487 log_info("testing __global\n");
488 doReplace(tmp2, (size_t)2048, patterns[3], ".SRC_SCOPE.", "__global",
489 ".DST_SCOPE.", "__global"); //
490
491 for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
492 {
493 for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
494 {
495 doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
496 pre_substitution_arr[preIdx], ".POST.",
497 post_substitution_arr[postIdx]);
498
499 result = test_vec_internal(deviceID, context, queue, tmp1,
500 "test_vec_align_struct_arr", BUFFER_SIZE,
501 0, 0, 0, 0);
502 if (result != 0)
503 {
504 return result;
505 }
506 }
507 }
508 return 0;
509 }
510
test_vec_align_packed_struct_arr(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)511 int test_vec_align_packed_struct_arr(cl_device_id deviceID, cl_context context,
512 cl_command_queue queue, int num_elements)
513 {
514 char tmp1[2048], tmp2[2048];
515 int result = 0;
516 int preIdx, postIdx;
517
518
519 log_info("Testing __global\n");
520 doReplace(tmp2, (size_t)2048, patterns[4], ".SRC_SCOPE.", "__global",
521 ".DST_SCOPE.", "__global"); //
522
523 for (preIdx = 0; pre_substitution_arr[preIdx] != NULL; ++preIdx)
524 {
525 for (postIdx = 0; post_substitution_arr[postIdx] != NULL; ++postIdx)
526 {
527 doReplace(tmp1, (size_t)2048, tmp2, ".PRE.",
528 pre_substitution_arr[preIdx], ".POST.",
529 post_substitution_arr[postIdx]);
530
531 result = test_vec_internal(
532 deviceID, context, queue, tmp1,
533 "test_vec_align_packed_struct_arr", BUFFER_SIZE,
534 pre_align_arr[preIdx], type_multiple_pre_align_arr[preIdx],
535 post_align_arr[postIdx], type_multiple_post_align_arr[postIdx]);
536 if (result != 0) return result;
537 }
538 }
539 return 0;
540 }
541