1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20
21 #include <vector>
22
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26
27
28 #ifdef CL_VERSION_2_0
29 extern int gWimpyMode;
30 static int nestingLevel = 3;
31
32 static const char* enqueue_1D_wg_size_single[] =
33 {
34 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
35 NL, "{"
36 NL, " size_t tidX = get_global_id(0);"
37 NL, " queue_t def_q = get_default_queue();"
38 NL, " if(--level < 0) return;"
39 NL, ""
40 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
41 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
42 NL, ""
43 NL, " const size_t gs = 64 * 64 * 64;"
44 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
45 NL, " ls = ls? ls: 1;"
46 NL, ""
47 NL, " ndrange_t ndrange = ndrange_1D(gs, ls);"
48 NL, ""
49 NL, " // Only 1 work-item enqueues block"
50 NL, " if(tidX == 0)"
51 NL, " {"
52 NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);"
53 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
54 NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }"
55 NL, " }"
56 NL, "}"
57 NL, ""
58 NL, "kernel void enqueue_1D_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
59 NL, "{"
60 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
61 NL, "}"
62 NL
63 };
64
check_single(cl_int * results,cl_int len,cl_int nesting_level)65 static int check_single(cl_int* results, cl_int len, cl_int nesting_level)
66 {
67 for(size_t i = 0; i < len; ++i)
68 {
69 if(i == 0 && results[i] != nestingLevel)
70 {
71 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], nestingLevel, i);
72 return (int)i;
73 }
74
75 if(i > 0 && results[i] != 0)
76 {
77 log_error("ERROR: Kernel returned %d vs. expected 0, index: %d\n", results[i], i);
78 return (int)i;
79 }
80 }
81
82 return -1;
83 }
84
85 static const char* enqueue_1D_wg_size_some_eq[] =
86 {
87 NL, "void block_fn(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
88 NL, "{"
89 NL, " size_t tidX = get_global_id(0);"
90 NL, " queue_t def_q = get_default_queue();"
91 NL, " if(--level < 0) return;"
92 NL, ""
93 NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level, maxGlobalWorkSize, rnd); };"
94 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
95 NL, ""
96 NL, " const size_t gs = 8 * 8 * 2;"
97 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
98 NL, " ls = ls? ls: 1;"
99 NL, ""
100 NL, " ndrange_t ndrange = ndrange_1D(gs, ls);"
101 NL, ""
102 NL, " // Some work-items enqueues nested blocks with the same level"
103 NL, " if((tidX % (maxGlobalWorkSize / 8)) == 0)"
104 NL, " {"
105 NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);"
106 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
107 NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }"
108 NL, " }"
109 NL, "}"
110 NL, ""
111 NL, "kernel void enqueue_1D_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
112 NL, "{"
113 NL, " block_fn(res, level, maxGlobalWorkSize, rnd);"
114 NL, "}"
115 NL
116 };
117
generate_reference_results_some_eq_1D(std::vector<cl_int> & referenceResults,cl_int maxGlobalWorkSize,cl_int level)118 void generate_reference_results_some_eq_1D(std::vector<cl_int> &referenceResults, cl_int maxGlobalWorkSize, cl_int level)
119 {
120 size_t globalSize = (level == nestingLevel) ? maxGlobalWorkSize: (8 * 8 * 2);
121 if(--level < 0)
122 {
123 return;
124 }
125
126 for (size_t tidX = 0; tidX < globalSize; ++tidX)
127 {
128 if ((tidX % (maxGlobalWorkSize / 8)) == 0)
129 {
130 ++referenceResults[tidX % maxGlobalWorkSize];
131 generate_reference_results_some_eq_1D(referenceResults, maxGlobalWorkSize, level);
132 }
133 }
134 }
135
check_some_eq_1D(cl_int * results,cl_int len,cl_int nesting_level)136 static int check_some_eq_1D(cl_int* results, cl_int len, cl_int nesting_level)
137 {
138 std::vector<cl_int> referenceResults(len, 0);
139 generate_reference_results_some_eq_1D(referenceResults, len, nesting_level);
140
141 for(size_t i = 0; i < len; ++i)
142 {
143 if (results[i] != referenceResults[i])
144 {
145 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
146 return (int)i;
147 }
148 }
149
150 return -1;
151 }
152
153 static const char* enqueue_1D_wg_size_some_diff[] =
154 {
155 NL, "void block_fn(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
156 NL, "{"
157 NL, " size_t tidX = get_global_id(0);"
158 NL, " queue_t def_q = get_default_queue();"
159 NL, " if(--level < 0) return;"
160 NL, ""
161 NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level, maxGlobalWorkSize, rnd); };"
162 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
163 NL, ""
164 NL, " const size_t gs = 8 * 8 * 8;"
165 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
166 NL, " ls = ls? ls: 1;"
167 NL, ""
168 NL, " ndrange_t ndrange = ndrange_1D(gs, ls);"
169 NL, ""
170 NL, " // Some work-items enqueues nested blocks with different levels"
171 NL, " if((tidX % 2) == 0)"
172 NL, " {"
173 NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);"
174 NL, " if(level >= tidX)"
175 NL, " {"
176 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
177 NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }"
178 NL, " }"
179 NL, " }"
180 NL, "}"
181 NL, ""
182 NL, "kernel void enqueue_1D_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
183 NL, "{"
184 NL, " block_fn(res, level, maxGlobalWorkSize, rnd);"
185 NL, "}"
186 NL
187 };
188
generate_reference_results_some_diff_1D(std::vector<cl_int> & referenceResults,cl_int maxGlobalWorkSize,cl_int level)189 void generate_reference_results_some_diff_1D(std::vector<cl_int> &referenceResults, cl_int maxGlobalWorkSize, cl_int level)
190 {
191 size_t globalSize = (level == nestingLevel) ? maxGlobalWorkSize: (8 * 8 * 8);
192 if(--level < 0)
193 {
194 return;
195 }
196
197 for (size_t tidX = 0; tidX < globalSize; ++tidX)
198 {
199 if ((tidX % 2) == 0)
200 {
201 ++referenceResults[tidX % maxGlobalWorkSize];
202 if (level >= tidX)
203 {
204 generate_reference_results_some_diff_1D(referenceResults, maxGlobalWorkSize, level);
205 }
206 }
207 }
208 }
209
check_some_diff_1D(cl_int * results,cl_int maxGlobalWorkSize,cl_int nesting_level)210 static int check_some_diff_1D(cl_int* results, cl_int maxGlobalWorkSize, cl_int nesting_level)
211 {
212 std::vector<cl_int> referenceResults(maxGlobalWorkSize, 0);
213 generate_reference_results_some_diff_1D(referenceResults, maxGlobalWorkSize, nesting_level);
214
215 for(size_t i = 0; i < maxGlobalWorkSize; ++i)
216 {
217 if (results[i] != referenceResults[i])
218 {
219 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
220 return (int)i;
221 }
222 }
223
224 return -1;
225 }
226
227 static const char* enqueue_1D_wg_size_all_eq[] =
228 {
229 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
230 NL, "{"
231 NL, " size_t tidX = get_global_id(0);"
232 NL, " queue_t def_q = get_default_queue();"
233 NL, " if(--level < 0) return;"
234 NL, ""
235 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
236 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
237 NL, ""
238 NL, " const size_t gs = 8;"
239 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
240 NL, " ls = ls? ls: 1;"
241 NL, ""
242 NL, " ndrange_t ndrange = ndrange_1D(gs, ls);"
243 NL, ""
244 NL, " // All work-items enqueues nested blocks with the same level"
245 NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);"
246 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
247 NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }"
248 NL, "}"
249 NL, ""
250 NL, "kernel void enqueue_1D_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
251 NL, "{"
252 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
253 NL, "}"
254 NL
255 };
256
generate_reference_results_all_eq_1D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)257 void generate_reference_results_all_eq_1D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
258 {
259 size_t globalSize = (level == nestingLevel) ? len: 8;
260 if(--level < 0)
261 {
262 return;
263 }
264
265 for (size_t tidX = 0; tidX < globalSize; ++tidX)
266 {
267 ++referenceResults[tidX % len];
268 generate_reference_results_all_eq_1D(referenceResults, len, level);
269 }
270 }
271
check_all_eq_1D(cl_int * results,cl_int len,cl_int nesting_level)272 static int check_all_eq_1D(cl_int* results, cl_int len, cl_int nesting_level)
273 {
274 std::vector<cl_int> referenceResults(len, 0);
275 generate_reference_results_all_eq_1D(referenceResults, len, nesting_level);
276
277 for(size_t i = 0; i < len; ++i)
278 {
279 if (results[i] != referenceResults[i])
280 {
281 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
282 return (int)i;
283 }
284 }
285
286 return -1;
287 }
288
289 static const char* enqueue_1D_wg_size_all_diff[] =
290 {
291 NL, "void block_fn(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
292 NL, "{"
293 NL, " size_t tidX = get_global_id(0);"
294 NL, " queue_t def_q = get_default_queue();"
295 NL, " if((--level) < 0) return;"
296 NL, ""
297 NL, " void (^kernelBlock)(void) = ^{ block_fn(res, level, maxGlobalWorkSize, rnd); };"
298 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
299 NL, ""
300 NL, " const size_t gs = 8 * 8 * 8;"
301 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
302 NL, " ls = ls? ls: 1;"
303 NL, ""
304 NL, " ndrange_t ndrange = ndrange_1D(gs, ls);"
305 NL, ""
306 NL, " // All work-items enqueues nested blocks with different levels"
307 NL, " atomic_inc(&res[tidX % maxGlobalWorkSize]);"
308 NL, " if(level >= tidX)"
309 NL, " {"
310 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
311 NL, " if(enq_res != CLK_SUCCESS) { res[tidX % maxGlobalWorkSize] = -1; return; }"
312 NL, " }"
313 NL, "}"
314 NL, ""
315 NL, "kernel void enqueue_1D_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
316 NL, "{"
317 NL, " block_fn(res, level, maxGlobalWorkSize, rnd);"
318 NL, "}"
319 NL
320 };
321
generate_reference_results_all_diff_1D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)322 void generate_reference_results_all_diff_1D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
323 {
324 size_t globalSize = (level == nestingLevel) ? len: (8 * 8 * 8);
325 if((--level) < 0)
326 {
327 return;
328 }
329
330 for (size_t threadIdx = 0; threadIdx < globalSize; ++threadIdx)
331 {
332 ++referenceResults[threadIdx % len];
333 if (level >= threadIdx)
334 {
335 generate_reference_results_all_diff_1D(referenceResults, len, level);
336 }
337 }
338 }
339
check_all_diff_1D(cl_int * results,cl_int len,cl_int nesting_level)340 static int check_all_diff_1D(cl_int* results, cl_int len, cl_int nesting_level)
341 {
342 std::vector<cl_int> referenceResults(len, 0);
343 generate_reference_results_all_diff_1D(referenceResults, len, nesting_level);
344
345 for(size_t i = 0; i < len; ++i)
346 {
347 if (results[i] != referenceResults[i])
348 {
349 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
350 return (int)i;
351 }
352 }
353
354 return -1;
355 }
356
357 static const char* enqueue_2D_wg_size_single[] =
358 {
359 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
360 NL, "{"
361 NL, " size_t tidX = get_global_id(0);"
362 NL, " size_t tidY = get_global_id(1);"
363 NL, " size_t linearId = get_global_linear_id();"
364 NL, " queue_t def_q = get_default_queue();"
365 NL, " if(--level < 0) return;"
366 NL, ""
367 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
368 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
369 NL, ""
370 NL, " const size_t gs[] = { 64, 64 * 64 };"
371 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
372 NL, " ls[1] = ls[1]? ls[1]: 1;"
373 NL, " "
374 NL, " ndrange_t ndrange = ndrange_2D(gs, ls);"
375 NL, ""
376 NL, " // Only 1 work-item enqueues block"
377 NL, " if(tidX == 0 && tidY == 0)"
378 NL, " {"
379 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
380 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
381 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
382 NL, " }"
383 NL, "}"
384 NL, ""
385 NL, "kernel void enqueue_2D_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
386 NL, "{"
387 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
388 NL, "}"
389 NL
390 };
391
392 static const char* enqueue_2D_wg_size_some_eq[] =
393 {
394 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
395 NL, "{"
396 NL, " size_t tidX = get_global_id(0);"
397 NL, " size_t tidY = get_global_id(1);"
398 NL, " size_t linearId = get_global_linear_id();"
399 NL, " queue_t def_q = get_default_queue();"
400 NL, " if(--level < 0) return;"
401 NL, ""
402 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
403 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
404 NL, ""
405 NL, " const size_t gs[] = { 4, 4 };"
406 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
407 NL, " ls[1] = ls[1]? ls[1]: 1;"
408 NL, " "
409 NL, " ndrange_t ndrange = ndrange_2D(gs, ls);"
410 NL, ""
411 NL, " // Some work-items enqueues nested blocks with the same level"
412 NL, " if((tidX < (get_global_size(0) >> 1)) && ((tidY < (get_global_size(1) >> 1)) || get_global_size(1) == 1))"
413 NL, " {"
414 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
415 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
416 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
417 NL, " }"
418 NL, "}"
419 NL, ""
420 NL, "kernel void enqueue_2D_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
421 NL, "{"
422 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
423 NL, "}"
424 NL
425 };
426
generate_reference_results_some_eq_2D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)427 void generate_reference_results_some_eq_2D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
428 {
429 size_t globalSizeX = (level == nestingLevel) ? len: 4;
430 size_t globalSizeY = (level == nestingLevel) ? 1: 4;
431 if(--level < 0)
432 {
433 return;
434 }
435
436 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
437 {
438 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
439 {
440 if ((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1))
441 {
442 ++referenceResults[(globalSizeX * tidY + tidX) % len];
443 generate_reference_results_some_eq_2D(referenceResults, len, level);
444 }
445 }
446 }
447 }
448
check_some_eq_2D(cl_int * results,cl_int len,cl_int nesting_level)449 static int check_some_eq_2D(cl_int* results, cl_int len, cl_int nesting_level)
450 {
451 std::vector<cl_int> referenceResults(len, 0);
452 generate_reference_results_some_eq_2D(referenceResults, len, nesting_level);
453
454 for(size_t i = 0; i < len; ++i)
455 {
456 if (results[i] != referenceResults[i])
457 {
458 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
459 return (int)i;
460 }
461 }
462
463 return -1;
464 }
465
466 static const char* enqueue_2D_wg_size_some_diff[] =
467 {
468 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
469 NL, "{"
470 NL, " size_t tidX = get_global_id(0);"
471 NL, " size_t tidY = get_global_id(1);"
472 NL, " size_t linearId = get_global_linear_id();"
473 NL, " queue_t def_q = get_default_queue();"
474 NL, " if(--level < 0) return;"
475 NL, ""
476 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
477 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
478 NL, ""
479 NL, " const size_t gs[] = { 8, 8 };"
480 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
481 NL, " ls[1] = ls[1]? ls[1]: 1;"
482 NL, " "
483 NL, " ndrange_t ndrange = ndrange_2D(gs, ls);"
484 NL, ""
485 NL, " // Some work-items enqueues nested blocks with different levels"
486 NL, " if((tidX % 2) == 0 && (tidY % 2) == 0)"
487 NL, " {"
488 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
489 NL, " if(level >= tidX && level >= tidY)"
490 NL, " {"
491 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
492 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
493 NL, " }"
494 NL, " }"
495 NL, "}"
496 NL, ""
497 NL, "kernel void enqueue_2D_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
498 NL, "{"
499 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
500 NL, "}"
501 NL
502 };
503
generate_reference_results_some_diff_2D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)504 void generate_reference_results_some_diff_2D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
505 {
506 size_t globalSizeX = (level == nestingLevel) ? len: 8;
507 size_t globalSizeY = (level == nestingLevel) ? 1: 8;
508 if(--level < 0)
509 {
510 return;
511 }
512
513 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
514 {
515 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
516 {
517 if ((tidX % 2) == 0 && (tidY % 2) == 0)
518 {
519 ++referenceResults[(globalSizeX * tidY + tidX) % len];
520 if (level >= tidX && level >= tidY)
521 {
522 generate_reference_results_some_diff_2D(referenceResults, len, level);
523 }
524 }
525 }
526 }
527 }
528
check_some_diff_2D(cl_int * results,cl_int len,cl_int nesting_level)529 static int check_some_diff_2D(cl_int* results, cl_int len, cl_int nesting_level)
530 {
531 std::vector<cl_int> referenceResults(len, 0);
532 generate_reference_results_some_diff_2D(referenceResults, len, nesting_level);
533
534 for(size_t i = 0; i < len; ++i)
535 {
536 if (results[i] != referenceResults[i])
537 {
538 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
539 return (int)i;
540 }
541 }
542
543 return -1;
544 }
545
546 static const char* enqueue_2D_wg_size_all_eq[] =
547 {
548 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
549 NL, "{"
550 NL, " size_t tidX = get_global_id(0);"
551 NL, " size_t tidY = get_global_id(1);"
552 NL, " size_t linearId = get_global_linear_id();"
553 NL, " queue_t def_q = get_default_queue();"
554 NL, " if(--level < 0) return;"
555 NL, ""
556 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
557 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
558 NL, ""
559 NL, " const size_t gs[] = { 2, 2 };"
560 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
561 NL, " ls[1] = ls[1]? ls[1]: 1;"
562 NL, " "
563 NL, " ndrange_t ndrange = ndrange_2D(gs, ls);"
564 NL, ""
565 NL, " // All work-items enqueues nested blocks with the same level"
566 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
567 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
568 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
569 NL, "}"
570 NL, ""
571 NL, "kernel void enqueue_2D_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
572 NL, "{"
573 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
574 NL, "}"
575 NL
576 };
577
generate_reference_results_all_eq_2D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)578 void generate_reference_results_all_eq_2D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
579 {
580 size_t globalSizeX = (level == nestingLevel) ? len: 2;
581 size_t globalSizeY = (level == nestingLevel) ? 1: 2;
582 if(--level < 0)
583 {
584 return;
585 }
586
587 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
588 {
589 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
590 {
591 ++referenceResults[(globalSizeX * tidY + tidX) % len];
592 generate_reference_results_all_eq_2D(referenceResults, len, level);
593 }
594 }
595 }
596
check_all_eq_2D(cl_int * results,cl_int len,cl_int nesting_level)597 static int check_all_eq_2D(cl_int* results, cl_int len, cl_int nesting_level)
598 {
599 std::vector<cl_int> referenceResults(len, 0);
600 generate_reference_results_all_eq_2D(referenceResults, len, nesting_level);
601
602 for(size_t i = 0; i < len; ++i)
603 {
604 if (results[i] != referenceResults[i])
605 {
606 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
607 return (int)i;
608 }
609 }
610
611 return -1;
612 }
613
614 static const char* enqueue_2D_wg_size_all_diff[] =
615 {
616 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
617 NL, "{"
618 NL, " size_t tidX = get_global_id(0);"
619 NL, " size_t tidY = get_global_id(1);"
620 NL, " size_t linearId = get_global_linear_id();"
621 NL, " queue_t def_q = get_default_queue();"
622 NL, " if(--level < 0) return;"
623 NL, ""
624 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
625 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
626 NL, ""
627 NL, " size_t gs[] = { 8, 8 * 8 };"
628 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
629 NL, " ls[1] = ls[1]? ls[1]: 1;"
630 NL, " "
631 NL, " ndrange_t ndrange = ndrange_2D(gs, ls);"
632 NL, ""
633 NL, " // All work-items enqueues nested blocks with different levels"
634 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
635 NL, " if(level >= tidX && level >= tidY)"
636 NL, " {"
637 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
638 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
639 NL, " }"
640 NL, "}"
641 NL, ""
642 NL, "kernel void enqueue_2D_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
643 NL, "{"
644 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
645 NL, "}"
646 NL
647 };
648
generate_reference_results_all_diff_2D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)649 void generate_reference_results_all_diff_2D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
650 {
651 size_t globalSizeX = (level == nestingLevel) ? len: 8;
652 size_t globalSizeY = (level == nestingLevel) ? 1: (8 * 8);
653 if(--level < 0)
654 {
655 return;
656 }
657
658 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
659 {
660 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
661 {
662 ++referenceResults[(globalSizeX * tidY + tidX) % len];
663 if (level >= tidX && level >= tidY)
664 {
665 generate_reference_results_all_diff_2D(referenceResults, len, level);
666 }
667 }
668 }
669 }
670
check_all_diff_2D(cl_int * results,cl_int len,cl_int nesting_level)671 static int check_all_diff_2D(cl_int* results, cl_int len, cl_int nesting_level)
672 {
673 std::vector<cl_int> referenceResults(len, 0);
674 generate_reference_results_all_diff_2D(referenceResults, len, nesting_level);
675
676 for(size_t i = 0; i < len; ++i)
677 {
678 if (results[i] != referenceResults[i])
679 {
680 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
681 return (int)i;
682 }
683 }
684
685 return -1;
686 }
687
688 static const char* enqueue_3D_wg_size_single[] =
689 {
690 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
691 NL, "{"
692 NL, " size_t tidX = get_global_id(0);"
693 NL, " size_t tidY = get_global_id(1);"
694 NL, " size_t tidZ = get_global_id(2);"
695 NL, " size_t linearId = get_global_linear_id();"
696 NL, " queue_t def_q = get_default_queue();"
697 NL, " if(--level < 0) return;"
698 NL, ""
699 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
700 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
701 NL, ""
702 NL, " const size_t gs[] = { 64, 64, 64 };"
703 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
704 NL, " ls[2] = ls[2]? ls[2]: 1;"
705 NL, " "
706 NL, " ndrange_t ndrange = ndrange_3D(gs, ls);"
707 NL, ""
708 NL, " // Only 1 work-item enqueues block"
709 NL, " if(tidX == 0 && tidY == 0 && tidZ == 0)"
710 NL, " {"
711 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
712 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
713 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
714 NL, " }"
715 NL, "}"
716 NL, ""
717 NL, "kernel void enqueue_3D_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
718 NL, "{"
719 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
720 NL, "}"
721 NL
722 };
723
724 static const char* enqueue_3D_wg_size_some_eq[] =
725 {
726 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
727 NL, "{"
728 NL, " size_t tidX = get_global_id(0);"
729 NL, " size_t tidY = get_global_id(1);"
730 NL, " size_t tidZ = get_global_id(2);"
731 NL, " size_t linearId = get_global_linear_id();"
732 NL, " queue_t def_q = get_default_queue();"
733 NL, " if(--level < 0) return;"
734 NL, ""
735 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
736 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
737 NL, ""
738 NL, " const size_t gs[] = { 4, 4, 4 };"
739 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
740 NL, " ls[2] = ls[2]? ls[2]: 1;"
741 NL, " "
742 NL, " ndrange_t ndrange = ndrange_3D(gs, ls);"
743 NL, ""
744 NL, " // Some work-items enqueues nested blocks with the same level"
745 NL, " if((tidX < (get_global_size(0) >> 1)) && "
746 NL, " ((tidY < (get_global_size(1) >> 1)) || get_global_size(1) == 1) &&"
747 NL, " ((tidZ < (get_global_size(2) >> 1)) || get_global_size(2) == 1))"
748 NL, " {"
749 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
750 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
751 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
752 NL, " }"
753 NL, "}"
754 NL, ""
755 NL, "kernel void enqueue_3D_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
756 NL, "{"
757 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
758 NL, "}"
759 NL
760 };
761
generate_reference_results_some_eq_3D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)762 void generate_reference_results_some_eq_3D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
763 {
764 size_t globalSizeX = (level == nestingLevel) ? len: 4;
765 size_t globalSizeY = (level == nestingLevel) ? 1: 4;
766 size_t globalSizeZ = (level == nestingLevel) ? 1: 4;
767 if(--level < 0)
768 {
769 return;
770 }
771
772 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
773 {
774 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
775 {
776 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
777 {
778 if ((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1) && ((tidZ < (globalSizeZ >> 1)) || globalSizeZ == 1))
779 {
780 ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len];
781 generate_reference_results_some_eq_3D(referenceResults, len, level);
782 }
783 }
784 }
785 }
786 }
787
check_some_eq_3D(cl_int * results,cl_int len,cl_int nesting_level)788 static int check_some_eq_3D(cl_int* results, cl_int len, cl_int nesting_level)
789 {
790 std::vector<cl_int> referenceResults(len, 0);
791 generate_reference_results_some_eq_3D(referenceResults, len, nesting_level);
792
793 for(size_t i = 0; i < len; ++i)
794 {
795 if (results[i] != referenceResults[i])
796 {
797 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
798 return (int)i;
799 }
800 }
801
802 return -1;
803 }
804
805 static const char* enqueue_3D_wg_size_some_diff[] =
806 {
807 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
808 NL, "{"
809 NL, " size_t tidX = get_global_id(0);"
810 NL, " size_t tidY = get_global_id(1);"
811 NL, " size_t tidZ = get_global_id(2);"
812 NL, " size_t linearId = get_global_linear_id();"
813 NL, " queue_t def_q = get_default_queue();"
814 NL, " if(--level < 0) return;"
815 NL, ""
816 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
817 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
818 NL, ""
819 NL, " const size_t gs[] = { 8, 8, 8 };"
820 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
821 NL, " ls[2] = ls[2]? ls[2]: 1;"
822 NL, " "
823 NL, " ndrange_t ndrange = ndrange_3D(gs, ls);"
824 NL, ""
825 NL, " // Some work-items enqueues nested blocks with different levels"
826 NL, " if((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0)"
827 NL, " {"
828 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
829 NL, " if(level >= tidX && level >= tidY && level >= tidZ)"
830 NL, " {"
831 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
832 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
833 NL, " }"
834 NL, " }"
835 NL, "}"
836 NL, ""
837 NL, "kernel void enqueue_3D_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
838 NL, "{"
839 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
840 NL, "}"
841 NL
842 };
843
generate_reference_results_some_diff_3D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)844 void generate_reference_results_some_diff_3D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
845 {
846 size_t globalSizeX = (level == nestingLevel) ? len: 8;
847 size_t globalSizeY = (level == nestingLevel) ? 1: 8;
848 size_t globalSizeZ = (level == nestingLevel) ? 1: 8;
849 if(--level < 0)
850 {
851 return;
852 }
853
854 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
855 {
856 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
857 {
858 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
859 {
860 if ((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0)
861 {
862 ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len];
863 if (level >= tidX && level >= tidY && level >= tidZ)
864 {
865 generate_reference_results_some_diff_3D(referenceResults, len, level);
866 }
867 }
868 }
869 }
870 }
871 }
872
check_some_diff_3D(cl_int * results,cl_int len,cl_int nesting_level)873 static int check_some_diff_3D(cl_int* results, cl_int len, cl_int nesting_level)
874 {
875 std::vector<cl_int> referenceResults(len, 0);
876 generate_reference_results_some_diff_3D(referenceResults, len, nesting_level);
877
878 for(size_t i = 0; i < len; ++i)
879 {
880 if (results[i] != referenceResults[i])
881 {
882 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
883 return (int)i;
884 }
885 }
886
887 return -1;
888 }
889
890 static const char* enqueue_3D_wg_size_all_eq[] =
891 {
892 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
893 NL, "{"
894 NL, " size_t tidX = get_global_id(0);"
895 NL, " size_t tidY = get_global_id(1);"
896 NL, " size_t tidZ = get_global_id(2);"
897 NL, " size_t linearId = get_global_linear_id();"
898 NL, " queue_t def_q = get_default_queue();"
899 NL, " if(--level < 0) return;"
900 NL, ""
901 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
902 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
903 NL, ""
904 NL, " const size_t gs[] = { 2, 2, 2 };"
905 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
906 NL, " ls[2] = ls[2]? ls[2]: 1;"
907 NL, " "
908 NL, " ndrange_t ndrange = ndrange_3D(gs, ls);"
909 NL, ""
910 NL, " // All work-items enqueues nested blocks with the same level"
911 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
912 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
913 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
914 NL, "}"
915 NL, ""
916 NL, "kernel void enqueue_3D_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
917 NL, "{"
918 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
919 NL, "}"
920 NL
921 };
922
generate_reference_results_all_eq_3D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)923 void generate_reference_results_all_eq_3D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
924 {
925 size_t globalSizeX = (level == nestingLevel) ? len: 2;
926 size_t globalSizeY = (level == nestingLevel) ? 1: 2;
927 size_t globalSizeZ = (level == nestingLevel) ? 1: 2;
928 if(--level < 0)
929 {
930 return;
931 }
932
933 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
934 {
935 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
936 {
937 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
938 {
939 ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len];
940 generate_reference_results_all_eq_3D(referenceResults, len, level);
941 }
942 }
943 }
944 }
945
check_all_eq_3D(cl_int * results,cl_int len,cl_int nesting_level)946 static int check_all_eq_3D(cl_int* results, cl_int len, cl_int nesting_level)
947 {
948 std::vector<cl_int> referenceResults(len, 0);
949 generate_reference_results_all_eq_3D(referenceResults, len, nesting_level);
950
951 for(size_t i = 0; i < len; ++i)
952 {
953 if (results[i] != referenceResults[i])
954 {
955 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
956 return (int)i;
957 }
958 }
959
960 return -1;
961 }
962
963 static const char* enqueue_3D_wg_size_all_diff[] =
964 {
965 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
966 NL, "{"
967 NL, " size_t tidX = get_global_id(0);"
968 NL, " size_t tidY = get_global_id(1);"
969 NL, " size_t tidZ = get_global_id(2);"
970 NL, " size_t linearId = get_global_linear_id();"
971 NL, " queue_t def_q = get_default_queue();"
972 NL, " if(--level < 0) return;"
973 NL, ""
974 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
975 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
976 NL, ""
977 NL, " const size_t gs[] = { 8, 8, 8 };"
978 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
979 NL, " ls[2] = ls[2]? ls[2]: 1;"
980 NL, " "
981 NL, " ndrange_t ndrange = ndrange_3D(gs, ls);"
982 NL, ""
983 NL, " // All work-items enqueues nested blocks with different levels"
984 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
985 NL, " if(level >= tidX && level >= tidY && level >= tidZ)"
986 NL, " {"
987 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
988 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
989 NL, " }"
990 NL, "}"
991 NL, ""
992 NL, "kernel void enqueue_3D_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
993 NL, "{"
994 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
995 NL, "}"
996 NL
997 };
998
generate_reference_results_all_diff_3D(std::vector<cl_int> & referenceResults,cl_int len,cl_int level)999 void generate_reference_results_all_diff_3D(std::vector<cl_int> &referenceResults, cl_int len, cl_int level)
1000 {
1001 size_t globalSizeX = (level == nestingLevel) ? len: 8;
1002 size_t globalSizeY = (level == nestingLevel) ? 1: 8;
1003 size_t globalSizeZ = (level == nestingLevel) ? 1: 8;
1004 if(--level < 0)
1005 {
1006 return;
1007 }
1008
1009 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
1010 {
1011 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
1012 {
1013 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
1014 {
1015 ++referenceResults[(globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX) % len];
1016 if (level >= tidX && level >= tidY && level >= tidZ)
1017 {
1018 generate_reference_results_all_diff_3D(referenceResults, len, level);
1019 }
1020 }
1021 }
1022 }
1023 }
1024
check_all_diff_3D(cl_int * results,cl_int len,cl_int nesting_level)1025 static int check_all_diff_3D(cl_int* results, cl_int len, cl_int nesting_level)
1026 {
1027 std::vector<cl_int> referenceResults(len, 0);
1028 generate_reference_results_all_diff_3D(referenceResults, len, nesting_level);
1029
1030 for(size_t i = 0; i < len; ++i)
1031 {
1032 if (results[i] != referenceResults[i])
1033 {
1034 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
1035 return (int)i;
1036 }
1037 }
1038
1039 return -1;
1040 }
1041
1042 static const char* enqueue_mix_wg_size_single[] =
1043 {
1044 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
1045 NL, "{"
1046 NL, " size_t tidX = get_global_id(0);"
1047 NL, " size_t tidY = get_global_id(1);"
1048 NL, " size_t tidZ = get_global_id(2);"
1049 NL, " size_t linearId = get_global_linear_id();"
1050 NL, " queue_t def_q = get_default_queue();"
1051 NL, " if(--level < 0) return;"
1052 NL, ""
1053 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
1054 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
1055 NL, ""
1056 NL, " ndrange_t ndrange;"
1057 NL, " switch((linearId + level) % 3)"
1058 NL, " {"
1059 NL, " case 0:"
1060 NL, " {"
1061 NL, " const size_t gs = 64 * 64 * 64;"
1062 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
1063 NL, " ls = ls? ls: 1;"
1064 NL, " ndrange = ndrange_1D(gs, ls);"
1065 NL, " }"
1066 NL, " break;"
1067 NL, " case 1:"
1068 NL, " {"
1069 NL, " const size_t gs[] = { 64, 64 * 64 };"
1070 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
1071 NL, " ls[1] = ls[1]? ls[1]: 1;"
1072 NL, " ndrange = ndrange_2D(gs, ls);"
1073 NL, " }"
1074 NL, " break;"
1075 NL, " case 2:"
1076 NL, " {"
1077 NL, " const size_t gs[] = { 64, 64, 64 };"
1078 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
1079 NL, " ls[2] = ls[2]? ls[2]: 1;"
1080 NL, " ndrange = ndrange_3D(gs, ls);"
1081 NL, " }"
1082 NL, " break;"
1083 NL, " default:"
1084 NL, " break;"
1085 NL, " }"
1086 NL, ""
1087 NL, " // Only 1 work-item enqueues block"
1088 NL, " if(tidX == 0 && (tidY == 0 || get_global_size(1) == 1) && (tidZ == 0 || get_global_size(2) == 1))"
1089 NL, " {"
1090 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
1091 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
1092 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
1093 NL, " }"
1094 NL, "}"
1095 NL, ""
1096 NL, "kernel void enqueue_mix_wg_size_single(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
1097 NL, "{"
1098 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
1099 NL, "}"
1100 NL
1101 };
1102
1103 static const char* enqueue_mix_wg_size_some_eq[] =
1104 {
1105 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
1106 NL, "{"
1107 NL, " queue_t def_q = get_default_queue();"
1108 NL, " size_t tidX = get_global_id(0);"
1109 NL, " size_t tidY = get_global_id(1);"
1110 NL, " size_t tidZ = get_global_id(2);"
1111 NL, " size_t linearId = get_global_linear_id();"
1112 NL, " if(--level < 0) return;"
1113 NL, ""
1114 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
1115 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
1116 NL, ""
1117 NL, " ndrange_t ndrange;"
1118 NL, " switch((linearId + level) % 3)"
1119 NL, " {"
1120 NL, " case 0:"
1121 NL, " {"
1122 NL, " const size_t gs = 2 * 4 * 4;"
1123 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
1124 NL, " ls = ls? ls: 1;"
1125 NL, " ndrange = ndrange_1D(gs, ls);"
1126 NL, " }"
1127 NL, " break;"
1128 NL, " case 1:"
1129 NL, " {"
1130 NL, " const size_t gs[] = { 2, 4 * 4 };"
1131 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
1132 NL, " ls[1] = ls[1]? ls[1]: 1;"
1133 NL, " ndrange = ndrange_2D(gs, ls);"
1134 NL, " }"
1135 NL, " break;"
1136 NL, " case 2:"
1137 NL, " {"
1138 NL, " const size_t gs[] = { 2, 4, 4 };"
1139 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
1140 NL, " ls[2] = ls[2]? ls[2]: 1;"
1141 NL, " ndrange = ndrange_3D(gs, ls);"
1142 NL, " }"
1143 NL, " break;"
1144 NL, " default:"
1145 NL, " break;"
1146 NL, " }"
1147 NL, ""
1148 NL, " // Some work-items enqueues nested blocks with the same level"
1149 NL, " size_t globalSizeX = get_global_size(0);"
1150 NL, " size_t globalSizeY = get_global_size(1);"
1151 NL, " size_t globalSizeZ = get_global_size(2);"
1152 NL, " if((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1) && ((tidZ < (globalSizeZ >> 1)) || globalSizeZ == 1))"
1153 NL, " {"
1154 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
1155 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
1156 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
1157 NL, " }"
1158 NL, "}"
1159 NL, ""
1160 NL, "kernel void enqueue_mix_wg_size_some_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
1161 NL, "{"
1162 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
1163 NL, "}"
1164 NL
1165 };
1166
generate_reference_results_some_eq_mix(std::vector<cl_int> & referenceResults,cl_int len,cl_int level,cl_int dim)1167 void generate_reference_results_some_eq_mix(std::vector<cl_int> &referenceResults, cl_int len, cl_int level, cl_int dim)
1168 {
1169 size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1;
1170 switch (dim)
1171 {
1172 case 0:
1173 globalSizeX = (level == nestingLevel) ? len: (2 * 4 * 4);
1174 break;
1175 case 1:
1176 globalSizeX = 2;
1177 globalSizeY = 4 * 4;
1178 break;
1179 case 2:
1180 globalSizeX = 2;
1181 globalSizeY = 4;
1182 globalSizeZ = 4;
1183 break;
1184 default:
1185 break;
1186 }
1187
1188 if(--level < 0)
1189 {
1190 return;
1191 }
1192
1193 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
1194 {
1195 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
1196 {
1197 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
1198 {
1199 size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX;
1200 cl_int nextDim = (linearID + level) % 3;
1201 if ((tidX < (globalSizeX >> 1)) && ((tidY < (globalSizeY >> 1)) || globalSizeY == 1) && ((tidZ < (globalSizeZ >> 1)) || globalSizeZ == 1))
1202 {
1203 ++referenceResults[linearID % len];
1204 generate_reference_results_some_eq_mix(referenceResults, len, level, nextDim);
1205 }
1206 }
1207 }
1208 }
1209 }
1210
check_some_eq_mix(cl_int * results,cl_int len,cl_int nesting_level)1211 static int check_some_eq_mix(cl_int* results, cl_int len, cl_int nesting_level)
1212 {
1213 std::vector<cl_int> referenceResults(len, 0);
1214 generate_reference_results_some_eq_mix(referenceResults, len, nesting_level, 0);
1215
1216 for(size_t i = 0; i < len; ++i)
1217 {
1218 if (results[i] != referenceResults[i])
1219 {
1220 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
1221 return (int)i;
1222 }
1223 }
1224
1225 return -1;
1226 }
1227
1228 static const char* enqueue_mix_wg_size_some_diff[] =
1229 {
1230 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
1231 NL, "{"
1232 NL, " queue_t def_q = get_default_queue();"
1233 NL, " size_t tidX = get_global_id(0);"
1234 NL, " size_t tidY = get_global_id(1);"
1235 NL, " size_t tidZ = get_global_id(2);"
1236 NL, " size_t linearId = get_global_linear_id();"
1237 NL, " if(--level < 0) return;"
1238 NL, ""
1239 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
1240 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
1241 NL, ""
1242 NL, " ndrange_t ndrange;"
1243 NL, " switch((linearId + level) % 3)"
1244 NL, " {"
1245 NL, " case 0:"
1246 NL, " {"
1247 NL, " const size_t gs = 8 * 8 * 8;"
1248 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
1249 NL, " ls = ls? ls: 1;"
1250 NL, " ndrange = ndrange_1D(gs, ls);"
1251 NL, " }"
1252 NL, " break;"
1253 NL, " case 1:"
1254 NL, " {"
1255 NL, " const size_t gs[] = { 8, 8 * 8 };"
1256 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
1257 NL, " ls[1] = ls[1]? ls[1]: 1;"
1258 NL, " ndrange = ndrange_2D(gs, ls);"
1259 NL, " }"
1260 NL, " break;"
1261 NL, " case 2:"
1262 NL, " {"
1263 NL, " const size_t gs[] = { 8, 8, 8 };"
1264 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
1265 NL, " ls[2] = ls[2]? ls[2]: 1;"
1266 NL, " ndrange = ndrange_3D(gs, ls);"
1267 NL, " }"
1268 NL, " break;"
1269 NL, " default:"
1270 NL, " break;"
1271 NL, " }"
1272 NL, ""
1273 NL, " // Some work-items enqueues nested blocks with different levels"
1274 NL, " if((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0)"
1275 NL, " {"
1276 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
1277 NL, " if(level >= tidX && level >= tidY && level >= tidZ)"
1278 NL, " {"
1279 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
1280 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
1281 NL, " }"
1282 NL, " }"
1283 NL, "}"
1284 NL, ""
1285 NL, "kernel void enqueue_mix_wg_size_some_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
1286 NL, "{"
1287 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
1288 NL, "}"
1289 NL
1290 };
1291
generate_reference_results_some_diff_mix(std::vector<cl_int> & referenceResults,cl_int len,cl_int level,cl_int dim)1292 void generate_reference_results_some_diff_mix(std::vector<cl_int> &referenceResults, cl_int len, cl_int level, cl_int dim)
1293 {
1294 size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1;
1295 switch (dim)
1296 {
1297 case 0:
1298 globalSizeX = (level == nestingLevel) ? len: (8 * 8 * 8);
1299 break;
1300 case 1:
1301 globalSizeX = 8;
1302 globalSizeY = 8 * 8;
1303 break;
1304 case 2:
1305 globalSizeX = 8;
1306 globalSizeY = 8;
1307 globalSizeZ = 8;
1308 break;
1309 default:
1310 return;
1311 break;
1312 }
1313
1314 if(--level < 0)
1315 {
1316 return;
1317 }
1318
1319 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
1320 {
1321 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
1322 {
1323 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
1324 {
1325 size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX;
1326 cl_int nextDim = (linearID + level) % 3;
1327 if ((tidX % 2) == 0 && (tidY % 2) == 0 && (tidZ % 2) == 0)
1328 {
1329 ++referenceResults[linearID % len];
1330 if (level >= tidX && level >= tidY && level >= tidZ)
1331 {
1332 generate_reference_results_some_diff_mix(referenceResults, len, level, nextDim);
1333 }
1334 }
1335 }
1336 }
1337 }
1338 }
1339
check_some_diff_mix(cl_int * results,cl_int len,cl_int nesting_level)1340 static int check_some_diff_mix(cl_int* results, cl_int len, cl_int nesting_level)
1341 {
1342 std::vector<cl_int> referenceResults(len, 0);
1343 generate_reference_results_some_diff_mix(referenceResults, len, nesting_level, 0);
1344
1345 for(size_t i = 0; i < len; ++i)
1346 {
1347 if (results[i] != referenceResults[i])
1348 {
1349 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
1350 return (int)i;
1351 }
1352 }
1353
1354 return -1;
1355 }
1356
1357 static const char* enqueue_mix_wg_size_all_eq[] =
1358 {
1359 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
1360 NL, "{"
1361 NL, " queue_t def_q = get_default_queue();"
1362 NL, " size_t tidX = get_global_id(0);"
1363 NL, " size_t tidY = get_global_id(1);"
1364 NL, " size_t tidZ = get_global_id(2);"
1365 NL, " size_t linearId = get_global_linear_id();"
1366 NL, " if(--level < 0) return;"
1367 NL, ""
1368 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
1369 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
1370 NL, ""
1371 NL, " ndrange_t ndrange;"
1372 NL, " switch((linearId + level) % 3)"
1373 NL, " {"
1374 NL, " case 0:"
1375 NL, " {"
1376 NL, " const size_t gs = 2 * 2 * 2;"
1377 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
1378 NL, " ls = ls? ls: 1;"
1379 NL, " ndrange = ndrange_1D(gs, ls);"
1380 NL, " }"
1381 NL, " break;"
1382 NL, " case 1:"
1383 NL, " {"
1384 NL, " const size_t gs[] = { 2, 2 * 2 };"
1385 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
1386 NL, " ls[1] = ls[1]? ls[1]: 1;"
1387 NL, " ndrange = ndrange_2D(gs, ls);"
1388 NL, " }"
1389 NL, " break;"
1390 NL, " case 2:"
1391 NL, " {"
1392 NL, " const size_t gs[] = { 2, 2, 2 };"
1393 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
1394 NL, " ls[2] = ls[2]? ls[2]: 1;"
1395 NL, " ndrange = ndrange_3D(gs, ls);"
1396 NL, " }"
1397 NL, " break;"
1398 NL, " default:"
1399 NL, " break;"
1400 NL, " }"
1401 NL, ""
1402 NL, " // All work-items enqueues nested blocks with the same level"
1403 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
1404 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
1405 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
1406 NL, "}"
1407 NL, ""
1408 NL, "kernel void enqueue_mix_wg_size_all_eq(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
1409 NL, "{"
1410 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
1411 NL, "}"
1412 NL
1413 };
1414
generate_reference_results_all_eq_mix(std::vector<cl_int> & referenceResults,cl_int len,cl_int level,cl_int dim)1415 void generate_reference_results_all_eq_mix(std::vector<cl_int> &referenceResults, cl_int len, cl_int level, cl_int dim)
1416 {
1417 size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1;
1418 switch (dim)
1419 {
1420 case 0:
1421 globalSizeX = (level == nestingLevel) ? len: (2 * 2 * 2);
1422 break;
1423 case 1:
1424 globalSizeX = 2;
1425 globalSizeY = 2 * 2;
1426 break;
1427 case 2:
1428 globalSizeX = 2;
1429 globalSizeY = 2;
1430 globalSizeZ = 2;
1431 break;
1432 default:
1433 break;
1434 }
1435
1436 if(--level < 0)
1437 {
1438 return;
1439 }
1440
1441 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
1442 {
1443 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
1444 {
1445 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
1446 {
1447 size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX;
1448 cl_int nextDim = (linearID + level) % 3;
1449 ++referenceResults[linearID % len];
1450 generate_reference_results_all_eq_mix(referenceResults, len, level, nextDim);
1451 }
1452 }
1453 }
1454 }
1455
check_all_eq_mix(cl_int * results,cl_int len,cl_int nesting_level)1456 static int check_all_eq_mix(cl_int* results, cl_int len, cl_int nesting_level)
1457 {
1458 std::vector<cl_int> referenceResults(len, 0);
1459 generate_reference_results_all_eq_mix(referenceResults, len, nesting_level, 0);
1460
1461 for(size_t i = 0; i < len; ++i)
1462 {
1463 if (results[i] != referenceResults[i])
1464 {
1465 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
1466 return (int)i;
1467 }
1468 }
1469
1470 return -1;
1471 }
1472
1473 static const char* enqueue_mix_wg_size_all_diff[] =
1474 {
1475 NL, "void block_fn(int level, int maxGlobalWorkSize, __global int* rnd, __global int* res)"
1476 NL, "{"
1477 NL, " queue_t def_q = get_default_queue();"
1478 NL, " size_t tidX = get_global_id(0);"
1479 NL, " size_t tidY = get_global_id(1);"
1480 NL, " size_t tidZ = get_global_id(2);"
1481 NL, " size_t linearId = get_global_linear_id();"
1482 NL, " if(--level < 0) return;"
1483 NL, ""
1484 NL, " void (^kernelBlock)(void) = ^{ block_fn(level, maxGlobalWorkSize, rnd, res); };"
1485 NL, " uint wg = get_kernel_work_group_size(kernelBlock);"
1486 NL, ""
1487 NL, " ndrange_t ndrange;"
1488 NL, " switch((linearId + level) % 3)"
1489 NL, " {"
1490 NL, " case 0:"
1491 NL, " {"
1492 NL, " const size_t gs = 8 * 8 * 8;"
1493 NL, " size_t ls = rnd[tidX % maxGlobalWorkSize] % wg % gs;"
1494 NL, " ls = ls? ls: 1;"
1495 NL, " ndrange = ndrange_1D(gs, ls);"
1496 NL, " }"
1497 NL, " break;"
1498 NL, " case 1:"
1499 NL, " {"
1500 NL, " const size_t gs[] = { 8, 8 * 8 };"
1501 NL, " size_t ls[] = { 1, rnd[tidY % maxGlobalWorkSize] % wg % gs[1] };"
1502 NL, " ls[1] = ls[1]? ls[1]: 1;"
1503 NL, " ndrange = ndrange_2D(gs, ls);"
1504 NL, " }"
1505 NL, " break;"
1506 NL, " case 2:"
1507 NL, " {"
1508 NL, " const size_t gs[] = { 8, 8, 8 };"
1509 NL, " size_t ls[] = { 1, 1, rnd[tidZ % maxGlobalWorkSize] % wg % gs[2] };"
1510 NL, " ls[2] = ls[2]? ls[2]: 1;"
1511 NL, " ndrange = ndrange_3D(gs, ls);"
1512 NL, " }"
1513 NL, " break;"
1514 NL, " default:"
1515 NL, " break;"
1516 NL, " }"
1517 NL, ""
1518 NL, " // All work-items enqueues nested blocks with different levels"
1519 NL, " atomic_inc(&res[linearId % maxGlobalWorkSize]);"
1520 NL, " if(level >= tidX && level >= tidY && level >= tidZ)"
1521 NL, " {"
1522 NL, " int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);"
1523 NL, " if(enq_res != CLK_SUCCESS) { res[linearId % maxGlobalWorkSize] = -1; return; }"
1524 NL, " }"
1525 NL, "}"
1526 NL, ""
1527 NL, "kernel void enqueue_mix_wg_size_all_diff(__global int* res, int level, int maxGlobalWorkSize, __global int* rnd)"
1528 NL, "{"
1529 NL, " block_fn(level, maxGlobalWorkSize, rnd, res);"
1530 NL, "}"
1531 NL
1532 };
1533
generate_reference_results_all_diff_mix(std::vector<cl_int> & referenceResults,cl_int len,cl_int level,cl_int dim)1534 void generate_reference_results_all_diff_mix(std::vector<cl_int> &referenceResults, cl_int len, cl_int level, cl_int dim)
1535 {
1536 size_t globalSizeX = 1, globalSizeY = 1, globalSizeZ = 1;
1537 switch (dim)
1538 {
1539 case 0:
1540 globalSizeX = (level == nestingLevel) ? len: (8 * 8 * 8);
1541 break;
1542 case 1:
1543 globalSizeX = 8;
1544 globalSizeY = 8 * 8;
1545 break;
1546 case 2:
1547 globalSizeX = 8;
1548 globalSizeY = 8;
1549 globalSizeZ = 8;
1550 break;
1551 default:
1552 break;
1553 }
1554
1555 if(--level < 0)
1556 {
1557 return;
1558 }
1559
1560 for (size_t tidZ = 0; tidZ < globalSizeZ; ++tidZ)
1561 {
1562 for (size_t tidY = 0; tidY < globalSizeY; ++tidY)
1563 {
1564 for (size_t tidX = 0; tidX < globalSizeX; ++tidX)
1565 {
1566 size_t linearID = globalSizeX * globalSizeY * tidZ + globalSizeX * tidY + tidX;
1567 cl_int nextDim = (linearID + level) % 3;
1568 ++referenceResults[linearID % len];
1569 if (level >= tidX && level >= tidY && level >= tidZ)
1570 {
1571 generate_reference_results_all_diff_mix(referenceResults, len, level, nextDim);
1572 }
1573 }
1574 }
1575 }
1576 }
1577
check_all_diff_mix(cl_int * results,cl_int len,cl_int nesting_level)1578 static int check_all_diff_mix(cl_int* results, cl_int len, cl_int nesting_level)
1579 {
1580 std::vector<cl_int> referenceResults(len, 0);
1581 generate_reference_results_all_diff_mix(referenceResults, len, nesting_level, 0);
1582
1583 for(size_t i = 0; i < len; ++i)
1584 {
1585 if (results[i] != referenceResults[i])
1586 {
1587 log_error("ERROR: Kernel returned %d vs. expected %d, index: %d\n", results[i], referenceResults[i], i);
1588 return (int)i;
1589 }
1590 }
1591
1592 return -1;
1593 }
1594
1595 static const kernel_src_check sources_enqueue_wg_size[] =
1596 {
1597 { KERNEL(enqueue_1D_wg_size_single), check_single },
1598 { KERNEL(enqueue_1D_wg_size_some_eq), check_some_eq_1D },
1599 { KERNEL(enqueue_1D_wg_size_some_diff), check_some_diff_1D },
1600 { KERNEL(enqueue_1D_wg_size_all_eq), check_all_eq_1D },
1601 { KERNEL(enqueue_1D_wg_size_all_diff), check_all_diff_1D },
1602
1603 { KERNEL(enqueue_2D_wg_size_single), check_single },
1604 { KERNEL(enqueue_2D_wg_size_some_eq), check_some_eq_2D },
1605 { KERNEL(enqueue_2D_wg_size_some_diff), check_some_diff_2D },
1606 { KERNEL(enqueue_2D_wg_size_all_eq), check_all_eq_2D },
1607 { KERNEL(enqueue_2D_wg_size_all_diff), check_all_diff_2D },
1608
1609 { KERNEL(enqueue_3D_wg_size_single), check_single },
1610 { KERNEL(enqueue_3D_wg_size_some_eq), check_some_eq_3D },
1611 { KERNEL(enqueue_3D_wg_size_some_diff), check_some_diff_3D },
1612 { KERNEL(enqueue_3D_wg_size_all_eq), check_all_eq_3D },
1613 { KERNEL(enqueue_3D_wg_size_all_diff), check_all_diff_3D },
1614
1615 { KERNEL(enqueue_mix_wg_size_single), check_single },
1616 { KERNEL(enqueue_mix_wg_size_some_eq), check_some_eq_mix },
1617 { KERNEL(enqueue_mix_wg_size_some_diff), check_some_diff_mix },
1618 { KERNEL(enqueue_mix_wg_size_all_eq), check_all_eq_mix },
1619 { KERNEL(enqueue_mix_wg_size_all_diff), check_all_diff_mix }
1620 };
1621
test_enqueue_wg_size(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)1622 int test_enqueue_wg_size(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
1623 {
1624 MTdata d;
1625 cl_uint i, k;
1626 cl_int err_ret, res = 0;
1627 clCommandQueueWrapper dev_queue;
1628 const cl_int MAX_GLOBAL_WORK_SIZE = MAX_GWS / 4;
1629 cl_int kernel_results[MAX_GLOBAL_WORK_SIZE] = { 0 };
1630 cl_uint vrnd[MAX_GLOBAL_WORK_SIZE] = { 0 };
1631
1632 size_t ret_len;
1633 cl_uint max_queues = 1;
1634 cl_uint maxQueueSize = 0;
1635 d = init_genrand(gRandomSeed);
1636
1637 if(gWimpyMode)
1638 {
1639 nestingLevel = 2;
1640 vlog( "*** WARNING: Testing in Wimpy mode! ***\n" );
1641 vlog( "*** Wimpy mode is not sufficient to verify correctness. ***\n" );
1642 }
1643
1644 err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, sizeof(maxQueueSize), &maxQueueSize, 0);
1645 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
1646
1647 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_ON_DEVICE_QUEUES, sizeof(max_queues), &max_queues, &ret_len);
1648 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_ON_DEVICE_QUEUES) failed");
1649
1650 size_t max_local_size = 1;
1651 err_ret = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(max_local_size), &max_local_size, &ret_len);
1652 test_error(err_ret, "clGetDeviceInfo(CL_DEVICE_MAX_WORK_GROUP_SIZE) failed");
1653
1654 cl_queue_properties queue_prop_def[] =
1655 {
1656 CL_QUEUE_PROPERTIES, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE|CL_QUEUE_ON_DEVICE|CL_QUEUE_ON_DEVICE_DEFAULT,
1657 CL_QUEUE_SIZE, maxQueueSize,
1658 0
1659 };
1660
1661 dev_queue = clCreateCommandQueueWithProperties(context, device, queue_prop_def, &err_ret);
1662 test_error(err_ret, "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_DEFAULT) failed");
1663
1664
1665 size_t failCnt = 0;
1666 for(k = 0; k < arr_size(sources_enqueue_wg_size); ++k)
1667 {
1668 if (!gKernelName.empty() && gKernelName != sources_enqueue_wg_size[k].src.kernel_name)
1669 continue;
1670
1671 log_info("Running '%s' kernel (%d of %d) ...\n", sources_enqueue_wg_size[k].src.kernel_name, k + 1, arr_size(sources_enqueue_wg_size));
1672 for(i = 0; i < MAX_GLOBAL_WORK_SIZE; ++i)
1673 {
1674 kernel_results[i] = 0;
1675 vrnd[i] = genrand_int32(d);
1676 }
1677
1678 // Fill some elements with prime numbers
1679 cl_uint prime[] = { 3, 5, 7, 11, 13, 17, 19, 23,
1680 29, 31, 37, 41, 43, 47, 53, 59,
1681 61, 67, 71, 73, 79, 83, 89, 97,
1682 101, 103, 107, 109, 113, 127 };
1683
1684 for(i = 0; i < arr_size(prime); ++i)
1685 {
1686 vrnd[genrand_int32(d) % MAX_GLOBAL_WORK_SIZE] = prime[i];
1687 }
1688
1689 clMemWrapper mem;
1690 mem = clCreateBuffer(context, CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, sizeof(vrnd), vrnd, &err_ret);
1691 test_error(err_ret, "clCreateBuffer() failed");
1692
1693 kernel_arg args[] =
1694 {
1695 { sizeof(cl_uint), &nestingLevel },
1696 { sizeof(cl_uint), &MAX_GLOBAL_WORK_SIZE },
1697 { sizeof(cl_mem), &mem }
1698 };
1699
1700 size_t global_size = MAX_GLOBAL_WORK_SIZE;
1701 size_t local_size = (max_local_size > global_size) ? global_size : max_local_size;
1702
1703 err_ret = run_n_kernel_args(context, queue, sources_enqueue_wg_size[k].src.lines, sources_enqueue_wg_size[k].src.num_lines, sources_enqueue_wg_size[k].src.kernel_name, local_size, global_size, kernel_results, sizeof(kernel_results), arr_size(args), args);
1704
1705 //check results
1706 int fail = sources_enqueue_wg_size[k].check(kernel_results, global_size, nestingLevel);
1707
1708 if(check_error(err_ret, "'%s' kernel execution failed", sources_enqueue_wg_size[k].src.kernel_name)) { ++failCnt; res = -1; continue; }
1709 else if(fail >= 0 && check_error(-1, "'%s' kernel results validation failed: [%d]", sources_enqueue_wg_size[k].src.kernel_name, fail)) { ++failCnt; res = -1; continue; }
1710 else log_info("'%s' kernel is OK.\n", sources_enqueue_wg_size[k].src.kernel_name);
1711 }
1712
1713 if (failCnt > 0)
1714 {
1715 log_error("ERROR: %d of %d kernels failed.\n", failCnt, arr_size(sources_enqueue_wg_size));
1716 }
1717
1718 free_mtdata(d);
1719
1720 return res;
1721 }
1722
1723 #endif
1724
1725