1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "../../test_common/harness/compat.h"
17
18 #include <algorithm>
19 #include <stdio.h>
20 #include <stdlib.h>
21 #include <string.h>
22 #include <sys/stat.h>
23 #include <sys/types.h>
24
25 #include "../../test_common/harness/conversions.h"
26 #include "procs.h"
27
28 static const char *async_global_to_local_kernel3D =
29 "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
30 "%s\n" // optional pragma string
31 "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
32 "%s *localBuffer, int numElementsPerLine, int numLines, int "
33 "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
34 "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
35 "{\n"
36 " int i, j, k;\n"
37 // Zero the local storage first
38 " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
39 " for(j=0; j<numLines; j++)\n"
40 " for(k=0; k<numElementsPerLine; k++)\n"
41 " localBuffer[ (get_local_id( 0 "
42 ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
43 "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
44 "dstLineStride) + k ] = (%s)(%s)0;\n"
45 // Do this to verify all kernels are done zeroing the local buffer before we
46 // try the copy
47 " barrier( CLK_LOCAL_MEM_FENCE );\n"
48 " event_t event;\n"
49 " event = async_work_group_copy_3D3D( (__local %s*)localBuffer, "
50 "(__global const "
51 "%s*)(src+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
52 "numElementsPerLine + numLines*srcLineStride + srcPlaneStride)), "
53 "(size_t)numElementsPerLine, (size_t)numLines, srcLineStride, "
54 "dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, dstPlaneStride, "
55 "0 );\n"
56 // Wait for the copy to complete, then verify by manually copying to the
57 // dest
58 " wait_group_events( 1, &event );\n"
59 " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
60 " for(j=0; j<numLines; j++)\n"
61 " for(k=0; k<numElementsPerLine; k++)\n"
62 " dst[ (get_global_id( 0 "
63 ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
64 "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
65 "dstLineStride) + k ] = localBuffer[ (get_local_id( 0 "
66 ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
67 "numLines*dstLineStride + dstPlaneStride) + j*(numElementsPerLine + "
68 "dstLineStride) + k ];\n"
69 "}\n";
70
71 static const char *async_local_to_global_kernel3D =
72 "#pragma OPENCL EXTENSION cl_khr_extended_async_copies : enable\n"
73 "%s\n" // optional pragma string
74 "__kernel void test_fn( const __global %s *src, __global %s *dst, __local "
75 "%s *localBuffer, int numElementsPerLine, int numLines, int "
76 "planesCopiesPerWorkgroup, int planesCopiesPerWorkItem, int srcLineStride, "
77 "int dstLineStride, int srcPlaneStride, int dstPlaneStride )\n"
78 "{\n"
79 " int i, j, k;\n"
80 // Zero the local storage first
81 " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
82 " for(j=0; j<numLines; j++)\n"
83 " for(k=0; k<numElementsPerLine; k++)\n"
84 " localBuffer[ (get_local_id( 0 "
85 ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
86 "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
87 "srcLineStride) + k ] = (%s)(%s)0;\n"
88 // Do this to verify all kernels are done zeroing the local buffer before we
89 // try the copy
90 " barrier( CLK_LOCAL_MEM_FENCE );\n"
91 " for(i=0; i<planesCopiesPerWorkItem; i++)\n"
92 " for(j=0; j<numLines; j++)\n"
93 " for(k=0; k<numElementsPerLine; k++)\n"
94 " localBuffer[ (get_local_id( 0 "
95 ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
96 "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
97 "srcLineStride) + k ] = src[ (get_global_id( 0 "
98 ")*planesCopiesPerWorkItem+i)*(numLines*numElementsPerLine + "
99 "numLines*srcLineStride + srcPlaneStride) + j*(numElementsPerLine + "
100 "srcLineStride) + k ];\n"
101 // Do this to verify all kernels are done copying to the local buffer before
102 // we try the copy
103 " barrier( CLK_LOCAL_MEM_FENCE );\n"
104 " event_t event;\n"
105 " event = async_work_group_copy_3D3D((__global "
106 "%s*)(dst+planesCopiesPerWorkgroup*get_group_id(0)*(numLines*"
107 "numElementsPerLine + numLines*dstLineStride + dstPlaneStride)), (__local "
108 "const %s*)localBuffer, (size_t)numElementsPerLine, (size_t)numLines, "
109 "srcLineStride, dstLineStride, planesCopiesPerWorkgroup, srcPlaneStride, "
110 "dstPlaneStride, 0 );\n"
111 " wait_group_events( 1, &event );\n"
112 "}\n";
113
test_copy3D(cl_device_id deviceID,cl_context context,cl_command_queue queue,const char * kernelCode,ExplicitType vecType,int vecSize,int srcLineStride,int dstLineStride,int srcPlaneStride,int dstPlaneStride,bool localIsDst)114 int test_copy3D(cl_device_id deviceID, cl_context context,
115 cl_command_queue queue, const char *kernelCode,
116 ExplicitType vecType, int vecSize, int srcLineStride,
117 int dstLineStride, int srcPlaneStride, int dstPlaneStride,
118 bool localIsDst)
119 {
120 int error;
121 clProgramWrapper program;
122 clKernelWrapper kernel;
123 clMemWrapper streams[2];
124 size_t threads[1], localThreads[1];
125 void *inBuffer, *outBuffer, *outBufferCopy;
126 MTdata d;
127 char vecNameString[64];
128 vecNameString[0] = 0;
129 if (vecSize == 1)
130 sprintf(vecNameString, "%s", get_explicit_type_name(vecType));
131 else
132 sprintf(vecNameString, "%s%d", get_explicit_type_name(vecType),
133 vecSize);
134
135 size_t elementSize = get_explicit_type_size(vecType) * vecSize;
136 log_info("Testing %s with srcLineStride = %d, dstLineStride = %d, "
137 "srcPlaneStride = %d, dstPlaneStride = %d\n",
138 vecNameString, srcLineStride, dstLineStride, srcPlaneStride,
139 dstPlaneStride);
140
141 cl_long max_local_mem_size;
142 error =
143 clGetDeviceInfo(deviceID, CL_DEVICE_LOCAL_MEM_SIZE,
144 sizeof(max_local_mem_size), &max_local_mem_size, NULL);
145 test_error(error, "clGetDeviceInfo for CL_DEVICE_LOCAL_MEM_SIZE failed.");
146
147 cl_long max_global_mem_size;
148 error = clGetDeviceInfo(deviceID, CL_DEVICE_GLOBAL_MEM_SIZE,
149 sizeof(max_global_mem_size), &max_global_mem_size,
150 NULL);
151 test_error(error, "clGetDeviceInfo for CL_DEVICE_GLOBAL_MEM_SIZE failed.");
152
153 cl_long max_alloc_size;
154 error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
155 sizeof(max_alloc_size), &max_alloc_size, NULL);
156 test_error(error,
157 "clGetDeviceInfo for CL_DEVICE_MAX_MEM_ALLOC_SIZE failed.");
158
159 if (max_alloc_size > max_global_mem_size / 2)
160 max_alloc_size = max_global_mem_size / 2;
161
162 unsigned int num_of_compute_devices;
163 error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_COMPUTE_UNITS,
164 sizeof(num_of_compute_devices),
165 &num_of_compute_devices, NULL);
166 test_error(error,
167 "clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed.");
168
169 char programSource[4096];
170 programSource[0] = 0;
171 char *programPtr;
172
173 sprintf(programSource, kernelCode,
174 vecType == kDouble ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable"
175 : "",
176 vecNameString, vecNameString, vecNameString, vecNameString,
177 get_explicit_type_name(vecType), vecNameString, vecNameString);
178 // log_info("program: %s\n", programSource);
179 programPtr = programSource;
180
181 error = create_single_kernel_helper(context, &program, &kernel, 1,
182 (const char **)&programPtr, "test_fn");
183 test_error(error, "Unable to create testing kernel");
184
185 size_t max_workgroup_size;
186 error = clGetKernelWorkGroupInfo(
187 kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof(max_workgroup_size),
188 &max_workgroup_size, NULL);
189 test_error(
190 error,
191 "clGetKernelWorkGroupInfo failed for CL_KERNEL_WORK_GROUP_SIZE.");
192
193 size_t max_local_workgroup_size[3];
194 error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES,
195 sizeof(max_local_workgroup_size),
196 max_local_workgroup_size, NULL);
197 test_error(error,
198 "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");
199
200 // Pick the minimum of the device and the kernel
201 if (max_workgroup_size > max_local_workgroup_size[0])
202 max_workgroup_size = max_local_workgroup_size[0];
203
204 size_t numElementsPerLine = 10;
205 size_t numLines = 13;
206 size_t planesCopiesPerWorkItem = 2;
207 elementSize =
208 get_explicit_type_size(vecType) * ((vecSize == 3) ? 4 : vecSize);
209 size_t localStorageSpacePerWorkitem = elementSize
210 * (planesCopiesPerWorkItem
211 * (numLines * numElementsPerLine
212 + numLines * (localIsDst ? dstLineStride : srcLineStride)
213 + (localIsDst ? dstPlaneStride : srcPlaneStride)));
214 size_t maxLocalWorkgroupSize =
215 (((int)max_local_mem_size / 2) / localStorageSpacePerWorkitem);
216
217 // Calculation can return 0 on embedded devices due to 1KB local mem limit
218 if (maxLocalWorkgroupSize == 0)
219 {
220 maxLocalWorkgroupSize = 1;
221 }
222
223 size_t localWorkgroupSize = maxLocalWorkgroupSize;
224 if (maxLocalWorkgroupSize > max_workgroup_size)
225 localWorkgroupSize = max_workgroup_size;
226
227 size_t maxTotalPlanesIn = ((max_alloc_size / elementSize) + srcPlaneStride)
228 / ((numLines * numElementsPerLine + numLines * srcLineStride)
229 + srcPlaneStride);
230 size_t maxTotalPlanesOut = ((max_alloc_size / elementSize) + dstPlaneStride)
231 / ((numLines * numElementsPerLine + numLines * dstLineStride)
232 + dstPlaneStride);
233 size_t maxTotalPlanes = (std::min)(maxTotalPlanesIn, maxTotalPlanesOut);
234 size_t maxLocalWorkgroups =
235 maxTotalPlanes / (localWorkgroupSize * planesCopiesPerWorkItem);
236
237 size_t localBufferSize = localWorkgroupSize * localStorageSpacePerWorkitem
238 - (localIsDst ? dstPlaneStride : srcPlaneStride);
239 size_t numberOfLocalWorkgroups = (std::min)(1111, (int)maxLocalWorkgroups);
240 size_t totalPlanes =
241 numberOfLocalWorkgroups * localWorkgroupSize * planesCopiesPerWorkItem;
242 size_t inBufferSize = elementSize
243 * (totalPlanes
244 * (numLines * numElementsPerLine + numLines * srcLineStride)
245 + (totalPlanes - 1) * srcPlaneStride);
246 size_t outBufferSize = elementSize
247 * (totalPlanes
248 * (numLines * numElementsPerLine + numLines * dstLineStride)
249 + (totalPlanes - 1) * dstPlaneStride);
250 size_t globalWorkgroupSize = numberOfLocalWorkgroups * localWorkgroupSize;
251
252 inBuffer = (void *)malloc(inBufferSize);
253 outBuffer = (void *)malloc(outBufferSize);
254 outBufferCopy = (void *)malloc(outBufferSize);
255
256 cl_int planesCopiesPerWorkItemInt, numElementsPerLineInt, numLinesInt,
257 planesCopiesPerWorkgroup;
258 planesCopiesPerWorkItemInt = (int)planesCopiesPerWorkItem;
259 numElementsPerLineInt = (int)numElementsPerLine;
260 numLinesInt = (int)numLines;
261 planesCopiesPerWorkgroup =
262 (int)(planesCopiesPerWorkItem * localWorkgroupSize);
263
264 log_info("Global: %d, local %d, local buffer %db, global in buffer %db, "
265 "global out buffer %db, each work group will copy %d planes and "
266 "each work item item will copy %d planes.\n",
267 (int)globalWorkgroupSize, (int)localWorkgroupSize,
268 (int)localBufferSize, (int)inBufferSize, (int)outBufferSize,
269 planesCopiesPerWorkgroup, planesCopiesPerWorkItemInt);
270
271 threads[0] = globalWorkgroupSize;
272 localThreads[0] = localWorkgroupSize;
273
274 d = init_genrand(gRandomSeed);
275 generate_random_data(
276 vecType, inBufferSize / get_explicit_type_size(vecType), d, inBuffer);
277 generate_random_data(
278 vecType, outBufferSize / get_explicit_type_size(vecType), d, outBuffer);
279 free_mtdata(d);
280 d = NULL;
281 memcpy(outBufferCopy, outBuffer, outBufferSize);
282
283 streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, inBufferSize,
284 inBuffer, &error);
285 test_error(error, "Unable to create input buffer");
286 streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, outBufferSize,
287 outBuffer, &error);
288 test_error(error, "Unable to create output buffer");
289
290 error = clSetKernelArg(kernel, 0, sizeof(streams[0]), &streams[0]);
291 test_error(error, "Unable to set kernel argument");
292 error = clSetKernelArg(kernel, 1, sizeof(streams[1]), &streams[1]);
293 test_error(error, "Unable to set kernel argument");
294 error = clSetKernelArg(kernel, 2, localBufferSize, NULL);
295 test_error(error, "Unable to set kernel argument");
296 error = clSetKernelArg(kernel, 3, sizeof(numElementsPerLineInt),
297 &numElementsPerLineInt);
298 test_error(error, "Unable to set kernel argument");
299 error = clSetKernelArg(kernel, 4, sizeof(numLinesInt), &numLinesInt);
300 test_error(error, "Unable to set kernel argument");
301 error = clSetKernelArg(kernel, 5, sizeof(planesCopiesPerWorkgroup),
302 &planesCopiesPerWorkgroup);
303 test_error(error, "Unable to set kernel argument");
304 error = clSetKernelArg(kernel, 6, sizeof(planesCopiesPerWorkItemInt),
305 &planesCopiesPerWorkItemInt);
306 test_error(error, "Unable to set kernel argument");
307 error = clSetKernelArg(kernel, 7, sizeof(srcLineStride), &srcLineStride);
308 test_error(error, "Unable to set kernel argument");
309 error = clSetKernelArg(kernel, 8, sizeof(dstLineStride), &dstLineStride);
310 test_error(error, "Unable to set kernel argument");
311 error = clSetKernelArg(kernel, 9, sizeof(srcPlaneStride), &srcPlaneStride);
312 test_error(error, "Unable to set kernel argument");
313 error = clSetKernelArg(kernel, 10, sizeof(dstPlaneStride), &dstPlaneStride);
314 test_error(error, "Unable to set kernel argument");
315
316 // Enqueue
317 error = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, threads,
318 localThreads, 0, NULL, NULL);
319 test_error(error, "Unable to queue kernel");
320
321 // Read
322 error = clEnqueueReadBuffer(queue, streams[1], CL_TRUE, 0, outBufferSize,
323 outBuffer, 0, NULL, NULL);
324 test_error(error, "Unable to read results");
325
326 // Verify
327 int failuresPrinted = 0;
328 // Verify
329 size_t typeSize = get_explicit_type_size(vecType) * vecSize;
330 for (int i = 0;
331 i < (int)globalWorkgroupSize * planesCopiesPerWorkItem * elementSize;
332 i += elementSize)
333 {
334 for (int j = 0; j < (int)numLines * elementSize; j += elementSize)
335 {
336 for (int k = 0; k < (int)numElementsPerLine * elementSize;
337 k += elementSize)
338 {
339 int inIdx = i
340 * (numLines * numElementsPerLine
341 + numLines * srcLineStride + srcPlaneStride)
342 + j * (numElementsPerLine + srcLineStride) + k;
343 int outIdx = i
344 * (numLines * numElementsPerLine
345 + numLines * dstLineStride + dstPlaneStride)
346 + j * (numElementsPerLine + dstLineStride) + k;
347 if (memcmp(((char *)inBuffer) + inIdx,
348 ((char *)outBuffer) + outIdx, typeSize)
349 != 0)
350 {
351 unsigned char *inchar = (unsigned char *)inBuffer + inIdx;
352 unsigned char *outchar =
353 (unsigned char *)outBuffer + outIdx;
354 char values[4096];
355 values[0] = 0;
356
357 if (failuresPrinted == 0)
358 {
359 // Print first failure message
360 log_error("ERROR: Results of copy did not validate!");
361 }
362 sprintf(values + strlen(values), "%d -> [", inIdx);
363 for (int l = 0; l < (int)elementSize; l++)
364 sprintf(values + strlen(values), "%2x ", inchar[l]);
365 sprintf(values + strlen(values), "] != [");
366 for (int l = 0; l < (int)elementSize; l++)
367 sprintf(values + strlen(values), "%2x ", outchar[l]);
368 sprintf(values + strlen(values), "]");
369 log_error("%s\n", values);
370 failuresPrinted++;
371 }
372
373 if (failuresPrinted > 5)
374 {
375 log_error("Not printing further failures...\n");
376 return -1;
377 }
378 }
379 if (j < (int)numLines * elementSize)
380 {
381 int outIdx = i
382 * (numLines * numElementsPerLine
383 + numLines * dstLineStride + dstPlaneStride)
384 + j * (numElementsPerLine + dstLineStride)
385 + numElementsPerLine * elementSize;
386 if (memcmp(((char *)outBuffer) + outIdx,
387 ((char *)outBufferCopy) + outIdx,
388 dstLineStride * elementSize)
389 != 0)
390 {
391 if (failuresPrinted == 0)
392 {
393 // Print first failure message
394 log_error("ERROR: Results of copy did not validate!\n");
395 }
396 log_error(
397 "3D copy corrupted data in output buffer in the line "
398 "stride offset of plane %d line %d\n",
399 i, j);
400 failuresPrinted++;
401 }
402 if (failuresPrinted > 5)
403 {
404 log_error("Not printing further failures...\n");
405 return -1;
406 }
407 }
408 }
409 if (i < (int)(globalWorkgroupSize * planesCopiesPerWorkItem - 1)
410 * elementSize)
411 {
412 int outIdx = i
413 * (numLines * numElementsPerLine + numLines * dstLineStride
414 + dstPlaneStride)
415 + (numLines * elementSize) * (numElementsPerLine)
416 + (numLines * elementSize) * (dstLineStride);
417 if (memcmp(((char *)outBuffer) + outIdx,
418 ((char *)outBufferCopy) + outIdx,
419 dstPlaneStride * elementSize)
420 != 0)
421 {
422 if (failuresPrinted == 0)
423 {
424 // Print first failure message
425 log_error("ERROR: Results of copy did not validate!\n");
426 }
427 log_error("3D copy corrupted data in output buffer in the "
428 "plane stride "
429 "offset of plane %d\n",
430 i);
431 failuresPrinted++;
432 }
433 if (failuresPrinted > 5)
434 {
435 log_error("Not printing further failures...\n");
436 return -1;
437 }
438 }
439 }
440
441 free(inBuffer);
442 free(outBuffer);
443 free(outBufferCopy);
444
445 return failuresPrinted ? -1 : 0;
446 }
447
test_copy3D_all_types(cl_device_id deviceID,cl_context context,cl_command_queue queue,const char * kernelCode,bool localIsDst)448 int test_copy3D_all_types(cl_device_id deviceID, cl_context context,
449 cl_command_queue queue, const char *kernelCode,
450 bool localIsDst)
451 {
452 ExplicitType vecType[] = {
453 kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong,
454 kULong, kFloat, kDouble, kNumExplicitTypes
455 };
456 unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 };
457 unsigned int smallTypesStrideSizes[] = { 0, 10, 100 };
458 unsigned int size, typeIndex, srcLineStride, dstLineStride, srcPlaneStride,
459 dstPlaneStride;
460
461 int errors = 0;
462
463 if (!is_extension_available(deviceID, "cl_khr_extended_async_copies"))
464 {
465 log_info(
466 "Device does not support extended async copies. Skipping test.\n");
467 return 0;
468 }
469
470 for (typeIndex = 0; vecType[typeIndex] != kNumExplicitTypes; typeIndex++)
471 {
472 if (vecType[typeIndex] == kDouble
473 && !is_extension_available(deviceID, "cl_khr_fp64"))
474 continue;
475
476 if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong)
477 && !gHasLong)
478 continue;
479
480 for (size = 0; vecSizes[size] != 0; size++)
481 {
482 if (get_explicit_type_size(vecType[typeIndex]) * vecSizes[size]
483 <= 2) // small type
484 {
485 for (srcLineStride = 0;
486 srcLineStride < sizeof(smallTypesStrideSizes)
487 / sizeof(smallTypesStrideSizes[0]);
488 srcLineStride++)
489 {
490 for (dstLineStride = 0;
491 dstLineStride < sizeof(smallTypesStrideSizes)
492 / sizeof(smallTypesStrideSizes[0]);
493 dstLineStride++)
494 {
495 for (srcPlaneStride = 0;
496 srcPlaneStride < sizeof(smallTypesStrideSizes)
497 / sizeof(smallTypesStrideSizes[0]);
498 srcPlaneStride++)
499 {
500 for (dstPlaneStride = 0;
501 dstPlaneStride < sizeof(smallTypesStrideSizes)
502 / sizeof(smallTypesStrideSizes[0]);
503 dstPlaneStride++)
504 {
505 if (test_copy3D(
506 deviceID, context, queue, kernelCode,
507 vecType[typeIndex], vecSizes[size],
508 smallTypesStrideSizes[srcLineStride],
509 smallTypesStrideSizes[dstLineStride],
510 smallTypesStrideSizes[srcPlaneStride],
511 smallTypesStrideSizes[dstPlaneStride],
512 localIsDst))
513 {
514 errors++;
515 }
516 }
517 }
518 }
519 }
520 }
521 // not a small type, check only zero stride
522 else if (test_copy3D(deviceID, context, queue, kernelCode,
523 vecType[typeIndex], vecSizes[size], 0, 0, 0, 0,
524 localIsDst))
525 {
526 errors++;
527 }
528 }
529 }
530 if (errors) return -1;
531 return 0;
532 }
533
test_async_copy_global_to_local3D(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)534 int test_async_copy_global_to_local3D(cl_device_id deviceID, cl_context context,
535 cl_command_queue queue, int num_elements)
536 {
537 return test_copy3D_all_types(deviceID, context, queue,
538 async_global_to_local_kernel3D, true);
539 }
540
test_async_copy_local_to_global3D(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)541 int test_async_copy_local_to_global3D(cl_device_id deviceID, cl_context context,
542 cl_command_queue queue, int num_elements)
543 {
544 return test_copy3D_all_types(deviceID, context, queue,
545 async_local_to_global_kernel3D, false);
546 }
547