1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "testBase.h"
17 #include "harness/typeWrappers.h"
18 #include "harness/conversions.h"
19 
20 const char *subgroup_dispatch_kernel[] = {
21 "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n"
22 "__kernel void subgroup_dispatch_kernel(__global int *output)\n"
23 "{\n"
24 "    size_t size = get_num_sub_groups ();\n"
25 "\n"
26 "    output[0] = size;\n"
27 "\n"
28 "}\n" };
29 
flatten_ndrange(size_t * ndrange,size_t dim)30 size_t flatten_ndrange(size_t* ndrange, size_t dim)
31 {
32     switch(dim)
33     {
34     case 1:
35         return *ndrange;
36     case 2:
37         return ndrange[0] * ndrange[1];
38     case 3:
39         return ndrange[0] * ndrange[1] * ndrange[2];
40     default:
41         log_error("ERROR: bad ndrange value");
42         return 0;
43     }
44 }
45 
get_sub_group_num(cl_command_queue queue,cl_kernel kernel,clMemWrapper & out,size_t & size,size_t local_size,size_t dim)46 cl_int get_sub_group_num(cl_command_queue queue, cl_kernel kernel, clMemWrapper& out, size_t& size, size_t local_size, size_t dim)
47 {
48     size_t ndrange[3] = {local_size, 1, 1};
49     cl_int error = CL_SUCCESS;
50     size = 0;
51     error = clSetKernelArg(kernel, 0, sizeof(out), &out);
52     error += clEnqueueNDRangeKernel(queue, kernel, dim, NULL, ndrange, ndrange, 0, NULL, NULL);
53     error += clEnqueueReadBuffer(queue, out, CL_TRUE, 0, 4, &size, 0, NULL, NULL);
54     return error;
55 }
56 
test_sub_group_dispatch(cl_device_id deviceID,cl_context context,cl_command_queue queue,int num_elements)57 int test_sub_group_dispatch(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
58 {
59     static const size_t gsize0 = 80;
60     int i, error;
61     size_t realSize;
62     size_t kernel_max_subgroup_size, kernel_subgroup_count;
63     size_t global[] = {1,1,1};
64     size_t max_local;
65 
66     cl_platform_id platform;
67     clProgramWrapper program;
68     clKernelWrapper kernel;
69     clMemWrapper out;
70 
71     size_t ret_ndrange1d;
72     size_t ret_ndrange2d[2];
73     size_t ret_ndrange3d[3];
74 
75     size_t ret_ndrange2d_flattened;
76     size_t ret_ndrange3d_flattened;
77 
78     if (get_device_cl_version(deviceID) >= Version(3, 0))
79     {
80         int error;
81         cl_uint max_num_sub_groups;
82 
83         error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_NUM_SUB_GROUPS,
84                                 sizeof(max_num_sub_groups), &max_num_sub_groups,
85                                 NULL);
86         if (error != CL_SUCCESS)
87         {
88             print_error(error, "Unable to get max num subgroups");
89             return error;
90         }
91 
92         if (max_num_sub_groups == 0)
93         {
94             return TEST_SKIPPED_ITSELF;
95         }
96     }
97 
98     error = create_single_kernel_helper(context, &program, &kernel, 1,
99                                         subgroup_dispatch_kernel,
100                                         "subgroup_dispatch_kernel");
101     if (error != 0)
102         return error;
103 
104     out = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(size_t), NULL, &error);
105     test_error(error, "clCreateBuffer failed");
106 
107     error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &max_local, NULL);
108     test_error(error, "clGetDeviceInfo failed");
109 
110 
111     error = clGetDeviceInfo(deviceID, CL_DEVICE_PLATFORM, sizeof(platform), (void *)&platform, NULL);
112     test_error(error, "clDeviceInfo failed for CL_DEVICE_PLATFORM");
113 
114     // Get the max subgroup size
115     error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
116             sizeof(max_local), &max_local, sizeof(kernel_max_subgroup_size), (void *)&kernel_max_subgroup_size, &realSize);
117     test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE");
118     log_info("The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_max_subgroup_size);
119 
120     if (realSize != sizeof(kernel_max_subgroup_size)) {
121         log_error( "ERROR: Returned size of max sub group size not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_max_subgroup_size), (int)realSize );
122         return -1;
123     }
124 
125     // Get the number of subgroup for max local size
126     error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
127             sizeof(max_local), &max_local, sizeof(kernel_subgroup_count), (void *)&kernel_subgroup_count, &realSize);
128     test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE");
129     log_info("The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n", (int)kernel_subgroup_count);
130 
131     if (realSize != sizeof(kernel_subgroup_count)) {
132         log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
133         return -1;
134     }
135 
136     // test CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT
137     for (size_t i = kernel_subgroup_count; i > 0; --i)
138     {
139         // test all 3 different dimention of requested local size
140         size_t expect_size = kernel_max_subgroup_size * i;
141         size_t kernel_ret_size = 0;
142         error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize);
143         test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
144         if (realSize != sizeof(ret_ndrange1d)) {
145             log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
146             return -1;
147         }
148 
149         if (ret_ndrange1d != expect_size)
150         {
151             log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange1d );
152             return -1;
153         }
154 
155         error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange1d, 1);
156         test_error(error, "Failed to query number of subgroups from kernel");
157         if (i != kernel_ret_size)
158         {
159             log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
160             return -1;
161         }
162 
163         error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize);
164         test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
165         if (realSize != sizeof(ret_ndrange2d)) {
166             log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
167             return -1;
168         }
169 
170         ret_ndrange2d_flattened = flatten_ndrange(ret_ndrange2d, 2);
171         if (ret_ndrange2d_flattened != expect_size ||
172             ret_ndrange2d[1] != 1)
173         {
174             log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange2d_flattened );
175             return -1;
176         }
177 
178         error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange2d_flattened, 2);
179         test_error(error, "Failed to query number of subgroups from kernel");
180         if (i != kernel_ret_size)
181         {
182             log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
183             return -1;
184         }
185 
186         error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(i), &i, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize);
187         test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
188         if (realSize != sizeof(ret_ndrange3d)) {
189             log_error( "ERROR: Returned size of sub group count not valid! (Expected %d, got %d)\n", (int)sizeof(kernel_subgroup_count), (int)realSize );
190             return -1;
191         }
192 
193         ret_ndrange3d_flattened = flatten_ndrange(ret_ndrange3d, 3);
194         if (ret_ndrange3d_flattened != expect_size ||
195             ret_ndrange3d[1] != 1 ||
196             ret_ndrange3d[2] != 1)
197         {
198             log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", (int)expect_size, (int)ret_ndrange3d_flattened );
199             return -1;
200         }
201 
202         error = get_sub_group_num(queue, kernel, out, kernel_ret_size, ret_ndrange3d_flattened, 3);
203         test_error(error, "Failed to query number of subgroups from kernel");
204         if (i != kernel_ret_size)
205         {
206             log_error( "ERROR: Mismatch between requested number of subgroups and what get_num_sub_groups() in kernel returned! (Expected %d, got %d)\n", (int)i, (int)kernel_ret_size );
207             return -1;
208         }
209     }
210 
211     // test when input subgroup count exceeds max wg size
212     size_t large_sg_size = kernel_subgroup_count + 1;
213     error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange1d), &ret_ndrange1d, &realSize);
214         test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
215     if (ret_ndrange1d != 0)
216     {
217         log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT! (Expected %d, got %d)\n", 0, (int)ret_ndrange1d );
218             return -1;
219     }
220 
221     error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange2d), ret_ndrange2d, &realSize);
222         test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
223     if (ret_ndrange2d[0] != 0 ||
224         ret_ndrange2d[1] != 0)
225     {
226         log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" );
227             return -1;
228     }
229 
230     error = clGetKernelSubGroupInfo(kernel, deviceID, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, sizeof(size_t), &large_sg_size, sizeof(ret_ndrange3d), ret_ndrange3d, &realSize);
231         test_error(error, "clGetKernelSubGroupInfo failed for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT");
232     if (ret_ndrange3d[0] != 0 ||
233         ret_ndrange3d[1] != 0 ||
234         ret_ndrange3d[2] != 0)
235     {
236         log_error( "ERROR: Incorrect value returned for CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT!" );
237             return -1;
238     }
239 
240     return 0;
241 }
242