1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20 
21 #include <vector>
22 
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26 
27 static int max_nestingLevel = 10;
28 
29 static const char* enqueue_multi_level = R"(
30     void block_fn(__global int* res, int level)
31     {
32       queue_t def_q = get_default_queue();
33       if(--level < 0) return;
34       void (^kernelBlock)(void) = ^{ block_fn(res, level); };
35       ndrange_t ndrange = ndrange_1D(1);
36       int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
37       if(enq_res != CLK_SUCCESS) { (*res) = -1; return; }
38       else if(*res != -1) { (*res)++; }
39     }
40     kernel void enqueue_multi_level(__global int* res, int level)
41     {
42       *res = 0;
43       block_fn(res, level);
44     })";
45 
test_enqueue_profiling(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)46 int test_enqueue_profiling(cl_device_id device, cl_context context,
47                            cl_command_queue queue, int num_elements)
48 {
49     cl_int err_ret, res = 0;
50     clCommandQueueWrapper dev_queue;
51     clCommandQueueWrapper host_queue;
52 
53     cl_uint maxQueueSize = 0;
54     err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
55                               sizeof(maxQueueSize), &maxQueueSize, 0);
56     test_error(err_ret,
57                "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
58 
59     cl_queue_properties dev_queue_prop_def[] = {
60         CL_QUEUE_PROPERTIES,
61         CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE
62             | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE,
63         CL_QUEUE_SIZE, maxQueueSize, 0
64     };
65 
66     dev_queue = clCreateCommandQueueWithProperties(
67         context, device, dev_queue_prop_def, &err_ret);
68     test_error(err_ret,
69                "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
70                "DEFAULT) failed");
71 
72     cl_queue_properties host_queue_prop_def[] = { CL_QUEUE_PROPERTIES,
73                                                   CL_QUEUE_PROFILING_ENABLE,
74                                                   0 };
75 
76     host_queue = clCreateCommandQueueWithProperties(
77         context, device, host_queue_prop_def, &err_ret);
78     test_error(err_ret,
79                "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
80                "DEFAULT) failed");
81 
82     cl_int status;
83     size_t size = 1;
84     cl_int result = 0;
85 
86     clMemWrapper res_mem;
87     clProgramWrapper program;
88     clKernelWrapper kernel;
89 
90     cl_event kernel_event;
91 
92     err_ret = create_single_kernel_helper(context, &program, &kernel, 1,
93                                           &enqueue_multi_level,
94                                           "enqueue_multi_level");
95     if (check_error(err_ret, "Create single kernel failed")) return -1;
96 
97     res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
98                              sizeof(result), &result, &err_ret);
99     test_error(err_ret, "clCreateBuffer() failed");
100 
101     err_ret = clSetKernelArg(kernel, 0, sizeof(res_mem), &res_mem);
102     test_error(err_ret, "clSetKernelArg(0) failed");
103 
104     for (int level = 0; level < max_nestingLevel; level++)
105     {
106         err_ret = clSetKernelArg(kernel, 1, sizeof(level), &level);
107         test_error(err_ret, "clSetKernelArg(1) failed");
108 
109         err_ret = clEnqueueNDRangeKernel(host_queue, kernel, 1, NULL, &size,
110                                          &size, 0, NULL, &kernel_event);
111         test_error(err_ret,
112                    "clEnqueueNDRangeKernel('enqueue_multi_level') failed");
113 
114         err_ret = clEnqueueReadBuffer(host_queue, res_mem, CL_TRUE, 0,
115                                       sizeof(result), &result, 0, NULL, NULL);
116         test_error(err_ret, "clEnqueueReadBuffer() failed");
117 
118         if (result != level)
119         {
120             log_error("Kernel execution should return the maximum nesting "
121                       " level (got %d instead of %d)",
122                       result, level);
123             return -1;
124         }
125 
126         err_ret =
127             clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS,
128                            sizeof(status), &status, NULL);
129         test_error(err_ret, "clGetEventInfo() failed");
130 
131         if (check_error(status, "Kernel execution status %d", status))
132             return status;
133 
134         cl_ulong end;
135         err_ret = clGetEventProfilingInfo(
136             kernel_event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL);
137         test_error(err_ret, "clGetEventProfilingInfo() failed");
138 
139         cl_ulong complete;
140         err_ret =
141             clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_COMPLETE,
142                                     sizeof(complete), &complete, NULL);
143         test_error(err_ret, "clGetEventProfilingInfo() failed");
144 
145         if (end > complete)
146         {
147             log_error(
148                 "Profiling END should be smaller than or equal to COMPLETE for "
149                 "kernels that use the on-device queue");
150             return -1;
151         }
152 
153         log_info("Profiling info for '%s' kernel is OK for level %d.\n",
154                  "enqueue_multi_level", level);
155 
156         clReleaseEvent(kernel_event);
157     }
158 
159     return res;
160 }
161