1 //
2 // Copyright (c) 2020 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include <stdio.h>
17 #include <string.h>
18 #include "harness/testHarness.h"
19 #include "harness/typeWrappers.h"
20
21 #include <vector>
22
23 #include "procs.h"
24 #include "utils.h"
25 #include <time.h>
26
27 static int max_nestingLevel = 10;
28
29 static const char* enqueue_multi_level = R"(
30 void block_fn(__global int* res, int level)
31 {
32 queue_t def_q = get_default_queue();
33 if(--level < 0) return;
34 void (^kernelBlock)(void) = ^{ block_fn(res, level); };
35 ndrange_t ndrange = ndrange_1D(1);
36 int enq_res = enqueue_kernel(def_q, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, kernelBlock);
37 if(enq_res != CLK_SUCCESS) { (*res) = -1; return; }
38 else if(*res != -1) { (*res)++; }
39 }
40 kernel void enqueue_multi_level(__global int* res, int level)
41 {
42 *res = 0;
43 block_fn(res, level);
44 })";
45
test_enqueue_profiling(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)46 int test_enqueue_profiling(cl_device_id device, cl_context context,
47 cl_command_queue queue, int num_elements)
48 {
49 cl_int err_ret, res = 0;
50 clCommandQueueWrapper dev_queue;
51 clCommandQueueWrapper host_queue;
52
53 cl_uint maxQueueSize = 0;
54 err_ret = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE,
55 sizeof(maxQueueSize), &maxQueueSize, 0);
56 test_error(err_ret,
57 "clGetDeviceInfo(CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE) failed");
58
59 cl_queue_properties dev_queue_prop_def[] = {
60 CL_QUEUE_PROPERTIES,
61 CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE
62 | CL_QUEUE_ON_DEVICE_DEFAULT | CL_QUEUE_PROFILING_ENABLE,
63 CL_QUEUE_SIZE, maxQueueSize, 0
64 };
65
66 dev_queue = clCreateCommandQueueWithProperties(
67 context, device, dev_queue_prop_def, &err_ret);
68 test_error(err_ret,
69 "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
70 "DEFAULT) failed");
71
72 cl_queue_properties host_queue_prop_def[] = { CL_QUEUE_PROPERTIES,
73 CL_QUEUE_PROFILING_ENABLE,
74 0 };
75
76 host_queue = clCreateCommandQueueWithProperties(
77 context, device, host_queue_prop_def, &err_ret);
78 test_error(err_ret,
79 "clCreateCommandQueueWithProperties(CL_QUEUE_DEVICE|CL_QUEUE_"
80 "DEFAULT) failed");
81
82 cl_int status;
83 size_t size = 1;
84 cl_int result = 0;
85
86 clMemWrapper res_mem;
87 clProgramWrapper program;
88 clKernelWrapper kernel;
89
90 cl_event kernel_event;
91
92 err_ret = create_single_kernel_helper(context, &program, &kernel, 1,
93 &enqueue_multi_level,
94 "enqueue_multi_level");
95 if (check_error(err_ret, "Create single kernel failed")) return -1;
96
97 res_mem = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
98 sizeof(result), &result, &err_ret);
99 test_error(err_ret, "clCreateBuffer() failed");
100
101 err_ret = clSetKernelArg(kernel, 0, sizeof(res_mem), &res_mem);
102 test_error(err_ret, "clSetKernelArg(0) failed");
103
104 for (int level = 0; level < max_nestingLevel; level++)
105 {
106 err_ret = clSetKernelArg(kernel, 1, sizeof(level), &level);
107 test_error(err_ret, "clSetKernelArg(1) failed");
108
109 err_ret = clEnqueueNDRangeKernel(host_queue, kernel, 1, NULL, &size,
110 &size, 0, NULL, &kernel_event);
111 test_error(err_ret,
112 "clEnqueueNDRangeKernel('enqueue_multi_level') failed");
113
114 err_ret = clEnqueueReadBuffer(host_queue, res_mem, CL_TRUE, 0,
115 sizeof(result), &result, 0, NULL, NULL);
116 test_error(err_ret, "clEnqueueReadBuffer() failed");
117
118 if (result != level)
119 {
120 log_error("Kernel execution should return the maximum nesting "
121 " level (got %d instead of %d)",
122 result, level);
123 return -1;
124 }
125
126 err_ret =
127 clGetEventInfo(kernel_event, CL_EVENT_COMMAND_EXECUTION_STATUS,
128 sizeof(status), &status, NULL);
129 test_error(err_ret, "clGetEventInfo() failed");
130
131 if (check_error(status, "Kernel execution status %d", status))
132 return status;
133
134 cl_ulong end;
135 err_ret = clGetEventProfilingInfo(
136 kernel_event, CL_PROFILING_COMMAND_END, sizeof(end), &end, NULL);
137 test_error(err_ret, "clGetEventProfilingInfo() failed");
138
139 cl_ulong complete;
140 err_ret =
141 clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_COMPLETE,
142 sizeof(complete), &complete, NULL);
143 test_error(err_ret, "clGetEventProfilingInfo() failed");
144
145 if (end > complete)
146 {
147 log_error(
148 "Profiling END should be smaller than or equal to COMPLETE for "
149 "kernels that use the on-device queue");
150 return -1;
151 }
152
153 log_info("Profiling info for '%s' kernel is OK for level %d.\n",
154 "enqueue_multi_level", level);
155
156 clReleaseEvent(kernel_event);
157 }
158
159 return res;
160 }
161