1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24
25 #include "procs.h"
26
27 static const char *fpadd_kernel_code =
28 "__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 " int tid = get_global_id(0);\n"
31 "\n"
32 " dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34
35 static const char *fpsub_kernel_code =
36 "__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
37 "{\n"
38 " int tid = get_global_id(0);\n"
39 "\n"
40 " dst[tid] = srcA[tid] - srcB[tid];\n"
41 "}\n";
42
43 static const char *fpmul_kernel_code =
44 "__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
45 "{\n"
46 " int tid = get_global_id(0);\n"
47 "\n"
48 " dst[tid] = srcA[tid] * srcB[tid];\n"
49 "}\n";
50
51
52 static const float MAX_ERR = 1e-5f;
53
54 static int
verify_fpadd(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)55 verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
56 {
57 float r;
58 int i;
59
60 float * reference_ptr = (float *)malloc(n * sizeof(float));
61
62 for (i=0; i<n; i++)
63 {
64 reference_ptr[i] = inptrA[i] + inptrB[i];
65 }
66
67 for (i=0; i<n; i++)
68 {
69 if (reference_ptr[i] != outptr[i])
70 {
71 log_error("FP_ADD float test failed\n");
72 return -1;
73 }
74 }
75
76 free(reference_ptr);
77
78 log_info("FP_ADD float test passed\n");
79 return 0;
80 }
81
82 static int
verify_fpsub(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)83 verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
84 {
85 float r;
86 int i;
87
88 float * reference_ptr = (float *)malloc(n * sizeof(float));
89
90 for (i=0; i<n; i++)
91 {
92 reference_ptr[i] = inptrA[i] - inptrB[i];
93 }
94
95 for (i=0; i<n; i++)
96 {
97 if (reference_ptr[i] != outptr[i])
98 {
99 log_error("FP_SUB float test failed\n");
100 return -1;
101 }
102 }
103
104 free(reference_ptr);
105
106 log_info("FP_SUB float test passed\n");
107 return 0;
108 }
109
110 static int
verify_fpmul(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)111 verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
112 {
113 float r;
114 int i;
115
116 float * reference_ptr = (float *)malloc(n * sizeof(float));
117
118 for (i=0; i<n; i++)
119 {
120 reference_ptr[i] = inptrA[i] * inptrB[i];
121 }
122
123 for (i=0; i<n; i++)
124 {
125 if (reference_ptr[i] != outptr[i])
126 {
127 log_error("FP_MUL float test failed\n");
128 return -1;
129 }
130 }
131
132 free(reference_ptr);
133
134 log_info("FP_MUL float test passed\n");
135 return 0;
136 }
137
138 #if defined( __APPLE__ )
139
test_queue_priority(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)140 int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
141 {
142 int err;
143 int command_queue_priority = 0;
144 int command_queue_select_compute_units = 0;
145
146 cl_queue_properties queue_properties[] = { CL_QUEUE_PROPERTIES, 0, 0, 0, 0, 0, 0 };
147 int idx = 2;
148
149 // Check to see if queue priority is supported
150 if (((command_queue_priority = is_extension_available(device, "cl_APPLE_command_queue_priority"))) == 0)
151 {
152 log_info("cl_APPLE_command_queue_priority extension is not supported - skipping test\n");
153 }
154
155 // Check to see if selecting the number of compute units is supported
156 if (((command_queue_select_compute_units = is_extension_available(device, "cl_APPLE_command_queue_select_compute_units"))) == 0)
157 {
158 log_info("cl_APPLE_command_queue_select_compute_units extension is not supported - skipping test\n");
159 }
160
161 // If neither extension is supported, skip the test
162 if (!command_queue_priority && !command_queue_select_compute_units)
163 return 0;
164
165 // Setup the queue properties
166 #ifdef cl_APPLE_command_queue_priority
167 if (command_queue_priority) {
168 queue_properties[idx++] = CL_QUEUE_PRIORITY_APPLE;
169 queue_properties[idx++] = CL_QUEUE_PRIORITY_BACKGROUND_APPLE;
170 }
171 #endif
172
173 #ifdef cl_APPLE_command_queue_select_compute_units
174 // Check the number of compute units on the device
175 cl_uint num_compute_units = 0;
176 err = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( num_compute_units ), &num_compute_units, NULL );
177 if (err) {
178 log_error("clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed: %d", err);
179 return -1;
180 }
181
182 if (command_queue_select_compute_units) {
183 queue_properties[idx++] = CL_QUEUE_NUM_COMPUTE_UNITS_APPLE;
184 queue_properties[idx++] = num_compute_units/2;
185 }
186 #endif
187 queue_properties[idx++] = 0;
188
189 // Create the command queue
190 cl_command_queue background_queue = clCreateCommandQueueWithProperties(context, device, queue_properties, &err);
191 if (err) {
192 log_error("clCreateCommandQueueWithPropertiesAPPLE failed: %d", err);
193 return -1;
194 }
195
196 // Test the command queue
197 cl_mem streams[4];
198 cl_program program[3];
199 cl_kernel kernel[3];
200 cl_event marker_event;
201
202 float *input_ptr[3], *output_ptr, *p;
203 size_t threads[1];
204 int i;
205 MTdata d = init_genrand( gRandomSeed );
206 size_t length = sizeof(cl_float) * num_elements;
207 int isRTZ = 0;
208 RoundingMode oldMode = kDefaultRoundingMode;
209
210 // check for floating point capabilities
211 cl_device_fp_config single_config = 0;
212 err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
213 if (err) {
214 log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
215 return -1;
216 }
217 //If we only support rtz mode
218 if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
219 {
220 //Check to make sure we are an embedded device
221 char profile[32];
222 err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
223 if( err )
224 {
225 log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
226 return -1;
227 }
228 if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
229 {
230 log_error( "FAILURE: Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
231 return -1;
232 }
233
234 isRTZ = 1;
235 oldMode = get_round();
236 }
237
238 input_ptr[0] = (cl_float *)malloc(length);
239 input_ptr[1] = (cl_float *)malloc(length);
240 input_ptr[2] = (cl_float *)malloc(length);
241 output_ptr = (cl_float *)malloc(length);
242
243 streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
244 test_error( err, "clCreateBuffer failed.");
245 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
246 test_error( err, "clCreateBuffer failed.");
247 streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
248 test_error( err, "clCreateBuffer failed.");
249 streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
250 test_error( err, "clCreateBuffer failed.");
251
252 p = input_ptr[0];
253 for (i=0; i<num_elements; i++)
254 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
255 p = input_ptr[1];
256 for (i=0; i<num_elements; i++)
257 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
258 p = input_ptr[2];
259 for (i=0; i<num_elements; i++)
260 p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
261
262 err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
263 test_error( err, "clEnqueueWriteBuffer failed.");
264
265 err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
266 test_error( err, "clEnqueueWriteBuffer failed.");
267
268 err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
269 test_error( err, "clEnqueueWriteBuffer failed.");
270
271 err = clEnqueueMarkerWithWaitList(queue, 0, NULL, &marker_event);
272 test_error( err, "clEnqueueMarkerWithWaitList failed.");
273 clFlush(queue);
274
275 err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
276 test_error( err, "create_single_kernel_helper failed");
277
278 err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
279 test_error( err, "create_single_kernel_helper failed");
280
281 err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
282 test_error( err, "create_single_kernel_helper failed");
283
284
285 err = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
286 err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
287 err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
288 test_error( err, "clSetKernelArgs failed.");
289
290 err = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
291 err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
292 err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
293 test_error( err, "clSetKernelArgs failed.");
294
295 err = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
296 err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
297 err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
298 test_error( err, "clSetKernelArgs failed.");
299
300 threads[0] = (unsigned int)num_elements;
301 for (i=0; i<3; i++)
302 {
303 err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 1, &marker_event, NULL);
304 test_error( err, "clEnqueueNDRangeKernel failed.");
305
306 err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
307 test_error( err, "clEnqueueReadBuffer failed.");
308
309 if( isRTZ )
310 set_round( kRoundTowardZero, kfloat );
311
312 switch (i)
313 {
314 case 0:
315 err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
316 break;
317 case 1:
318 err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
319 break;
320 case 2:
321 err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
322 break;
323 }
324
325 if( isRTZ )
326 set_round( oldMode, kfloat );
327 }
328
329 // cleanup
330 clReleaseCommandQueue(background_queue);
331 clReleaseEvent(marker_event);
332 clReleaseMemObject(streams[0]);
333 clReleaseMemObject(streams[1]);
334 clReleaseMemObject(streams[2]);
335 clReleaseMemObject(streams[3]);
336 for (i=0; i<3; i++)
337 {
338 clReleaseKernel(kernel[i]);
339 clReleaseProgram(program[i]);
340 }
341 free(input_ptr[0]);
342 free(input_ptr[1]);
343 free(input_ptr[2]);
344 free(output_ptr);
345 free_mtdata( d );
346
347 return err;
348 }
349
350
351
352 #endif
353
354