1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <string.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include "harness/rounding_mode.h"
24 
25 #include "procs.h"
26 
27 static const char *fpadd_kernel_code =
28 "__kernel void test_fpadd(__global float *srcA, __global float *srcB, __global float *dst)\n"
29 "{\n"
30 "    int  tid = get_global_id(0);\n"
31 "\n"
32 "    dst[tid] = srcA[tid] + srcB[tid];\n"
33 "}\n";
34 
35 static const char *fpsub_kernel_code =
36 "__kernel void test_fpsub(__global float *srcA, __global float *srcB, __global float *dst)\n"
37 "{\n"
38 "    int  tid = get_global_id(0);\n"
39 "\n"
40 "    dst[tid] = srcA[tid] - srcB[tid];\n"
41 "}\n";
42 
43 static const char *fpmul_kernel_code =
44 "__kernel void test_fpmul(__global float *srcA, __global float *srcB, __global float *dst)\n"
45 "{\n"
46 "    int  tid = get_global_id(0);\n"
47 "\n"
48 "    dst[tid] = srcA[tid] * srcB[tid];\n"
49 "}\n";
50 
51 
52 static const float    MAX_ERR = 1e-5f;
53 
54 static int
verify_fpadd(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)55 verify_fpadd(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
56 {
57     float       r;
58     int         i;
59 
60     float * reference_ptr = (float *)malloc(n * sizeof(float));
61 
62     for (i=0; i<n; i++)
63     {
64         reference_ptr[i] = inptrA[i] + inptrB[i];
65     }
66 
67     for (i=0; i<n; i++)
68     {
69         if (reference_ptr[i] != outptr[i])
70         {
71             log_error("FP_ADD float test failed\n");
72             return -1;
73         }
74     }
75 
76     free(reference_ptr);
77 
78     log_info("FP_ADD float test passed\n");
79     return 0;
80 }
81 
82 static int
verify_fpsub(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)83 verify_fpsub(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
84 {
85     float       r;
86     int         i;
87 
88     float * reference_ptr = (float *)malloc(n * sizeof(float));
89 
90     for (i=0; i<n; i++)
91     {
92         reference_ptr[i] = inptrA[i] - inptrB[i];
93     }
94 
95     for (i=0; i<n; i++)
96     {
97         if (reference_ptr[i] != outptr[i])
98         {
99             log_error("FP_SUB float test failed\n");
100             return -1;
101         }
102     }
103 
104     free(reference_ptr);
105 
106     log_info("FP_SUB float test passed\n");
107     return 0;
108 }
109 
110 static int
verify_fpmul(float * inptrA,float * inptrB,float * outptr,int n,int fileNum)111 verify_fpmul(float *inptrA, float *inptrB, float *outptr, int n, int fileNum)
112 {
113     float       r;
114     int         i;
115 
116     float * reference_ptr = (float *)malloc(n * sizeof(float));
117 
118     for (i=0; i<n; i++)
119     {
120         reference_ptr[i] = inptrA[i] * inptrB[i];
121     }
122 
123     for (i=0; i<n; i++)
124     {
125         if (reference_ptr[i] != outptr[i])
126         {
127             log_error("FP_MUL float test failed\n");
128             return -1;
129         }
130     }
131 
132     free(reference_ptr);
133 
134     log_info("FP_MUL float test passed\n");
135     return 0;
136 }
137 
138 #if defined( __APPLE__ )
139 
test_queue_priority(cl_device_id device,cl_context context,cl_command_queue queue,int num_elements)140 int test_queue_priority(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements)
141 {
142   int err;
143   int command_queue_priority = 0;
144   int command_queue_select_compute_units = 0;
145 
146   cl_queue_properties queue_properties[] = { CL_QUEUE_PROPERTIES, 0, 0, 0, 0, 0, 0 };
147   int idx = 2;
148 
149   // Check to see if queue priority is supported
150   if (((command_queue_priority = is_extension_available(device, "cl_APPLE_command_queue_priority"))) == 0)
151   {
152     log_info("cl_APPLE_command_queue_priority extension is not supported - skipping test\n");
153   }
154 
155   // Check to see if selecting the number of compute units is supported
156   if (((command_queue_select_compute_units = is_extension_available(device, "cl_APPLE_command_queue_select_compute_units"))) == 0)
157   {
158     log_info("cl_APPLE_command_queue_select_compute_units extension is not supported - skipping test\n");
159   }
160 
161   // If neither extension is supported, skip the test
162   if (!command_queue_priority && !command_queue_select_compute_units)
163     return 0;
164 
165   // Setup the queue properties
166 #ifdef cl_APPLE_command_queue_priority
167   if (command_queue_priority) {
168     queue_properties[idx++] = CL_QUEUE_PRIORITY_APPLE;
169     queue_properties[idx++] = CL_QUEUE_PRIORITY_BACKGROUND_APPLE;
170   }
171 #endif
172 
173 #ifdef cl_APPLE_command_queue_select_compute_units
174   // Check the number of compute units on the device
175   cl_uint num_compute_units = 0;
176   err = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof( num_compute_units ), &num_compute_units, NULL );
177   if (err) {
178     log_error("clGetDeviceInfo for CL_DEVICE_MAX_COMPUTE_UNITS failed: %d", err);
179     return -1;
180   }
181 
182   if (command_queue_select_compute_units) {
183     queue_properties[idx++] = CL_QUEUE_NUM_COMPUTE_UNITS_APPLE;
184     queue_properties[idx++] = num_compute_units/2;
185   }
186 #endif
187   queue_properties[idx++] = 0;
188 
189   // Create the command queue
190   cl_command_queue background_queue = clCreateCommandQueueWithProperties(context, device, queue_properties, &err);
191   if (err) {
192     log_error("clCreateCommandQueueWithPropertiesAPPLE failed: %d", err);
193     return -1;
194   }
195 
196   // Test the command queue
197   cl_mem streams[4];
198     cl_program program[3];
199     cl_kernel kernel[3];
200   cl_event marker_event;
201 
202   float *input_ptr[3], *output_ptr, *p;
203     size_t threads[1];
204     int i;
205   MTdata d = init_genrand( gRandomSeed );
206   size_t length = sizeof(cl_float) * num_elements;
207   int isRTZ = 0;
208   RoundingMode oldMode = kDefaultRoundingMode;
209 
210   // check for floating point capabilities
211   cl_device_fp_config single_config = 0;
212   err = clGetDeviceInfo( device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof( single_config ), &single_config, NULL );
213   if (err) {
214     log_error("clGetDeviceInfo for CL_DEVICE_SINGLE_FP_CONFIG failed: %d", err);
215     return -1;
216   }
217   //If we only support rtz mode
218   if( CL_FP_ROUND_TO_ZERO == ( single_config & (CL_FP_ROUND_TO_ZERO|CL_FP_ROUND_TO_NEAREST) ) )
219   {
220     //Check to make sure we are an embedded device
221     char profile[32];
222     err = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL);
223     if( err )
224     {
225       log_error("clGetDeviceInfo for CL_DEVICE_PROFILE failed: %d", err);
226       return -1;
227     }
228     if( 0 != strcmp( profile, "EMBEDDED_PROFILE"))
229     {
230       log_error( "FAILURE:  Device doesn't support CL_FP_ROUND_TO_NEAREST and isn't EMBEDDED_PROFILE\n" );
231       return -1;
232     }
233 
234     isRTZ = 1;
235     oldMode = get_round();
236   }
237 
238   input_ptr[0] = (cl_float *)malloc(length);
239   input_ptr[1] = (cl_float *)malloc(length);
240   input_ptr[2] = (cl_float *)malloc(length);
241   output_ptr = (cl_float *)malloc(length);
242 
243   streams[0] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
244   test_error( err, "clCreateBuffer failed.");
245   streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
246   test_error( err, "clCreateBuffer failed.");
247   streams[2] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
248   test_error( err, "clCreateBuffer failed.");
249   streams[3] = clCreateBuffer(context, CL_MEM_READ_WRITE, length, NULL, &err);
250   test_error( err, "clCreateBuffer failed.");
251 
252   p = input_ptr[0];
253   for (i=0; i<num_elements; i++)
254     p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
255   p = input_ptr[1];
256   for (i=0; i<num_elements; i++)
257     p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
258   p = input_ptr[2];
259   for (i=0; i<num_elements; i++)
260     p[i] = get_random_float(-MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), MAKE_HEX_FLOAT(0x1.0p31f, 0x1, 31), d);
261 
262   err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
263   test_error( err, "clEnqueueWriteBuffer failed.");
264 
265   err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
266   test_error( err, "clEnqueueWriteBuffer failed.");
267 
268   err = clEnqueueWriteBuffer(queue, streams[2], CL_TRUE, 0, length, input_ptr[2], 0, NULL, NULL);
269   test_error( err, "clEnqueueWriteBuffer failed.");
270 
271   err = clEnqueueMarkerWithWaitList(queue, 0, NULL, &marker_event);
272   test_error( err, "clEnqueueMarkerWithWaitList failed.");
273   clFlush(queue);
274 
275   err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &fpadd_kernel_code, "test_fpadd");
276   test_error( err, "create_single_kernel_helper failed");
277 
278   err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &fpsub_kernel_code, "test_fpsub");
279   test_error( err, "create_single_kernel_helper failed");
280 
281   err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &fpmul_kernel_code, "test_fpmul");
282   test_error( err, "create_single_kernel_helper failed");
283 
284 
285   err  = clSetKernelArg(kernel[0], 0, sizeof streams[0], &streams[0]);
286   err |= clSetKernelArg(kernel[0], 1, sizeof streams[1], &streams[1]);
287   err |= clSetKernelArg(kernel[0], 2, sizeof streams[3], &streams[3]);
288   test_error( err, "clSetKernelArgs failed.");
289 
290   err  = clSetKernelArg(kernel[1], 0, sizeof streams[0], &streams[0]);
291   err |= clSetKernelArg(kernel[1], 1, sizeof streams[1], &streams[1]);
292   err |= clSetKernelArg(kernel[1], 2, sizeof streams[3], &streams[3]);
293   test_error( err, "clSetKernelArgs failed.");
294 
295   err  = clSetKernelArg(kernel[2], 0, sizeof streams[0], &streams[0]);
296   err |= clSetKernelArg(kernel[2], 1, sizeof streams[1], &streams[1]);
297   err |= clSetKernelArg(kernel[2], 2, sizeof streams[3], &streams[3]);
298   test_error( err, "clSetKernelArgs failed.");
299 
300   threads[0] = (unsigned int)num_elements;
301   for (i=0; i<3; i++)
302   {
303     err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 1, &marker_event, NULL);
304     test_error( err, "clEnqueueNDRangeKernel failed.");
305 
306     err = clEnqueueReadBuffer(queue, streams[3], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
307     test_error( err, "clEnqueueReadBuffer failed.");
308 
309     if( isRTZ )
310       set_round( kRoundTowardZero, kfloat );
311 
312     switch (i)
313     {
314       case 0:
315         err = verify_fpadd(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
316         break;
317       case 1:
318         err = verify_fpsub(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
319         break;
320       case 2:
321         err = verify_fpmul(input_ptr[0], input_ptr[1], output_ptr, num_elements, i);
322         break;
323     }
324 
325     if( isRTZ )
326       set_round( oldMode, kfloat );
327   }
328 
329   // cleanup
330   clReleaseCommandQueue(background_queue);
331   clReleaseEvent(marker_event);
332   clReleaseMemObject(streams[0]);
333   clReleaseMemObject(streams[1]);
334   clReleaseMemObject(streams[2]);
335   clReleaseMemObject(streams[3]);
336   for (i=0; i<3; i++)
337   {
338     clReleaseKernel(kernel[i]);
339     clReleaseProgram(program[i]);
340   }
341   free(input_ptr[0]);
342   free(input_ptr[1]);
343   free(input_ptr[2]);
344   free(output_ptr);
345   free_mtdata( d );
346 
347   return err;
348 }
349 
350 
351 
352 #endif
353 
354