1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 #define NUM_PROGRAMS 6
26 
27 static const int vector_sizes[] = {1, 2, 3, 4, 8, 16};
28 
29 
30 const char *int_mul24_kernel_code =
31 "__kernel void test_int_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
32 "{\n"
33 "    int  tid = get_global_id(0);\n"
34 "\n"
35 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
36 "}\n";
37 
38 const char *int2_mul24_kernel_code =
39 "__kernel void test_int2_mul24(__global int2 *srcA, __global int2 *srcB, __global int2 *dst)\n"
40 "{\n"
41 "    int  tid = get_global_id(0);\n"
42 "\n"
43 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
44 "}\n";
45 
46 const char *int3_mul24_kernel_code =
47 "__kernel void test_int3_mul24(__global int *srcA, __global int *srcB, __global int *dst)\n"
48 "{\n"
49 "    int  tid = get_global_id(0);\n"
50 "    int3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
51 "    vstore3(tmp, tid, dst);\n"
52 "}\n";
53 
54 const char *int4_mul24_kernel_code =
55 "__kernel void test_int4_mul24(__global int4 *srcA, __global int4 *srcB, __global int4 *dst)\n"
56 "{\n"
57 "    int  tid = get_global_id(0);\n"
58 "\n"
59 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
60 "}\n";
61 
62 const char *int8_mul24_kernel_code =
63 "__kernel void test_int8_mul24(__global int8 *srcA, __global int8 *srcB, __global int8 *dst)\n"
64 "{\n"
65 "    int  tid = get_global_id(0);\n"
66 "\n"
67 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
68 "}\n";
69 
70 const char *int16_mul24_kernel_code =
71 "__kernel void test_int16_mul24(__global int16 *srcA, __global int16 *srcB, __global int16 *dst)\n"
72 "{\n"
73 "    int  tid = get_global_id(0);\n"
74 "\n"
75 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
76 "}\n";
77 
78 const char *uint_mul24_kernel_code =
79 "__kernel void test_int_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
80 "{\n"
81 "    int  tid = get_global_id(0);\n"
82 "\n"
83 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
84 "}\n";
85 
86 const char *uint2_mul24_kernel_code =
87 "__kernel void test_int2_mul24(__global uint2 *srcA, __global uint2 *srcB, __global uint2 *dst)\n"
88 "{\n"
89 "    int  tid = get_global_id(0);\n"
90 "\n"
91 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
92 "}\n";
93 
94 const char *uint3_mul24_kernel_code =
95 "__kernel void test_int3_mul24(__global uint *srcA, __global uint *srcB, __global uint *dst)\n"
96 "{\n"
97 "    int  tid = get_global_id(0);\n"
98 "    uint3 tmp = mul24(vload3(tid, srcA), vload3(tid, srcB));\n"
99 "    vstore3(tmp, tid, dst);\n"
100 "}\n";
101 
102 const char *uint4_mul24_kernel_code =
103 "__kernel void test_int4_mul24(__global uint4 *srcA, __global uint4 *srcB, __global uint4 *dst)\n"
104 "{\n"
105 "    int  tid = get_global_id(0);\n"
106 "\n"
107 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
108 "}\n";
109 
110 const char *uint8_mul24_kernel_code =
111 "__kernel void test_int8_mul24(__global uint8 *srcA, __global uint8 *srcB, __global uint8 *dst)\n"
112 "{\n"
113 "    int  tid = get_global_id(0);\n"
114 "\n"
115 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
116 "}\n";
117 
118 const char *uint16_mul24_kernel_code =
119 "__kernel void test_int16_mul24(__global uint16 *srcA, __global uint16 *srcB, __global uint16 *dst)\n"
120 "{\n"
121 "    int  tid = get_global_id(0);\n"
122 "\n"
123 "    dst[tid] = mul24(srcA[tid], srcB[tid]);\n"
124 "}\n";
125 
126 
127 int
verify_int_mul24(int * inptrA,int * inptrB,int * outptr,size_t n,size_t vecSize)128 verify_int_mul24(int *inptrA, int *inptrB, int *outptr, size_t n, size_t vecSize)
129 {
130     int            r;
131     size_t         i;
132 
133     for (i=0; i<n; i++)
134     {
135         int a = (inptrA[i] << 8 ) >> 8;
136         int b = (inptrB[i] << 8 ) >> 8;
137         r = a * b;
138         if (r != outptr[i])
139              return -1;
140     }
141 
142     return 0;
143 }
144 
145 int
verify_uint_mul24(cl_uint * inptrA,cl_uint * inptrB,cl_uint * outptr,size_t n,size_t vecSize)146 verify_uint_mul24(cl_uint *inptrA, cl_uint *inptrB, cl_uint *outptr, size_t n, size_t vecSize)
147 {
148     cl_uint            r;
149     size_t         i;
150 
151     for (i=0; i<n; i++)
152     {
153         r = (inptrA[i] & 0xffffffU) * (inptrB[i] & 0xffffffU);
154         if (r != outptr[i])
155         {
156             log_error( "failed at %ld: 0x%8.8x * 0x%8.8x = *0x%8.8x vs 0x%8.8x\n", i, inptrA[i], inptrB[i], r, outptr[i] );
157              return -1;
158         }
159     }
160 
161     return 0;
162 }
163 
random_int24(MTdata d)164 static inline int random_int24( MTdata d )
165 {
166     int result = genrand_int32(d);
167 
168     return (result << 8) >> 8;
169 }
170 
171 
172 static const char *test_str_names[] = { "int", "int2", "int3", "int4", "int8", "int16", "uint", "uint2", "uint3", "uint4", "uint8", "uint16" };
173 
test_integer_mul24(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)174 int test_integer_mul24(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
175 {
176     cl_mem streams[3];
177     cl_int *input_ptr[2], *output_ptr, *p;
178 
179     cl_program program[NUM_PROGRAMS*2];
180     cl_kernel kernel[NUM_PROGRAMS*2];
181     size_t threads[1];
182 
183     int                num_elements;
184     int                err;
185     int                i;
186     MTdata              d;
187 
188     size_t length = sizeof(cl_int) * 16 * n_elems;
189     num_elements = n_elems * 16;
190 
191     input_ptr[0] = (cl_int*)malloc(length);
192     input_ptr[1] = (cl_int*)malloc(length);
193     output_ptr   = (cl_int*)malloc(length);
194 
195     streams[0] = clCreateBuffer(context, 0, length, NULL, NULL);
196     if (!streams[0])
197     {
198         log_error("clCreateBuffer failed\n");
199         return -1;
200     }
201     streams[1] = clCreateBuffer(context, 0, length, NULL, NULL);
202     if (!streams[1])
203     {
204         log_error("clCreateBuffer failed\n");
205         return -1;
206     }
207     streams[2] = clCreateBuffer(context, 0, length, NULL, NULL);
208     if (!streams[2])
209     {
210         log_error("clCreateBuffer failed\n");
211         return -1;
212     }
213 
214     d = init_genrand( gRandomSeed );
215     p = input_ptr[0];
216     for (i=0; i<num_elements; i++)
217         p[i] = random_int24(d);
218     p = input_ptr[1];
219     for (i=0; i<num_elements; i++)
220         p[i] = random_int24(d);
221     free_mtdata(d); d = NULL;
222 
223     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
224     if (err != CL_SUCCESS)
225     {
226         log_error("clEnqueueWriteBuffer failed\n");
227         return -1;
228     }
229     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
230     if (err != CL_SUCCESS)
231     {
232         log_error("clEnqueueWriteBuffer failed\n");
233         return -1;
234     }
235     err = create_single_kernel_helper(context, &program[0], &kernel[0], 1, &int_mul24_kernel_code, "test_int_mul24");
236     if (err)
237         return -1;
238     err = create_single_kernel_helper(context, &program[1], &kernel[1], 1, &int2_mul24_kernel_code, "test_int2_mul24");
239     if (err)
240         return -1;
241     err = create_single_kernel_helper(context, &program[2], &kernel[2], 1, &int3_mul24_kernel_code, "test_int3_mul24");
242     if (err)
243         return -1;
244     err = create_single_kernel_helper(context, &program[3], &kernel[3], 1, &int4_mul24_kernel_code, "test_int4_mul24");
245     if (err)
246         return -1;
247     err = create_single_kernel_helper(context, &program[4], &kernel[4], 1, &int8_mul24_kernel_code, "test_int8_mul24");
248     if (err)
249         return -1;
250     err = create_single_kernel_helper(context, &program[5], &kernel[5], 1, &int16_mul24_kernel_code, "test_int16_mul24");
251     if (err)
252         return -1;
253 
254     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS], &kernel[NUM_PROGRAMS], 1, &uint_mul24_kernel_code, "test_int_mul24");
255     if (err)
256         return -1;
257     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+1], &kernel[NUM_PROGRAMS+1], 1, &uint2_mul24_kernel_code, "test_int2_mul24");
258     if (err)
259         return -1;
260     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+2], &kernel[NUM_PROGRAMS+2], 1, &uint3_mul24_kernel_code, "test_int3_mul24");
261     if (err)
262         return -1;
263     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+3], &kernel[NUM_PROGRAMS+3], 1, &uint4_mul24_kernel_code, "test_int4_mul24");
264     if (err)
265         return -1;
266     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+4], &kernel[NUM_PROGRAMS+4], 1, &uint8_mul24_kernel_code, "test_int8_mul24");
267     if (err)
268         return -1;
269     err = create_single_kernel_helper(context, &program[NUM_PROGRAMS+5], &kernel[NUM_PROGRAMS+5], 1, &uint16_mul24_kernel_code, "test_int16_mul24");
270     if (err)
271         return -1;
272 
273     for (i=0; i<2*NUM_PROGRAMS; i++)
274     {
275         err  = clSetKernelArg(kernel[i], 0, sizeof streams[0], &streams[0]);
276         err |= clSetKernelArg(kernel[i], 1, sizeof streams[1], &streams[1]);
277         err |= clSetKernelArg(kernel[i], 2, sizeof streams[2], &streams[2]);
278         if (err != CL_SUCCESS)
279         {
280             log_error("clSetKernelArgs failed\n");
281             return -1;
282         }
283     }
284 
285     // test signed
286     threads[0] = (unsigned int)n_elems;
287     for (i=0; i<NUM_PROGRAMS; i++)
288     {
289         err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
290         if (err != CL_SUCCESS)
291         {
292             log_error("clEnqueueNDRangeKernel failed\n");
293             return -1;
294         }
295 
296         err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
297         if (err != CL_SUCCESS)
298         {
299             log_error("clEnqueueReadBuffer failed\n");
300             return -1;
301         }
302 
303         err = verify_int_mul24(input_ptr[0], input_ptr[1], output_ptr, vector_sizes[i], vector_sizes[i]);
304         if (err)
305         {
306             log_error("INT_MUL24 %s test failed\n", test_str_names[i]);
307             err = -1;
308         }
309         else
310         {
311             log_info("INT_MUL24 %s test passed\n", test_str_names[i]);
312             err = 0;
313         }
314 
315         if (err)
316             break;
317     }
318 
319     // clamp the set of input values to be in range
320     p = input_ptr[0];
321     for (i=0; i<num_elements; i++)
322         p[i] &= 0xffffffU;
323     p = input_ptr[1];
324     for (i=0; i<num_elements; i++)
325         p[i] &= 0xffffffU;
326 
327     err = clEnqueueWriteBuffer(queue, streams[0], CL_TRUE, 0, length, input_ptr[0], 0, NULL, NULL);
328     if (err != CL_SUCCESS)
329     {
330         log_error("clEnqueueWriteBuffer failed\n");
331         return -1;
332     }
333     err = clEnqueueWriteBuffer(queue, streams[1], CL_TRUE, 0, length, input_ptr[1], 0, NULL, NULL);
334     if (err != CL_SUCCESS)
335     {
336         log_error("clEnqueueWriteBuffer failed\n");
337         return -1;
338     }
339 
340     // test unsigned
341     for (i=NUM_PROGRAMS; i<2*NUM_PROGRAMS; i++)
342     {
343         err = clEnqueueNDRangeKernel(queue, kernel[i], 1, NULL, threads, NULL, 0, NULL, NULL);
344         if (err != CL_SUCCESS)
345         {
346             log_error("clEnqueueNDRangeKernel failed\n");
347             return -1;
348         }
349 
350         err = clEnqueueReadBuffer(queue, streams[2], CL_TRUE, 0, length, output_ptr, 0, NULL, NULL);
351         if (err != CL_SUCCESS)
352         {
353             log_error("clEnqueueReadBuffer failed\n");
354             return -1;
355         }
356 
357         err = verify_uint_mul24((cl_uint*) input_ptr[0], (cl_uint*) input_ptr[1], (cl_uint*) output_ptr, n_elems * vector_sizes[i-NUM_PROGRAMS], vector_sizes[i-NUM_PROGRAMS]);
358         if (err)
359         {
360             log_error("UINT_MUL24 %s test failed\n", test_str_names[i]);
361             err = -1;
362         }
363         else
364         {
365             log_info("UINT_MUL24 %s test passed\n", test_str_names[i]);
366             err = 0;
367         }
368 
369         if (err)
370             break;
371     }
372 
373 
374     // cleanup
375     clReleaseMemObject(streams[0]);
376     clReleaseMemObject(streams[1]);
377     clReleaseMemObject(streams[2]);
378     for (i=0; i<2*NUM_PROGRAMS; i++)
379     {
380         clReleaseKernel(kernel[i]);
381         clReleaseProgram(program[i]);
382     }
383     free(input_ptr[0]);
384     free(input_ptr[1]);
385     free(output_ptr);
386     return err;
387 }
388 
389 
390