1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17 
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 
23 #include "procs.h"
24 
25 
26 const char *wg_all_kernel_code =
27 "__kernel void test_wg_all(global float *input, global int *output)\n"
28 "{\n"
29 "    int  tid = get_global_id(0);\n"
30 "\n"
31 "    int result = work_group_all((input[tid] > input[tid+1]));\n"
32 "    output[tid] = result;\n"
33 "}\n";
34 
35 
36 static int
verify_wg_all(float * inptr,int * outptr,size_t n,size_t wg_size)37 verify_wg_all(float *inptr, int *outptr, size_t n, size_t wg_size)
38 {
39     size_t     i, j;
40 
41     for (i=0; i<n; i+=wg_size)
42     {
43         int predicate_all = 0xFFFFFFFF;
44         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
45         {
46             if (!(inptr[i+j] > inptr[i+j+1]))
47             {
48                 predicate_all = 0;
49                 break;
50             }
51         }
52         for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
53         {
54             if ( (predicate_all && (outptr[i+j] == 0)) ||
55                  ((predicate_all == 0) && outptr[i+j]) )
56             {
57                 log_info("work_group_all: Error at %lu: expected = %d, got = %d\n", i+j, predicate_all, outptr[i+j]);
58                 return -1;
59             }
60         }
61     }
62 
63     return 0;
64 }
65 
66 int
test_work_group_all(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)67 test_work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
68 {
69     cl_mem       streams[2];
70     cl_float     *input_ptr[1], *p;
71     cl_int       *output_ptr;
72     cl_program   program;
73     cl_kernel    kernel;
74     void         *values[2];
75     size_t       threads[1];
76     size_t       wg_size[1];
77     size_t       num_elements;
78     int          err;
79     int          i;
80     MTdata       d;
81 
82     err = create_single_kernel_helper(context, &program, &kernel, 1,
83                                       &wg_all_kernel_code, "test_wg_all");
84     if (err)
85         return -1;
86 
87     // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
88     err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
89     test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
90 
91     num_elements = n_elems;
92 
93     input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1));
94     output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1));
95     streams[0] =
96         clCreateBuffer(context, CL_MEM_READ_WRITE,
97                        sizeof(cl_float) * (num_elements + 1), NULL, NULL);
98     if (!streams[0])
99     {
100         log_error("clCreateBuffer failed\n");
101         return -1;
102     }
103 
104     streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
105                                 sizeof(cl_int) * num_elements, NULL, NULL);
106     if (!streams[1])
107     {
108         log_error("clCreateBuffer failed\n");
109         return -1;
110     }
111 
112     p = input_ptr[0];
113     d = init_genrand( gRandomSeed );
114     for (i=0; i<(num_elements+1); i++)
115     {
116         p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
117     }
118     free_mtdata(d); d = NULL;
119 
120     err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*(num_elements+1), (void *)input_ptr[0], 0, NULL, NULL );
121     if (err != CL_SUCCESS)
122     {
123         log_error("clWriteArray failed\n");
124         return -1;
125     }
126 
127     values[0] = streams[0];
128     values[1] = streams[1];
129     err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
130     err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
131     if (err != CL_SUCCESS)
132     {
133         log_error("clSetKernelArgs failed\n");
134         return -1;
135     }
136 
137     // Line below is troublesome...
138     threads[0] = (size_t)n_elems;
139     err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
140     if (err != CL_SUCCESS)
141     {
142         log_error("clEnqueueNDRangeKernel failed\n");
143         return -1;
144     }
145 
146     cl_uint dead = 0xdeaddead;
147     memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
148     err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
149     if (err != CL_SUCCESS)
150     {
151         log_error("clEnqueueReadBuffer failed\n");
152         return -1;
153     }
154 
155     if (verify_wg_all(input_ptr[0], output_ptr, num_elements, wg_size[0]))
156     {
157         log_error("work_group_all test failed\n");
158         return -1;
159     }
160     log_info("work_group_all test passed\n");
161 
162     clReleaseMemObject(streams[0]);
163     clReleaseMemObject(streams[1]);
164     clReleaseKernel(kernel);
165     clReleaseProgram(program);
166     free(input_ptr[0]);
167     free(output_ptr);
168 
169     return err;
170 }
171 
172