1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "harness/compat.h"
17
18 #include <stdio.h>
19 #include <string.h>
20 #include <sys/types.h>
21 #include <sys/stat.h>
22
23 #include "procs.h"
24
25
26 const char *wg_all_kernel_code =
27 "__kernel void test_wg_all(global float *input, global int *output)\n"
28 "{\n"
29 " int tid = get_global_id(0);\n"
30 "\n"
31 " int result = work_group_all((input[tid] > input[tid+1]));\n"
32 " output[tid] = result;\n"
33 "}\n";
34
35
36 static int
verify_wg_all(float * inptr,int * outptr,size_t n,size_t wg_size)37 verify_wg_all(float *inptr, int *outptr, size_t n, size_t wg_size)
38 {
39 size_t i, j;
40
41 for (i=0; i<n; i+=wg_size)
42 {
43 int predicate_all = 0xFFFFFFFF;
44 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
45 {
46 if (!(inptr[i+j] > inptr[i+j+1]))
47 {
48 predicate_all = 0;
49 break;
50 }
51 }
52 for (j=0; j<((n-i) > wg_size ? wg_size : (n-i)); j++)
53 {
54 if ( (predicate_all && (outptr[i+j] == 0)) ||
55 ((predicate_all == 0) && outptr[i+j]) )
56 {
57 log_info("work_group_all: Error at %lu: expected = %d, got = %d\n", i+j, predicate_all, outptr[i+j]);
58 return -1;
59 }
60 }
61 }
62
63 return 0;
64 }
65
66 int
test_work_group_all(cl_device_id device,cl_context context,cl_command_queue queue,int n_elems)67 test_work_group_all(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems)
68 {
69 cl_mem streams[2];
70 cl_float *input_ptr[1], *p;
71 cl_int *output_ptr;
72 cl_program program;
73 cl_kernel kernel;
74 void *values[2];
75 size_t threads[1];
76 size_t wg_size[1];
77 size_t num_elements;
78 int err;
79 int i;
80 MTdata d;
81
82 err = create_single_kernel_helper(context, &program, &kernel, 1,
83 &wg_all_kernel_code, "test_wg_all");
84 if (err)
85 return -1;
86
87 // "wg_size" is limited to that of the first dimension as only a 1DRange is executed.
88 err = get_max_allowed_1d_work_group_size_on_device(device, kernel, wg_size);
89 test_error(err, "get_max_allowed_1d_work_group_size_on_device failed");
90
91 num_elements = n_elems;
92
93 input_ptr[0] = (cl_float*)malloc(sizeof(cl_float) * (num_elements+1));
94 output_ptr = (cl_int*)malloc(sizeof(cl_int) * (num_elements+1));
95 streams[0] =
96 clCreateBuffer(context, CL_MEM_READ_WRITE,
97 sizeof(cl_float) * (num_elements + 1), NULL, NULL);
98 if (!streams[0])
99 {
100 log_error("clCreateBuffer failed\n");
101 return -1;
102 }
103
104 streams[1] = clCreateBuffer(context, CL_MEM_READ_WRITE,
105 sizeof(cl_int) * num_elements, NULL, NULL);
106 if (!streams[1])
107 {
108 log_error("clCreateBuffer failed\n");
109 return -1;
110 }
111
112 p = input_ptr[0];
113 d = init_genrand( gRandomSeed );
114 for (i=0; i<(num_elements+1); i++)
115 {
116 p[i] = get_random_float((float)(-100000.f * M_PI), (float)(100000.f * M_PI) ,d);
117 }
118 free_mtdata(d); d = NULL;
119
120 err = clEnqueueWriteBuffer( queue, streams[0], true, 0, sizeof(cl_float)*(num_elements+1), (void *)input_ptr[0], 0, NULL, NULL );
121 if (err != CL_SUCCESS)
122 {
123 log_error("clWriteArray failed\n");
124 return -1;
125 }
126
127 values[0] = streams[0];
128 values[1] = streams[1];
129 err = clSetKernelArg(kernel, 0, sizeof streams[0], &streams[0] );
130 err |= clSetKernelArg(kernel, 1, sizeof streams[1], &streams[1] );
131 if (err != CL_SUCCESS)
132 {
133 log_error("clSetKernelArgs failed\n");
134 return -1;
135 }
136
137 // Line below is troublesome...
138 threads[0] = (size_t)n_elems;
139 err = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, wg_size, 0, NULL, NULL );
140 if (err != CL_SUCCESS)
141 {
142 log_error("clEnqueueNDRangeKernel failed\n");
143 return -1;
144 }
145
146 cl_uint dead = 0xdeaddead;
147 memset_pattern4(output_ptr, &dead, sizeof(cl_float)*num_elements);
148 err = clEnqueueReadBuffer( queue, streams[1], true, 0, sizeof(cl_int)*num_elements, (void *)output_ptr, 0, NULL, NULL );
149 if (err != CL_SUCCESS)
150 {
151 log_error("clEnqueueReadBuffer failed\n");
152 return -1;
153 }
154
155 if (verify_wg_all(input_ptr[0], output_ptr, num_elements, wg_size[0]))
156 {
157 log_error("work_group_all test failed\n");
158 return -1;
159 }
160 log_info("work_group_all test passed\n");
161
162 clReleaseMemObject(streams[0]);
163 clReleaseMemObject(streams[1]);
164 clReleaseKernel(kernel);
165 clReleaseProgram(program);
166 free(input_ptr[0]);
167 free(output_ptr);
168
169 return err;
170 }
171
172