1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "function_list.h"
18 #include "test_functions.h"
19 #include "utility.h"
20
21 #include <cstring>
22
BuildKernel(const char * name,int vectorSize,cl_kernel * k,cl_program * p,bool relaxedMode)23 static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
24 cl_program *p, bool relaxedMode)
25 {
26 const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
27 "__kernel void math_kernel",
28 sizeNames[vectorSize],
29 "( __global double",
30 sizeNames[vectorSize],
31 "* out, __global double",
32 sizeNames[vectorSize],
33 "* in1, __global double",
34 sizeNames[vectorSize],
35 "* in2, __global double",
36 sizeNames[vectorSize],
37 "* in3 )\n"
38 "{\n"
39 " size_t i = get_global_id(0);\n"
40 " out[i] = ",
41 name,
42 "( in1[i], in2[i], in3[i] );\n"
43 "}\n" };
44
45 const char *c3[] = {
46 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
47 "__kernel void math_kernel",
48 sizeNames[vectorSize],
49 "( __global double* out, __global double* in, __global double* in2, "
50 "__global double* in3)\n"
51 "{\n"
52 " size_t i = get_global_id(0);\n"
53 " if( i + 1 < get_global_size(0) )\n"
54 " {\n"
55 " double3 d0 = vload3( 0, in + 3 * i );\n"
56 " double3 d1 = vload3( 0, in2 + 3 * i );\n"
57 " double3 d2 = vload3( 0, in3 + 3 * i );\n"
58 " d0 = ",
59 name,
60 "( d0, d1, d2 );\n"
61 " vstore3( d0, 0, out + 3*i );\n"
62 " }\n"
63 " else\n"
64 " {\n"
65 " size_t parity = i & 1; // Figure out how many elements are "
66 "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
67 "buffer size \n"
68 " double3 d0;\n"
69 " double3 d1;\n"
70 " double3 d2;\n"
71 " switch( parity )\n"
72 " {\n"
73 " case 1:\n"
74 " d0 = (double3)( in[3*i], NAN, NAN ); \n"
75 " d1 = (double3)( in2[3*i], NAN, NAN ); \n"
76 " d2 = (double3)( in3[3*i], NAN, NAN ); \n"
77 " break;\n"
78 " case 0:\n"
79 " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
80 " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
81 " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
82 " break;\n"
83 " }\n"
84 " d0 = ",
85 name,
86 "( d0, d1, d2 );\n"
87 " switch( parity )\n"
88 " {\n"
89 " case 0:\n"
90 " out[3*i+1] = d0.y; \n"
91 " // fall through\n"
92 " case 1:\n"
93 " out[3*i] = d0.x; \n"
94 " break;\n"
95 " }\n"
96 " }\n"
97 "}\n"
98 };
99
100 const char **kern = c;
101 size_t kernSize = sizeof(c) / sizeof(c[0]);
102
103 if (sizeValues[vectorSize] == 3)
104 {
105 kern = c3;
106 kernSize = sizeof(c3) / sizeof(c3[0]);
107 }
108
109 char testName[32];
110 snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
111 sizeNames[vectorSize]);
112
113 return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
114 }
115
116 typedef struct BuildKernelInfo
117 {
118 cl_uint offset; // the first vector size to build
119 cl_kernel *kernels;
120 cl_program *programs;
121 const char *nameInCode;
122 bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
123 } BuildKernelInfo;
124
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)125 static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
126 {
127 BuildKernelInfo *info = (BuildKernelInfo *)p;
128 cl_uint i = info->offset + job_id;
129 return BuildKernel(info->nameInCode, i, info->kernels + i,
130 info->programs + i, info->relaxedMode);
131 }
132
TestFunc_mad_Double(const Func * f,MTdata d,bool relaxedMode)133 int TestFunc_mad_Double(const Func *f, MTdata d, bool relaxedMode)
134 {
135 int error;
136 cl_program programs[VECTOR_SIZE_COUNT];
137 cl_kernel kernels[VECTOR_SIZE_COUNT];
138 float maxError = 0.0f;
139 double maxErrorVal = 0.0f;
140 double maxErrorVal2 = 0.0f;
141 double maxErrorVal3 = 0.0f;
142 uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
143
144 logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
145
146 // Init the kernels
147 {
148 BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
149 f->nameInCode, relaxedMode };
150 if ((error = ThreadPool_Do(BuildKernelFn,
151 gMaxVectorSizeIndex - gMinVectorSizeIndex,
152 &build_info)))
153 return error;
154 }
155
156 for (uint64_t i = 0; i < (1ULL << 32); i += step)
157 {
158 // Init input array
159 double *p = (double *)gIn;
160 double *p2 = (double *)gIn2;
161 double *p3 = (double *)gIn3;
162 for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
163 {
164 p[j] = DoubleFromUInt32(genrand_int32(d));
165 p2[j] = DoubleFromUInt32(genrand_int32(d));
166 p3[j] = DoubleFromUInt32(genrand_int32(d));
167 }
168
169 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
170 BUFFER_SIZE, gIn, 0, NULL, NULL)))
171 {
172 vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
173 return error;
174 }
175
176 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
177 BUFFER_SIZE, gIn2, 0, NULL, NULL)))
178 {
179 vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
180 return error;
181 }
182
183 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
184 BUFFER_SIZE, gIn3, 0, NULL, NULL)))
185 {
186 vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
187 return error;
188 }
189
190 // write garbage into output arrays
191 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
192 {
193 uint32_t pattern = 0xffffdead;
194 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
195 if ((error =
196 clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
197 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
198 {
199 vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
200 error, j);
201 goto exit;
202 }
203 }
204
205 // Run the kernels
206 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
207 {
208 size_t vectorSize = sizeof(cl_double) * sizeValues[j];
209 size_t localCount = (BUFFER_SIZE + vectorSize - 1)
210 / vectorSize; // BUFFER_SIZE / vectorSize rounded up
211 if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
212 &gOutBuffer[j])))
213 {
214 LogBuildError(programs[j]);
215 goto exit;
216 }
217 if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
218 &gInBuffer)))
219 {
220 LogBuildError(programs[j]);
221 goto exit;
222 }
223 if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
224 &gInBuffer2)))
225 {
226 LogBuildError(programs[j]);
227 goto exit;
228 }
229 if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
230 &gInBuffer3)))
231 {
232 LogBuildError(programs[j]);
233 goto exit;
234 }
235
236 if ((error =
237 clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
238 &localCount, NULL, 0, NULL, NULL)))
239 {
240 vlog_error("FAILED -- could not execute kernel\n");
241 goto exit;
242 }
243 }
244
245 // Get that moving
246 if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
247
248 // Calculate the correctly rounded reference result
249 double *r = (double *)gOut_Ref;
250 double *s = (double *)gIn;
251 double *s2 = (double *)gIn2;
252 double *s3 = (double *)gIn3;
253 for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
254 r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
255
256 // Read the data back
257 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
258 {
259 if ((error =
260 clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
261 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
262 {
263 vlog_error("ReadArray failed %d\n", error);
264 goto exit;
265 }
266 }
267
268 if (gSkipCorrectnessTesting) break;
269
270 // Verify data -- No verification possible.
271 // MAD is a random number generator.
272 if (0 == (i & 0x0fffffff))
273 {
274 vlog(".");
275 fflush(stdout);
276 }
277 }
278
279 if (!gSkipCorrectnessTesting)
280 {
281 if (gWimpyMode)
282 vlog("Wimp pass");
283 else
284 vlog("passed");
285
286 vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
287 maxErrorVal3);
288 }
289
290 vlog("\n");
291
292 exit:
293 // Release
294 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
295 {
296 clReleaseKernel(kernels[k]);
297 clReleaseProgram(programs[k]);
298 }
299
300 return error;
301 }
302