1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 // http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16
17 #include "function_list.h"
18 #include "test_functions.h"
19 #include "utility.h"
20
21 #include <cstring>
22
23 #define CORRECTLY_ROUNDED 0
24 #define FLUSHED 1
25
BuildKernel(const char * name,int vectorSize,cl_kernel * k,cl_program * p,bool relaxedMode)26 static int BuildKernel(const char *name, int vectorSize, cl_kernel *k,
27 cl_program *p, bool relaxedMode)
28 {
29 const char *c[] = { "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
30 "__kernel void math_kernel",
31 sizeNames[vectorSize],
32 "( __global double",
33 sizeNames[vectorSize],
34 "* out, __global double",
35 sizeNames[vectorSize],
36 "* in1, __global double",
37 sizeNames[vectorSize],
38 "* in2, __global double",
39 sizeNames[vectorSize],
40 "* in3 )\n"
41 "{\n"
42 " size_t i = get_global_id(0);\n"
43 " out[i] = ",
44 name,
45 "( in1[i], in2[i], in3[i] );\n"
46 "}\n" };
47
48 const char *c3[] = {
49 "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n",
50 "__kernel void math_kernel",
51 sizeNames[vectorSize],
52 "( __global double* out, __global double* in, __global double* in2, "
53 "__global double* in3)\n"
54 "{\n"
55 " size_t i = get_global_id(0);\n"
56 " if( i + 1 < get_global_size(0) )\n"
57 " {\n"
58 " double3 d0 = vload3( 0, in + 3 * i );\n"
59 " double3 d1 = vload3( 0, in2 + 3 * i );\n"
60 " double3 d2 = vload3( 0, in3 + 3 * i );\n"
61 " d0 = ",
62 name,
63 "( d0, d1, d2 );\n"
64 " vstore3( d0, 0, out + 3*i );\n"
65 " }\n"
66 " else\n"
67 " {\n"
68 " size_t parity = i & 1; // Figure out how many elements are "
69 "left over after BUFFER_SIZE % (3*sizeof(float)). Assume power of two "
70 "buffer size \n"
71 " double3 d0;\n"
72 " double3 d1;\n"
73 " double3 d2;\n"
74 " switch( parity )\n"
75 " {\n"
76 " case 1:\n"
77 " d0 = (double3)( in[3*i], NAN, NAN ); \n"
78 " d1 = (double3)( in2[3*i], NAN, NAN ); \n"
79 " d2 = (double3)( in3[3*i], NAN, NAN ); \n"
80 " break;\n"
81 " case 0:\n"
82 " d0 = (double3)( in[3*i], in[3*i+1], NAN ); \n"
83 " d1 = (double3)( in2[3*i], in2[3*i+1], NAN ); \n"
84 " d2 = (double3)( in3[3*i], in3[3*i+1], NAN ); \n"
85 " break;\n"
86 " }\n"
87 " d0 = ",
88 name,
89 "( d0, d1, d2 );\n"
90 " switch( parity )\n"
91 " {\n"
92 " case 0:\n"
93 " out[3*i+1] = d0.y; \n"
94 " // fall through\n"
95 " case 1:\n"
96 " out[3*i] = d0.x; \n"
97 " break;\n"
98 " }\n"
99 " }\n"
100 "}\n"
101 };
102
103 const char **kern = c;
104 size_t kernSize = sizeof(c) / sizeof(c[0]);
105
106 if (sizeValues[vectorSize] == 3)
107 {
108 kern = c3;
109 kernSize = sizeof(c3) / sizeof(c3[0]);
110 }
111
112 char testName[32];
113 snprintf(testName, sizeof(testName) - 1, "math_kernel%s",
114 sizeNames[vectorSize]);
115
116 return MakeKernel(kern, (cl_uint)kernSize, testName, k, p, relaxedMode);
117 }
118
119 typedef struct BuildKernelInfo
120 {
121 cl_uint offset; // the first vector size to build
122 cl_kernel *kernels;
123 cl_program *programs;
124 const char *nameInCode;
125 bool relaxedMode; // Whether to build with -cl-fast-relaxed-math.
126 } BuildKernelInfo;
127
BuildKernelFn(cl_uint job_id,cl_uint thread_id UNUSED,void * p)128 static cl_int BuildKernelFn(cl_uint job_id, cl_uint thread_id UNUSED, void *p)
129 {
130 BuildKernelInfo *info = (BuildKernelInfo *)p;
131 cl_uint i = info->offset + job_id;
132 return BuildKernel(info->nameInCode, i, info->kernels + i,
133 info->programs + i, info->relaxedMode);
134 }
135
136 // A table of more difficult cases to get right
137 static const double specialValues[] = {
138 -NAN,
139 -INFINITY,
140 -DBL_MAX,
141 MAKE_HEX_DOUBLE(-0x1.0000000000001p64, -0x10000000000001LL, 12),
142 MAKE_HEX_DOUBLE(-0x1.0p64, -0x1LL, 64),
143 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp63, -0x1fffffffffffffLL, 11),
144 MAKE_HEX_DOUBLE(-0x1.0000000000001p63, -0x10000000000001LL, 11),
145 MAKE_HEX_DOUBLE(-0x1.0p63, -0x1LL, 63),
146 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp62, -0x1fffffffffffffLL, 10),
147 -3.0,
148 MAKE_HEX_DOUBLE(-0x1.8000000000001p1, -0x18000000000001LL, -51),
149 -2.5,
150 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp1, -0x17ffffffffffffLL, -51),
151 -2.0,
152 MAKE_HEX_DOUBLE(-0x1.8000000000001p0, -0x18000000000001LL, -52),
153 -1.5,
154 MAKE_HEX_DOUBLE(-0x1.7ffffffffffffp0, -0x17ffffffffffffLL, -52),
155 MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
156 -1.0,
157 MAKE_HEX_DOUBLE(-0x1.fffffffffffffp-1, -0x1fffffffffffffLL, -53),
158 MAKE_HEX_DOUBLE(-0x1.0000000000001p-1022, -0x10000000000001LL, -1074),
159 -DBL_MIN,
160 MAKE_HEX_DOUBLE(-0x0.fffffffffffffp-1022, -0x0fffffffffffffLL, -1074),
161 MAKE_HEX_DOUBLE(-0x0.0000000000fffp-1022, -0x00000000000fffLL, -1074),
162 MAKE_HEX_DOUBLE(-0x0.00000000000fep-1022, -0x000000000000feLL, -1074),
163 MAKE_HEX_DOUBLE(-0x0.000000000000ep-1022, -0x0000000000000eLL, -1074),
164 MAKE_HEX_DOUBLE(-0x0.000000000000cp-1022, -0x0000000000000cLL, -1074),
165 MAKE_HEX_DOUBLE(-0x0.000000000000ap-1022, -0x0000000000000aLL, -1074),
166 MAKE_HEX_DOUBLE(-0x0.0000000000003p-1022, -0x00000000000003LL, -1074),
167 MAKE_HEX_DOUBLE(-0x0.0000000000002p-1022, -0x00000000000002LL, -1074),
168 MAKE_HEX_DOUBLE(-0x0.0000000000001p-1022, -0x00000000000001LL, -1074),
169 -0.0,
170
171 +NAN,
172 +INFINITY,
173 +DBL_MAX,
174 MAKE_HEX_DOUBLE(+0x1.0000000000001p64, +0x10000000000001LL, 12),
175 MAKE_HEX_DOUBLE(+0x1.0p64, +0x1LL, 64),
176 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp63, +0x1fffffffffffffLL, 11),
177 MAKE_HEX_DOUBLE(+0x1.0000000000001p63, +0x10000000000001LL, 11),
178 MAKE_HEX_DOUBLE(+0x1.0p63, +0x1LL, 63),
179 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp62, +0x1fffffffffffffLL, 10),
180 +3.0,
181 MAKE_HEX_DOUBLE(+0x1.8000000000001p1, +0x18000000000001LL, -51),
182 +2.5,
183 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp1, +0x17ffffffffffffLL, -51),
184 +2.0,
185 MAKE_HEX_DOUBLE(+0x1.8000000000001p0, +0x18000000000001LL, -52),
186 +1.5,
187 MAKE_HEX_DOUBLE(+0x1.7ffffffffffffp0, +0x17ffffffffffffLL, -52),
188 MAKE_HEX_DOUBLE(-0x1.0000000000001p0, -0x10000000000001LL, -52),
189 +1.0,
190 MAKE_HEX_DOUBLE(+0x1.fffffffffffffp-1, +0x1fffffffffffffLL, -53),
191 MAKE_HEX_DOUBLE(+0x1.0000000000001p-1022, +0x10000000000001LL, -1074),
192 +DBL_MIN,
193 MAKE_HEX_DOUBLE(+0x0.fffffffffffffp-1022, +0x0fffffffffffffLL, -1074),
194 MAKE_HEX_DOUBLE(+0x0.0000000000fffp-1022, +0x00000000000fffLL, -1074),
195 MAKE_HEX_DOUBLE(+0x0.00000000000fep-1022, +0x000000000000feLL, -1074),
196 MAKE_HEX_DOUBLE(+0x0.000000000000ep-1022, +0x0000000000000eLL, -1074),
197 MAKE_HEX_DOUBLE(+0x0.000000000000cp-1022, +0x0000000000000cLL, -1074),
198 MAKE_HEX_DOUBLE(+0x0.000000000000ap-1022, +0x0000000000000aLL, -1074),
199 MAKE_HEX_DOUBLE(+0x0.0000000000003p-1022, +0x00000000000003LL, -1074),
200 MAKE_HEX_DOUBLE(+0x0.0000000000002p-1022, +0x00000000000002LL, -1074),
201 MAKE_HEX_DOUBLE(+0x0.0000000000001p-1022, +0x00000000000001LL, -1074),
202 +0.0,
203 };
204
205 static const size_t specialValuesCount =
206 sizeof(specialValues) / sizeof(specialValues[0]);
207
TestFunc_Double_Double_Double_Double(const Func * f,MTdata d,bool relaxedMode)208 int TestFunc_Double_Double_Double_Double(const Func *f, MTdata d,
209 bool relaxedMode)
210 {
211 int error;
212 cl_program programs[VECTOR_SIZE_COUNT];
213 cl_kernel kernels[VECTOR_SIZE_COUNT];
214 float maxError = 0.0f;
215 int ftz = f->ftz || gForceFTZ;
216 double maxErrorVal = 0.0f;
217 double maxErrorVal2 = 0.0f;
218 double maxErrorVal3 = 0.0f;
219 uint64_t step = getTestStep(sizeof(double), BUFFER_SIZE);
220
221 logFunctionInfo(f->name, sizeof(cl_double), relaxedMode);
222
223 Force64BitFPUPrecision();
224
225 // Init the kernels
226 {
227 BuildKernelInfo build_info = { gMinVectorSizeIndex, kernels, programs,
228 f->nameInCode, relaxedMode };
229 if ((error = ThreadPool_Do(BuildKernelFn,
230 gMaxVectorSizeIndex - gMinVectorSizeIndex,
231 &build_info)))
232 return error;
233 }
234
235 for (uint64_t i = 0; i < (1ULL << 32); i += step)
236 {
237 // Init input array
238 double *p = (double *)gIn;
239 double *p2 = (double *)gIn2;
240 double *p3 = (double *)gIn3;
241 size_t idx = 0;
242
243 if (i == 0)
244 { // test edge cases
245 uint32_t x, y, z;
246 x = y = z = 0;
247 for (; idx < BUFFER_SIZE / sizeof(double); idx++)
248 {
249 p[idx] = specialValues[x];
250 p2[idx] = specialValues[y];
251 p3[idx] = specialValues[z];
252 if (++x >= specialValuesCount)
253 {
254 x = 0;
255 if (++y >= specialValuesCount)
256 {
257 y = 0;
258 if (++z >= specialValuesCount) break;
259 }
260 }
261 }
262 if (idx == BUFFER_SIZE / sizeof(double))
263 vlog_error("Test Error: not all special cases tested!\n");
264 }
265
266 for (; idx < BUFFER_SIZE / sizeof(double); idx++)
267 {
268 p[idx] = DoubleFromUInt32(genrand_int32(d));
269 p2[idx] = DoubleFromUInt32(genrand_int32(d));
270 p3[idx] = DoubleFromUInt32(genrand_int32(d));
271 }
272
273 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer, CL_FALSE, 0,
274 BUFFER_SIZE, gIn, 0, NULL, NULL)))
275 {
276 vlog_error("\n*** Error %d in clEnqueueWriteBuffer ***\n", error);
277 return error;
278 }
279
280 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer2, CL_FALSE, 0,
281 BUFFER_SIZE, gIn2, 0, NULL, NULL)))
282 {
283 vlog_error("\n*** Error %d in clEnqueueWriteBuffer2 ***\n", error);
284 return error;
285 }
286
287 if ((error = clEnqueueWriteBuffer(gQueue, gInBuffer3, CL_FALSE, 0,
288 BUFFER_SIZE, gIn3, 0, NULL, NULL)))
289 {
290 vlog_error("\n*** Error %d in clEnqueueWriteBuffer3 ***\n", error);
291 return error;
292 }
293
294 // write garbage into output arrays
295 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
296 {
297 uint32_t pattern = 0xffffdead;
298 memset_pattern4(gOut[j], &pattern, BUFFER_SIZE);
299 if ((error =
300 clEnqueueWriteBuffer(gQueue, gOutBuffer[j], CL_FALSE, 0,
301 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
302 {
303 vlog_error("\n*** Error %d in clEnqueueWriteBuffer2(%d) ***\n",
304 error, j);
305 goto exit;
306 }
307 }
308
309 // Run the kernels
310 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
311 {
312 size_t vectorSize = sizeof(cl_double) * sizeValues[j];
313 size_t localCount = (BUFFER_SIZE + vectorSize - 1)
314 / vectorSize; // BUFFER_SIZE / vectorSize rounded up
315 if ((error = clSetKernelArg(kernels[j], 0, sizeof(gOutBuffer[j]),
316 &gOutBuffer[j])))
317 {
318 LogBuildError(programs[j]);
319 goto exit;
320 }
321 if ((error = clSetKernelArg(kernels[j], 1, sizeof(gInBuffer),
322 &gInBuffer)))
323 {
324 LogBuildError(programs[j]);
325 goto exit;
326 }
327 if ((error = clSetKernelArg(kernels[j], 2, sizeof(gInBuffer2),
328 &gInBuffer2)))
329 {
330 LogBuildError(programs[j]);
331 goto exit;
332 }
333 if ((error = clSetKernelArg(kernels[j], 3, sizeof(gInBuffer3),
334 &gInBuffer3)))
335 {
336 LogBuildError(programs[j]);
337 goto exit;
338 }
339
340 if ((error =
341 clEnqueueNDRangeKernel(gQueue, kernels[j], 1, NULL,
342 &localCount, NULL, 0, NULL, NULL)))
343 {
344 vlog_error("FAILED -- could not execute kernel\n");
345 goto exit;
346 }
347 }
348
349 // Get that moving
350 if ((error = clFlush(gQueue))) vlog("clFlush failed\n");
351
352 // Calculate the correctly rounded reference result
353 double *r = (double *)gOut_Ref;
354 double *s = (double *)gIn;
355 double *s2 = (double *)gIn2;
356 double *s3 = (double *)gIn3;
357 for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
358 r[j] = (double)f->dfunc.f_fff(s[j], s2[j], s3[j]);
359
360 // Read the data back
361 for (auto j = gMinVectorSizeIndex; j < gMaxVectorSizeIndex; j++)
362 {
363 if ((error =
364 clEnqueueReadBuffer(gQueue, gOutBuffer[j], CL_TRUE, 0,
365 BUFFER_SIZE, gOut[j], 0, NULL, NULL)))
366 {
367 vlog_error("ReadArray failed %d\n", error);
368 goto exit;
369 }
370 }
371
372 if (gSkipCorrectnessTesting) break;
373
374 // Verify data
375 uint64_t *t = (uint64_t *)gOut_Ref;
376 for (size_t j = 0; j < BUFFER_SIZE / sizeof(double); j++)
377 {
378 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
379 {
380 uint64_t *q = (uint64_t *)(gOut[k]);
381
382 // If we aren't getting the correctly rounded result
383 if (t[j] != q[j])
384 {
385 double test = ((double *)q)[j];
386 long double correct = f->dfunc.f_fff(s[j], s2[j], s3[j]);
387 float err = Bruteforce_Ulp_Error_Double(test, correct);
388 int fail = !(fabsf(err) <= f->double_ulps);
389
390 if (fail && ftz)
391 {
392 // retry per section 6.5.3.2
393 if (IsDoubleSubnormal(correct))
394 { // look at me,
395 fail = fail && (test != 0.0f);
396 if (!fail) err = 0.0f;
397 }
398
399 // retry per section 6.5.3.3
400 if (fail && IsDoubleSubnormal(s[j]))
401 { // look at me,
402 long double correct2 =
403 f->dfunc.f_fff(0.0, s2[j], s3[j]);
404 long double correct3 =
405 f->dfunc.f_fff(-0.0, s2[j], s3[j]);
406 float err2 =
407 Bruteforce_Ulp_Error_Double(test, correct2);
408 float err3 =
409 Bruteforce_Ulp_Error_Double(test, correct3);
410 fail = fail
411 && ((!(fabsf(err2) <= f->double_ulps))
412 && (!(fabsf(err3) <= f->double_ulps)));
413 if (fabsf(err2) < fabsf(err)) err = err2;
414 if (fabsf(err3) < fabsf(err)) err = err3;
415
416 // retry per section 6.5.3.4
417 if (IsDoubleResultSubnormal(correct2,
418 f->double_ulps)
419 || IsDoubleResultSubnormal(correct3,
420 f->double_ulps))
421 { // look at me now,
422 fail = fail && (test != 0.0f);
423 if (!fail) err = 0.0f;
424 }
425
426 // try with first two args as zero
427 if (IsDoubleSubnormal(s2[j]))
428 { // its fun to have fun,
429 correct2 = f->dfunc.f_fff(0.0, 0.0, s3[j]);
430 correct3 = f->dfunc.f_fff(-0.0, 0.0, s3[j]);
431 long double correct4 =
432 f->dfunc.f_fff(0.0, -0.0, s3[j]);
433 long double correct5 =
434 f->dfunc.f_fff(-0.0, -0.0, s3[j]);
435 err2 =
436 Bruteforce_Ulp_Error_Double(test, correct2);
437 err3 =
438 Bruteforce_Ulp_Error_Double(test, correct3);
439 float err4 =
440 Bruteforce_Ulp_Error_Double(test, correct4);
441 float err5 =
442 Bruteforce_Ulp_Error_Double(test, correct5);
443 fail = fail
444 && ((!(fabsf(err2) <= f->double_ulps))
445 && (!(fabsf(err3) <= f->double_ulps))
446 && (!(fabsf(err4) <= f->double_ulps))
447 && (!(fabsf(err5) <= f->double_ulps)));
448 if (fabsf(err2) < fabsf(err)) err = err2;
449 if (fabsf(err3) < fabsf(err)) err = err3;
450 if (fabsf(err4) < fabsf(err)) err = err4;
451 if (fabsf(err5) < fabsf(err)) err = err5;
452
453 // retry per section 6.5.3.4
454 if (IsDoubleResultSubnormal(correct2,
455 f->double_ulps)
456 || IsDoubleResultSubnormal(correct3,
457 f->double_ulps)
458 || IsDoubleResultSubnormal(correct4,
459 f->double_ulps)
460 || IsDoubleResultSubnormal(correct5,
461 f->double_ulps))
462 {
463 fail = fail && (test != 0.0f);
464 if (!fail) err = 0.0f;
465 }
466
467 if (IsDoubleSubnormal(s3[j]))
468 { // but you have to know how!
469 correct2 = f->dfunc.f_fff(0.0, 0.0, 0.0f);
470 correct3 = f->dfunc.f_fff(-0.0, 0.0, 0.0f);
471 correct4 = f->dfunc.f_fff(0.0, -0.0, 0.0f);
472 correct5 = f->dfunc.f_fff(-0.0, -0.0, 0.0f);
473 long double correct6 =
474 f->dfunc.f_fff(0.0, 0.0, -0.0f);
475 long double correct7 =
476 f->dfunc.f_fff(-0.0, 0.0, -0.0f);
477 long double correct8 =
478 f->dfunc.f_fff(0.0, -0.0, -0.0f);
479 long double correct9 =
480 f->dfunc.f_fff(-0.0, -0.0, -0.0f);
481 err2 = Bruteforce_Ulp_Error_Double(
482 test, correct2);
483 err3 = Bruteforce_Ulp_Error_Double(
484 test, correct3);
485 err4 = Bruteforce_Ulp_Error_Double(
486 test, correct4);
487 err5 = Bruteforce_Ulp_Error_Double(
488 test, correct5);
489 float err6 = Bruteforce_Ulp_Error_Double(
490 test, correct6);
491 float err7 = Bruteforce_Ulp_Error_Double(
492 test, correct7);
493 float err8 = Bruteforce_Ulp_Error_Double(
494 test, correct8);
495 float err9 = Bruteforce_Ulp_Error_Double(
496 test, correct9);
497 fail = fail
498 && ((!(fabsf(err2) <= f->double_ulps))
499 && (!(fabsf(err3)
500 <= f->double_ulps))
501 && (!(fabsf(err4)
502 <= f->double_ulps))
503 && (!(fabsf(err5)
504 <= f->double_ulps))
505 && (!(fabsf(err5)
506 <= f->double_ulps))
507 && (!(fabsf(err6)
508 <= f->double_ulps))
509 && (!(fabsf(err7)
510 <= f->double_ulps))
511 && (!(fabsf(err8)
512 <= f->double_ulps)));
513 if (fabsf(err2) < fabsf(err)) err = err2;
514 if (fabsf(err3) < fabsf(err)) err = err3;
515 if (fabsf(err4) < fabsf(err)) err = err4;
516 if (fabsf(err5) < fabsf(err)) err = err5;
517 if (fabsf(err6) < fabsf(err)) err = err6;
518 if (fabsf(err7) < fabsf(err)) err = err7;
519 if (fabsf(err8) < fabsf(err)) err = err8;
520 if (fabsf(err9) < fabsf(err)) err = err9;
521
522 // retry per section 6.5.3.4
523 if (IsDoubleResultSubnormal(correct2,
524 f->double_ulps)
525 || IsDoubleResultSubnormal(
526 correct3, f->double_ulps)
527 || IsDoubleResultSubnormal(
528 correct4, f->double_ulps)
529 || IsDoubleResultSubnormal(
530 correct5, f->double_ulps)
531 || IsDoubleResultSubnormal(
532 correct6, f->double_ulps)
533 || IsDoubleResultSubnormal(
534 correct7, f->double_ulps)
535 || IsDoubleResultSubnormal(
536 correct8, f->double_ulps)
537 || IsDoubleResultSubnormal(
538 correct9, f->double_ulps))
539 {
540 fail = fail && (test != 0.0f);
541 if (!fail) err = 0.0f;
542 }
543 }
544 }
545 else if (IsDoubleSubnormal(s3[j]))
546 {
547 correct2 = f->dfunc.f_fff(0.0, s2[j], 0.0);
548 correct3 = f->dfunc.f_fff(-0.0, s2[j], 0.0);
549 long double correct4 =
550 f->dfunc.f_fff(0.0, s2[j], -0.0);
551 long double correct5 =
552 f->dfunc.f_fff(-0.0, s2[j], -0.0);
553 err2 =
554 Bruteforce_Ulp_Error_Double(test, correct2);
555 err3 =
556 Bruteforce_Ulp_Error_Double(test, correct3);
557 float err4 =
558 Bruteforce_Ulp_Error_Double(test, correct4);
559 float err5 =
560 Bruteforce_Ulp_Error_Double(test, correct5);
561 fail = fail
562 && ((!(fabsf(err2) <= f->double_ulps))
563 && (!(fabsf(err3) <= f->double_ulps))
564 && (!(fabsf(err4) <= f->double_ulps))
565 && (!(fabsf(err5) <= f->double_ulps)));
566 if (fabsf(err2) < fabsf(err)) err = err2;
567 if (fabsf(err3) < fabsf(err)) err = err3;
568 if (fabsf(err4) < fabsf(err)) err = err4;
569 if (fabsf(err5) < fabsf(err)) err = err5;
570
571 // retry per section 6.5.3.4
572 if (IsDoubleResultSubnormal(correct2,
573 f->double_ulps)
574 || IsDoubleResultSubnormal(correct3,
575 f->double_ulps)
576 || IsDoubleResultSubnormal(correct4,
577 f->double_ulps)
578 || IsDoubleResultSubnormal(correct5,
579 f->double_ulps))
580 {
581 fail = fail && (test != 0.0f);
582 if (!fail) err = 0.0f;
583 }
584 }
585 }
586 else if (fail && IsDoubleSubnormal(s2[j]))
587 {
588 long double correct2 =
589 f->dfunc.f_fff(s[j], 0.0, s3[j]);
590 long double correct3 =
591 f->dfunc.f_fff(s[j], -0.0, s3[j]);
592 float err2 =
593 Bruteforce_Ulp_Error_Double(test, correct2);
594 float err3 =
595 Bruteforce_Ulp_Error_Double(test, correct3);
596 fail = fail
597 && ((!(fabsf(err2) <= f->double_ulps))
598 && (!(fabsf(err3) <= f->double_ulps)));
599 if (fabsf(err2) < fabsf(err)) err = err2;
600 if (fabsf(err3) < fabsf(err)) err = err3;
601
602 // retry per section 6.5.3.4
603 if (IsDoubleResultSubnormal(correct2,
604 f->double_ulps)
605 || IsDoubleResultSubnormal(correct3,
606 f->double_ulps))
607 {
608 fail = fail && (test != 0.0f);
609 if (!fail) err = 0.0f;
610 }
611
612 // try with second two args as zero
613 if (IsDoubleSubnormal(s3[j]))
614 {
615 correct2 = f->dfunc.f_fff(s[j], 0.0, 0.0);
616 correct3 = f->dfunc.f_fff(s[j], -0.0, 0.0);
617 long double correct4 =
618 f->dfunc.f_fff(s[j], 0.0, -0.0);
619 long double correct5 =
620 f->dfunc.f_fff(s[j], -0.0, -0.0);
621 err2 =
622 Bruteforce_Ulp_Error_Double(test, correct2);
623 err3 =
624 Bruteforce_Ulp_Error_Double(test, correct3);
625 float err4 =
626 Bruteforce_Ulp_Error_Double(test, correct4);
627 float err5 =
628 Bruteforce_Ulp_Error_Double(test, correct5);
629 fail = fail
630 && ((!(fabsf(err2) <= f->double_ulps))
631 && (!(fabsf(err3) <= f->double_ulps))
632 && (!(fabsf(err4) <= f->double_ulps))
633 && (!(fabsf(err5) <= f->double_ulps)));
634 if (fabsf(err2) < fabsf(err)) err = err2;
635 if (fabsf(err3) < fabsf(err)) err = err3;
636 if (fabsf(err4) < fabsf(err)) err = err4;
637 if (fabsf(err5) < fabsf(err)) err = err5;
638
639 // retry per section 6.5.3.4
640 if (IsDoubleResultSubnormal(correct2,
641 f->double_ulps)
642 || IsDoubleResultSubnormal(correct3,
643 f->double_ulps)
644 || IsDoubleResultSubnormal(correct4,
645 f->double_ulps)
646 || IsDoubleResultSubnormal(correct5,
647 f->double_ulps))
648 {
649 fail = fail && (test != 0.0f);
650 if (!fail) err = 0.0f;
651 }
652 }
653 }
654 else if (fail && IsDoubleSubnormal(s3[j]))
655 {
656 long double correct2 =
657 f->dfunc.f_fff(s[j], s2[j], 0.0);
658 long double correct3 =
659 f->dfunc.f_fff(s[j], s2[j], -0.0);
660 float err2 =
661 Bruteforce_Ulp_Error_Double(test, correct2);
662 float err3 =
663 Bruteforce_Ulp_Error_Double(test, correct3);
664 fail = fail
665 && ((!(fabsf(err2) <= f->double_ulps))
666 && (!(fabsf(err3) <= f->double_ulps)));
667 if (fabsf(err2) < fabsf(err)) err = err2;
668 if (fabsf(err3) < fabsf(err)) err = err3;
669
670 // retry per section 6.5.3.4
671 if (IsDoubleResultSubnormal(correct2,
672 f->double_ulps)
673 || IsDoubleResultSubnormal(correct3,
674 f->double_ulps))
675 {
676 fail = fail && (test != 0.0f);
677 if (!fail) err = 0.0f;
678 }
679 }
680 }
681
682 if (fabsf(err) > maxError)
683 {
684 maxError = fabsf(err);
685 maxErrorVal = s[j];
686 maxErrorVal2 = s2[j];
687 maxErrorVal3 = s3[j];
688 }
689
690 if (fail)
691 {
692 vlog_error("\nERROR: %sD%s: %f ulp error at {%.13la, "
693 "%.13la, %.13la}: *%.13la vs. %.13la\n",
694 f->name, sizeNames[k], err, s[j], s2[j],
695 s3[j], ((double *)gOut_Ref)[j], test);
696 error = -1;
697 goto exit;
698 }
699 }
700 }
701 }
702
703 if (0 == (i & 0x0fffffff))
704 {
705 if (gVerboseBruteForce)
706 {
707 vlog("base:%14u step:%10zu bufferSize:%10zd \n", i, step,
708 BUFFER_SIZE);
709 }
710 else
711 {
712 vlog(".");
713 }
714 fflush(stdout);
715 }
716 }
717
718 if (!gSkipCorrectnessTesting)
719 {
720 if (gWimpyMode)
721 vlog("Wimp pass");
722 else
723 vlog("passed");
724
725 vlog("\t%8.2f @ {%a, %a, %a}", maxError, maxErrorVal, maxErrorVal2,
726 maxErrorVal3);
727 }
728
729 vlog("\n");
730
731 exit:
732 // Release
733 for (auto k = gMinVectorSizeIndex; k < gMaxVectorSizeIndex; k++)
734 {
735 clReleaseKernel(kernels[k]);
736 clReleaseProgram(programs[k]);
737 }
738
739 return error;
740 }
741