1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "function_list.h"
18 #include "sleep.h"
19 #include "utility.h"
20 
21 #include <cstdio>
22 #include <cstdlib>
23 #include <ctime>
24 #include <string>
25 #include <vector>
26 
27 #include "harness/errorHelpers.h"
28 #include "harness/kernelHelpers.h"
29 #include "harness/parseParameters.h"
30 #include "harness/typeWrappers.h"
31 
32 #if defined(__APPLE__)
33 #include <sys/sysctl.h>
34 #include <sys/mman.h>
35 #include <libgen.h>
36 #include <sys/time.h>
37 #elif defined(__linux__)
38 #include <unistd.h>
39 #include <sys/syscall.h>
40 #include <linux/sysctl.h>
41 #include <sys/param.h>
42 #endif
43 
44 #if defined(__linux__) || (defined WIN32 && defined __MINGW32__)
45 #include <sys/param.h>
46 #endif
47 
48 #include "harness/testHarness.h"
49 
50 #define kPageSize 4096
51 #define DOUBLE_REQUIRED_FEATURES                                               \
52     (CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO                  \
53      | CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM)
54 
55 static std::vector<const char *> gTestNames;
56 static char appName[MAXPATHLEN] = "";
57 cl_device_id gDevice = NULL;
58 cl_context gContext = NULL;
59 cl_command_queue gQueue = NULL;
60 static int32_t gStartTestNumber = -1;
61 static int32_t gEndTestNumber = -1;
62 int gSkipCorrectnessTesting = 0;
63 static int gStopOnError = 0;
64 static bool gSkipRestOfTests;
65 int gForceFTZ = 0;
66 int gWimpyMode = 0;
67 static int gHasDouble = 0;
68 static int gTestFloat = 1;
69 // This flag should be 'ON' by default and it can be changed through the command
70 // line arguments.
71 static int gTestFastRelaxed = 1;
72 /*This flag corresponds to defining if the implementation has Derived Fast
73   Relaxed functions. The spec does not specify ULP for derived function.  The
74   derived functions are composed of base functions which are tested for ULP,
75   thus when this flag is enabled, Derived functions will not be tested for ULP,
76   as per table 7.1 of OpenCL 2.0 spec. Since there is no way of quering the
77   device whether it is a derived or non-derived implementation according to
78   OpenCL 2.0 spec then it has to be changed through a command line argument.
79 */
80 int gFastRelaxedDerived = 1;
81 static int gToggleCorrectlyRoundedDivideSqrt = 0;
82 int gDeviceILogb0 = 1;
83 int gDeviceILogbNaN = 1;
84 int gCheckTininessBeforeRounding = 1;
85 int gIsInRTZMode = 0;
86 uint32_t gMaxVectorSizeIndex = VECTOR_SIZE_COUNT;
87 uint32_t gMinVectorSizeIndex = 0;
88 void *gIn = NULL;
89 void *gIn2 = NULL;
90 void *gIn3 = NULL;
91 void *gOut_Ref = NULL;
92 void *gOut[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
93 void *gOut_Ref2 = NULL;
94 void *gOut2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
95 cl_mem gInBuffer = NULL;
96 cl_mem gInBuffer2 = NULL;
97 cl_mem gInBuffer3 = NULL;
98 cl_mem gOutBuffer[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
99 cl_mem gOutBuffer2[VECTOR_SIZE_COUNT] = { NULL, NULL, NULL, NULL, NULL, NULL };
100 static MTdata gMTdata;
101 cl_device_fp_config gFloatCapabilities = 0;
102 int gWimpyReductionFactor = 32;
103 int gVerboseBruteForce = 0;
104 
105 static int ParseArgs(int argc, const char **argv);
106 static void PrintUsage(void);
107 static void PrintFunctions(void);
108 static test_status InitCL(cl_device_id device);
109 static void ReleaseCL(void);
110 static int InitILogbConstants(void);
111 static int IsTininessDetectedBeforeRounding(void);
112 static int
113 IsInRTZMode(void); // expensive. Please check gIsInRTZMode global instead.
114 
doTest(const char * name)115 static int doTest(const char *name)
116 {
117     if (gSkipRestOfTests)
118     {
119         vlog("Skipping function because of an earlier error.\n");
120         return 1;
121     }
122 
123     int error = 0;
124     const Func *func_data = NULL;
125 
126     for (size_t i = 0; i < functionListCount; i++)
127     {
128         const Func *const temp_func = functionList + i;
129         if (strcmp(temp_func->name, name) == 0)
130         {
131             if ((gStartTestNumber != -1 && i < gStartTestNumber)
132                 || i > gEndTestNumber)
133             {
134                 vlog("Skipping function #%d\n", i);
135                 return 0;
136             }
137 
138             func_data = temp_func;
139             break;
140         }
141     }
142 
143     if (func_data == NULL)
144     {
145         vlog("Function '%s' doesn't exist!\n", name);
146         exit(EXIT_FAILURE);
147     }
148 
149     if (func_data->func.p == NULL)
150     {
151         vlog("'%s' is missing implementation, skipping function.\n",
152              func_data->name);
153         return 0;
154     }
155 
156     // if correctly rounded divide & sqrt are supported by the implementation
157     // then test it; otherwise skip the test
158     if (strcmp(func_data->name, "sqrt_cr") == 0
159         || strcmp(func_data->name, "divide_cr") == 0)
160     {
161         if ((gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT) == 0)
162         {
163             vlog("Correctly rounded divide and sqrt are not supported, "
164                  "skipping function.\n");
165             return 0;
166         }
167     }
168 
169     {
170         extern int my_ilogb(double);
171         if (0 == strcmp("ilogb", func_data->name))
172         {
173             InitILogbConstants();
174         }
175 
176         if (gTestFastRelaxed && func_data->relaxed)
177         {
178             if (get_device_cl_version(gDevice) > Version(1, 2))
179             {
180                 gTestCount++;
181                 vlog("%3d: ", gTestCount);
182                 // Test with relaxed requirements here.
183                 if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
184                                                   true /* relaxed mode */))
185                 {
186                     gFailCount++;
187                     error++;
188                     if (gStopOnError)
189                     {
190                         gSkipRestOfTests = true;
191                         return error;
192                     }
193                 }
194             }
195             else
196             {
197                 vlog("Skipping reduced precision testing for device with "
198                      "version 1.2 or less\n");
199             }
200         }
201 
202         if (gTestFloat)
203         {
204             gTestCount++;
205             vlog("%3d: ", gTestCount);
206             // Don't test with relaxed requirements.
207             if (func_data->vtbl_ptr->TestFunc(func_data, gMTdata,
208                                               false /* relaxed mode */))
209             {
210                 gFailCount++;
211                 error++;
212                 if (gStopOnError)
213                 {
214                     gSkipRestOfTests = true;
215                     return error;
216                 }
217             }
218         }
219 
220         if (gHasDouble && NULL != func_data->vtbl_ptr->DoubleTestFunc
221             && NULL != func_data->dfunc.p)
222         {
223             gTestCount++;
224             vlog("%3d: ", gTestCount);
225             // Don't test with relaxed requirements.
226             if (func_data->vtbl_ptr->DoubleTestFunc(func_data, gMTdata,
227                                                     false /* relaxed mode*/))
228             {
229                 gFailCount++;
230                 error++;
231                 if (gStopOnError)
232                 {
233                     gSkipRestOfTests = true;
234                     return error;
235                 }
236             }
237         }
238     }
239 
240     return error;
241 }
242 
243 
244 #define TEST_LAMBDA(name)                                                      \
245     [](cl_device_id, cl_context, cl_command_queue, int) {                      \
246         return doTest(#name);                                                  \
247     }
248 
249 // Redefine ADD_TEST to use TEST_LAMBDA.
250 #undef ADD_TEST
251 #define ADD_TEST(name)                                                         \
252     {                                                                          \
253         TEST_LAMBDA(name), #name, Version(1, 0)                                \
254     }
255 
256 static test_definition test_list[] = {
257     ADD_TEST(acos),          ADD_TEST(acosh),      ADD_TEST(acospi),
258     ADD_TEST(asin),          ADD_TEST(asinh),      ADD_TEST(asinpi),
259     ADD_TEST(atan),          ADD_TEST(atanh),      ADD_TEST(atanpi),
260     ADD_TEST(atan2),         ADD_TEST(atan2pi),    ADD_TEST(cbrt),
261     ADD_TEST(ceil),          ADD_TEST(copysign),   ADD_TEST(cos),
262     ADD_TEST(cosh),          ADD_TEST(cospi),      ADD_TEST(exp),
263     ADD_TEST(exp2),          ADD_TEST(exp10),      ADD_TEST(expm1),
264     ADD_TEST(fabs),          ADD_TEST(fdim),       ADD_TEST(floor),
265     ADD_TEST(fma),           ADD_TEST(fmax),       ADD_TEST(fmin),
266     ADD_TEST(fmod),          ADD_TEST(fract),      ADD_TEST(frexp),
267     ADD_TEST(hypot),         ADD_TEST(ilogb),      ADD_TEST(isequal),
268     ADD_TEST(isfinite),      ADD_TEST(isgreater),  ADD_TEST(isgreaterequal),
269     ADD_TEST(isinf),         ADD_TEST(isless),     ADD_TEST(islessequal),
270     ADD_TEST(islessgreater), ADD_TEST(isnan),      ADD_TEST(isnormal),
271     ADD_TEST(isnotequal),    ADD_TEST(isordered),  ADD_TEST(isunordered),
272     ADD_TEST(ldexp),         ADD_TEST(lgamma),     ADD_TEST(lgamma_r),
273     ADD_TEST(log),           ADD_TEST(log2),       ADD_TEST(log10),
274     ADD_TEST(log1p),         ADD_TEST(logb),       ADD_TEST(mad),
275     ADD_TEST(maxmag),        ADD_TEST(minmag),     ADD_TEST(modf),
276     ADD_TEST(nan),           ADD_TEST(nextafter),  ADD_TEST(pow),
277     ADD_TEST(pown),          ADD_TEST(powr),       ADD_TEST(remainder),
278     ADD_TEST(remquo),        ADD_TEST(rint),       ADD_TEST(rootn),
279     ADD_TEST(round),         ADD_TEST(rsqrt),      ADD_TEST(signbit),
280     ADD_TEST(sin),           ADD_TEST(sincos),     ADD_TEST(sinh),
281     ADD_TEST(sinpi),         ADD_TEST(sqrt),       ADD_TEST(sqrt_cr),
282     ADD_TEST(tan),           ADD_TEST(tanh),       ADD_TEST(tanpi),
283     ADD_TEST(trunc),         ADD_TEST(half_cos),   ADD_TEST(half_divide),
284     ADD_TEST(half_exp),      ADD_TEST(half_exp2),  ADD_TEST(half_exp10),
285     ADD_TEST(half_log),      ADD_TEST(half_log2),  ADD_TEST(half_log10),
286     ADD_TEST(half_powr),     ADD_TEST(half_recip), ADD_TEST(half_rsqrt),
287     ADD_TEST(half_sin),      ADD_TEST(half_sqrt),  ADD_TEST(half_tan),
288     ADD_TEST(add),           ADD_TEST(subtract),   ADD_TEST(divide),
289     ADD_TEST(divide_cr),     ADD_TEST(multiply),   ADD_TEST(assignment),
290     ADD_TEST(not),
291 };
292 
293 #undef ADD_TEST
294 #undef TEST_LAMBDA
295 
296 static const int test_num = ARRAY_SIZE(test_list);
297 
298 #pragma mark -
299 
main(int argc,const char * argv[])300 int main(int argc, const char *argv[])
301 {
302     int error;
303 
304     argc = parseCustomParam(argc, argv);
305     if (argc == -1)
306     {
307         return -1;
308     }
309 
310     error = ParseArgs(argc, argv);
311     if (error) return error;
312 
313     // This takes a while, so prevent the machine from going to sleep.
314     PreventSleep();
315     atexit(ResumeSleep);
316 
317     if (gSkipCorrectnessTesting)
318         vlog("*** Skipping correctness testing! ***\n\n");
319     else if (gStopOnError)
320         vlog("Stopping at first error.\n");
321 
322     vlog("   \t                                        ");
323     if (gWimpyMode) vlog("   ");
324     if (!gSkipCorrectnessTesting) vlog("\t  max_ulps");
325 
326     vlog("\n-------------------------------------------------------------------"
327          "----------------------------------------\n");
328 
329     gMTdata = init_genrand(gRandomSeed);
330 
331     FPU_mode_type oldMode;
332     DisableFTZ(&oldMode);
333 
334     int ret = runTestHarnessWithCheck(gTestNames.size(), gTestNames.data(),
335                                       test_num, test_list, true, 0, InitCL);
336 
337     RestoreFPState(&oldMode);
338 
339     free_mtdata(gMTdata);
340 
341     if (gQueue)
342     {
343         int error_code = clFinish(gQueue);
344         if (error_code) vlog_error("clFinish failed:%d\n", error_code);
345     }
346 
347     ReleaseCL();
348 
349     return ret;
350 }
351 
ParseArgs(int argc,const char ** argv)352 static int ParseArgs(int argc, const char **argv)
353 {
354     // We only pass test names to runTestHarnessWithCheck, hence global command
355     // line options defined by the harness cannot be used by the user.
356     // To respect the implementation details of runTestHarnessWithCheck,
357     // gTestNames[0] has to exist although its value is not important.
358     gTestNames.push_back("");
359 
360     int singleThreaded = 0;
361 
362     { // Extract the app name
363         strncpy(appName, argv[0], MAXPATHLEN);
364 
365 #if defined(__APPLE__)
366         char baseName[MAXPATHLEN];
367         char *base = NULL;
368         strncpy(baseName, argv[0], MAXPATHLEN);
369         base = basename(baseName);
370         if (NULL != base)
371         {
372             strncpy(appName, base, sizeof(appName));
373             appName[sizeof(appName) - 1] = '\0';
374         }
375 #endif
376     }
377 
378     vlog("\n%s\t", appName);
379     for (int i = 1; i < argc; i++)
380     {
381         const char *arg = argv[i];
382         if (NULL == arg) break;
383 
384         vlog("\t%s", arg);
385         int optionFound = 0;
386         if (arg[0] == '-')
387         {
388             while (arg[1] != '\0')
389             {
390                 arg++;
391                 optionFound = 1;
392                 switch (*arg)
393                 {
394                     case 'c': gToggleCorrectlyRoundedDivideSqrt ^= 1; break;
395 
396                     case 'd': gHasDouble ^= 1; break;
397 
398                     case 'e': gFastRelaxedDerived ^= 1; break;
399 
400                     case 'f': gTestFloat ^= 1; break;
401 
402                     case 'h': PrintUsage(); return -1;
403 
404                     case 'p': PrintFunctions(); return -1;
405 
406                     case 'l': gSkipCorrectnessTesting ^= 1; break;
407 
408                     case 'm': singleThreaded ^= 1; break;
409 
410                     case 'r': gTestFastRelaxed ^= 1; break;
411 
412                     case 's': gStopOnError ^= 1; break;
413 
414                     case 'v': gVerboseBruteForce ^= 1; break;
415 
416                     case 'w': // wimpy mode
417                         gWimpyMode ^= 1;
418                         break;
419 
420                     case '[':
421                         parseWimpyReductionFactor(arg, gWimpyReductionFactor);
422                         break;
423 
424                     case 'z': gForceFTZ ^= 1; break;
425 
426                     case '1':
427                         if (arg[1] == '6')
428                         {
429                             gMinVectorSizeIndex = 5;
430                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
431                             arg++;
432                         }
433                         else
434                         {
435                             gMinVectorSizeIndex = 0;
436                             gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
437                         }
438                         break;
439                     case '2':
440                         gMinVectorSizeIndex = 1;
441                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
442                         break;
443                     case '3':
444                         gMinVectorSizeIndex = 2;
445                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
446                         break;
447                     case '4':
448                         gMinVectorSizeIndex = 3;
449                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
450                         break;
451                     case '8':
452                         gMinVectorSizeIndex = 4;
453                         gMaxVectorSizeIndex = gMinVectorSizeIndex + 1;
454                         break;
455 
456                     default:
457                         vlog(" <-- unknown flag: %c (0x%2.2x)\n)", *arg, *arg);
458                         PrintUsage();
459                         return -1;
460                 }
461             }
462         }
463 
464         if (!optionFound)
465         {
466             char *t = NULL;
467             long number = strtol(arg, &t, 0);
468             if (t != arg)
469             {
470                 if (-1 == gStartTestNumber)
471                     gStartTestNumber = (int32_t)number;
472                 else
473                     gEndTestNumber = gStartTestNumber + (int32_t)number;
474             }
475             else
476             {
477                 // Make sure this is a valid name
478                 unsigned int k;
479                 for (k = 0; k < functionListCount; k++)
480                 {
481                     const Func *f = functionList + k;
482                     if (strcmp(arg, f->name) == 0)
483                     {
484                         gTestNames.push_back(arg);
485                         break;
486                     }
487                 }
488                 // If we didn't find it in the list of test names
489                 if (k >= functionListCount)
490                 {
491                     gTestNames.push_back(arg);
492                 }
493             }
494         }
495     }
496 
497     // Check for the wimpy mode environment variable
498     if (getenv("CL_WIMPY_MODE"))
499     {
500         vlog("\n");
501         vlog("*** Detected CL_WIMPY_MODE env                          ***\n");
502         gWimpyMode = 1;
503     }
504 
505     vlog("\nTest binary built %s %s\n", __DATE__, __TIME__);
506 
507     PrintArch();
508 
509     if (gWimpyMode)
510     {
511         vlog("\n");
512         vlog("*** WARNING: Testing in Wimpy mode!                     ***\n");
513         vlog("*** Wimpy mode is not sufficient to verify correctness. ***\n");
514         vlog("*** Wimpy Reduction Factor: %-27u ***\n\n",
515              gWimpyReductionFactor);
516     }
517 
518     if (singleThreaded) SetThreadCount(1);
519 
520     return 0;
521 }
522 
523 
PrintFunctions(void)524 static void PrintFunctions(void)
525 {
526     vlog("\nMath function names:\n");
527     for (int i = 0; i < functionListCount; i++)
528     {
529         vlog("\t%s\n", functionList[i].name);
530     }
531 }
532 
PrintUsage(void)533 static void PrintUsage(void)
534 {
535     vlog("%s [-cglsz]: <optional: math function names>\n", appName);
536     vlog("\toptions:\n");
537     vlog("\t\t-c\tToggle test fp correctly rounded divide and sqrt (Default: "
538          "off)\n");
539     vlog("\t\t-d\tToggle double precision testing. (Default: on iff khr_fp_64 "
540          "on)\n");
541     vlog("\t\t-f\tToggle float precision testing. (Default: on)\n");
542     vlog("\t\t-r\tToggle fast relaxed math precision testing. (Default: on)\n");
543     vlog("\t\t-e\tToggle test as derived implementations for fast relaxed math "
544          "precision. (Default: on)\n");
545     vlog("\t\t-h\tPrint this message and quit\n");
546     vlog("\t\t-p\tPrint all math function names and quit\n");
547     vlog("\t\t-l\tlink check only (make sure functions are present, skip "
548          "accuracy checks.)\n");
549     vlog("\t\t-m\tToggle run multi-threaded. (Default: on) )\n");
550     vlog("\t\t-s\tStop on error\n");
551     vlog("\t\t-w\tToggle Wimpy Mode, * Not a valid test * \n");
552     vlog("\t\t-[2^n]\tSet wimpy reduction factor, recommended range of n is "
553          "1-10, default factor(%u)\n",
554          gWimpyReductionFactor);
555     vlog("\t\t-z\tToggle FTZ mode (Section 6.5.3) for all functions. (Set by "
556          "device capabilities by default.)\n");
557     vlog("\t\t-v\tToggle Verbosity (Default: off)\n ");
558     vlog("\t\t-#\tTest only vector sizes #, e.g. \"-1\" tests scalar only, "
559          "\"-16\" tests 16-wide vectors only.\n");
560     vlog("\n\tYou may also pass a number instead of a function name.\n");
561     vlog("\tThis causes the first N tests to be skipped. The tests are "
562          "numbered.\n");
563     vlog("\tIf you pass a second number, that is the number tests to run after "
564          "the first one.\n");
565     vlog("\tA name list may be used in conjunction with a number range. In "
566          "that case,\n");
567     vlog("\tonly the named cases in the number range will run.\n");
568     vlog("\tYou may also choose to pass no arguments, in which case all tests "
569          "will be run.\n");
570     vlog("\tYou may pass CL_DEVICE_TYPE_CPU/GPU/ACCELERATOR to select the "
571          "device.\n");
572     vlog("\n");
573 }
574 
bruteforce_notify_callback(const char * errinfo,const void * private_info,size_t cb,void * user_data)575 static void CL_CALLBACK bruteforce_notify_callback(const char *errinfo,
576                                                    const void *private_info,
577                                                    size_t cb, void *user_data)
578 {
579     vlog("%s  (%p, %zd, %p)\n", errinfo, private_info, cb, user_data);
580 }
581 
InitCL(cl_device_id device)582 test_status InitCL(cl_device_id device)
583 {
584     int error;
585     uint32_t i;
586     cl_device_type device_type;
587 
588     error = clGetDeviceInfo(device, CL_DEVICE_TYPE, sizeof(device_type),
589                             &device_type, NULL);
590     if (error)
591     {
592         print_error(error, "Unable to get device type");
593         return TEST_FAIL;
594     }
595 
596     gDevice = device;
597 
598     // Check extensions
599     if (is_extension_available(gDevice, "cl_khr_fp64"))
600     {
601         gHasDouble ^= 1;
602 #if defined(CL_DEVICE_DOUBLE_FP_CONFIG)
603         cl_device_fp_config doubleCapabilities = 0;
604         if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_DOUBLE_FP_CONFIG,
605                                      sizeof(doubleCapabilities),
606                                      &doubleCapabilities, NULL)))
607         {
608             vlog_error("ERROR: Unable to get device "
609                        "CL_DEVICE_DOUBLE_FP_CONFIG. (%d)\n",
610                        error);
611             return TEST_FAIL;
612         }
613 
614         if (DOUBLE_REQUIRED_FEATURES
615             != (doubleCapabilities & DOUBLE_REQUIRED_FEATURES))
616         {
617             std::string list;
618             if (0 == (doubleCapabilities & CL_FP_FMA)) list += "CL_FP_FMA, ";
619             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_NEAREST))
620                 list += "CL_FP_ROUND_TO_NEAREST, ";
621             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_ZERO))
622                 list += "CL_FP_ROUND_TO_ZERO, ";
623             if (0 == (doubleCapabilities & CL_FP_ROUND_TO_INF))
624                 list += "CL_FP_ROUND_TO_INF, ";
625             if (0 == (doubleCapabilities & CL_FP_INF_NAN))
626                 list += "CL_FP_INF_NAN, ";
627             if (0 == (doubleCapabilities & CL_FP_DENORM))
628                 list += "CL_FP_DENORM, ";
629             vlog_error("ERROR: required double features are missing: %s\n",
630                        list.c_str());
631 
632             return TEST_FAIL;
633         }
634 #else
635         vlog_error("FAIL: device says it supports cl_khr_fp64 but "
636                    "CL_DEVICE_DOUBLE_FP_CONFIG is not in the headers!\n");
637         return TEST_FAIL;
638 #endif
639     }
640 
641     uint32_t deviceFrequency = 0;
642     size_t configSize = sizeof(deviceFrequency);
643     if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_MAX_CLOCK_FREQUENCY,
644                                  configSize, &deviceFrequency, NULL)))
645         deviceFrequency = 0;
646 
647     if ((error = clGetDeviceInfo(gDevice, CL_DEVICE_SINGLE_FP_CONFIG,
648                                  sizeof(gFloatCapabilities),
649                                  &gFloatCapabilities, NULL)))
650     {
651         vlog_error(
652             "ERROR: Unable to get device CL_DEVICE_SINGLE_FP_CONFIG. (%d)\n",
653             error);
654         return TEST_FAIL;
655     }
656 
657     gContext = clCreateContext(NULL, 1, &gDevice, bruteforce_notify_callback,
658                                NULL, &error);
659     if (NULL == gContext || error)
660     {
661         vlog_error("clCreateContext failed. (%d) \n", error);
662         return TEST_FAIL;
663     }
664 
665     gQueue = clCreateCommandQueue(gContext, gDevice, 0, &error);
666     if (NULL == gQueue || error)
667     {
668         vlog_error("clCreateCommandQueue failed. (%d)\n", error);
669         return TEST_FAIL;
670     }
671 
672     // Allocate buffers
673     cl_uint min_alignment = 0;
674     error = clGetDeviceInfo(gDevice, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
675                             sizeof(cl_uint), (void *)&min_alignment, NULL);
676     if (CL_SUCCESS != error)
677     {
678         vlog_error("clGetDeviceInfo failed. (%d)\n", error);
679         return TEST_FAIL;
680     }
681     min_alignment >>= 3; // convert bits to bytes
682 
683     gIn = align_malloc(BUFFER_SIZE, min_alignment);
684     if (NULL == gIn) return TEST_FAIL;
685     gIn2 = align_malloc(BUFFER_SIZE, min_alignment);
686     if (NULL == gIn2) return TEST_FAIL;
687     gIn3 = align_malloc(BUFFER_SIZE, min_alignment);
688     if (NULL == gIn3) return TEST_FAIL;
689     gOut_Ref = align_malloc(BUFFER_SIZE, min_alignment);
690     if (NULL == gOut_Ref) return TEST_FAIL;
691     gOut_Ref2 = align_malloc(BUFFER_SIZE, min_alignment);
692     if (NULL == gOut_Ref2) return TEST_FAIL;
693 
694     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
695     {
696         gOut[i] = align_malloc(BUFFER_SIZE, min_alignment);
697         if (NULL == gOut[i]) return TEST_FAIL;
698         gOut2[i] = align_malloc(BUFFER_SIZE, min_alignment);
699         if (NULL == gOut2[i]) return TEST_FAIL;
700     }
701 
702     cl_mem_flags device_flags = CL_MEM_READ_ONLY;
703     // save a copy on the host device to make this go faster
704     if (CL_DEVICE_TYPE_CPU == device_type)
705         device_flags |= CL_MEM_USE_HOST_PTR;
706     else
707         device_flags |= CL_MEM_COPY_HOST_PTR;
708 
709     // setup input buffers
710     gInBuffer =
711         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn, &error);
712     if (gInBuffer == NULL || error)
713     {
714         vlog_error("clCreateBuffer1 failed for input (%d)\n", error);
715         return TEST_FAIL;
716     }
717 
718     gInBuffer2 =
719         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn2, &error);
720     if (gInBuffer2 == NULL || error)
721     {
722         vlog_error("clCreateBuffer2 failed for input (%d)\n", error);
723         return TEST_FAIL;
724     }
725 
726     gInBuffer3 =
727         clCreateBuffer(gContext, device_flags, BUFFER_SIZE, gIn3, &error);
728     if (gInBuffer3 == NULL || error)
729     {
730         vlog_error("clCreateBuffer3 failed for input (%d)\n", error);
731         return TEST_FAIL;
732     }
733 
734 
735     // setup output buffers
736     device_flags = CL_MEM_READ_WRITE;
737     // save a copy on the host device to make this go faster
738     if (CL_DEVICE_TYPE_CPU == device_type)
739         device_flags |= CL_MEM_USE_HOST_PTR;
740     else
741         device_flags |= CL_MEM_COPY_HOST_PTR;
742     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
743     {
744         gOutBuffer[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
745                                        gOut[i], &error);
746         if (gOutBuffer[i] == NULL || error)
747         {
748             vlog_error("clCreateBuffer failed for output (%d)\n", error);
749             return TEST_FAIL;
750         }
751         gOutBuffer2[i] = clCreateBuffer(gContext, device_flags, BUFFER_SIZE,
752                                         gOut2[i], &error);
753         if (gOutBuffer2[i] == NULL || error)
754         {
755             vlog_error("clCreateBuffer2 failed for output (%d)\n", error);
756             return TEST_FAIL;
757         }
758     }
759 
760     // we are embedded, check current rounding mode
761     if (gIsEmbedded)
762     {
763         gIsInRTZMode = IsInRTZMode();
764     }
765 
766     // Check tininess detection
767     IsTininessDetectedBeforeRounding();
768 
769     cl_platform_id platform;
770     int err = clGetPlatformIDs(1, &platform, NULL);
771     if (err)
772     {
773         print_error(err, "clGetPlatformIDs failed");
774         return TEST_FAIL;
775     }
776 
777     char c[1024];
778     static const char *no_yes[] = { "NO", "YES" };
779     vlog("\nCompute Device info:\n");
780     clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(c), &c, NULL);
781     vlog("\tPlatform Version: %s\n", c);
782     clGetDeviceInfo(gDevice, CL_DEVICE_NAME, sizeof(c), &c, NULL);
783     vlog("\tDevice Name: %s\n", c);
784     clGetDeviceInfo(gDevice, CL_DEVICE_VENDOR, sizeof(c), &c, NULL);
785     vlog("\tVendor: %s\n", c);
786     clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(c), &c, NULL);
787     vlog("\tDevice Version: %s\n", c);
788     clGetDeviceInfo(gDevice, CL_DEVICE_OPENCL_C_VERSION, sizeof(c), &c, NULL);
789     vlog("\tCL C Version: %s\n", c);
790     clGetDeviceInfo(gDevice, CL_DRIVER_VERSION, sizeof(c), &c, NULL);
791     vlog("\tDriver Version: %s\n", c);
792     vlog("\tDevice Frequency: %d MHz\n", deviceFrequency);
793     vlog("\tSubnormal values supported for floats? %s\n",
794          no_yes[0 != (CL_FP_DENORM & gFloatCapabilities)]);
795     vlog("\tCorrectly rounded divide and sqrt supported for floats? %s\n",
796          no_yes[0
797                 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
798     if (gToggleCorrectlyRoundedDivideSqrt)
799     {
800         gFloatCapabilities ^= CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT;
801     }
802     vlog("\tTesting with correctly rounded float divide and sqrt? %s\n",
803          no_yes[0
804                 != (CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT & gFloatCapabilities)]);
805     vlog("\tTesting with FTZ mode ON for floats? %s\n",
806          no_yes[0 != gForceFTZ || 0 == (CL_FP_DENORM & gFloatCapabilities)]);
807     vlog("\tTesting single precision? %s\n", no_yes[0 != gTestFloat]);
808     vlog("\tTesting fast relaxed math? %s\n", no_yes[0 != gTestFastRelaxed]);
809     if (gTestFastRelaxed)
810     {
811         vlog("\tFast relaxed math has derived implementations? %s\n",
812              no_yes[0 != gFastRelaxedDerived]);
813     }
814     vlog("\tTesting double precision? %s\n", no_yes[0 != gHasDouble]);
815     if (sizeof(long double) == sizeof(double) && gHasDouble)
816     {
817         vlog("\n\t\tWARNING: Host system long double does not have better "
818              "precision than double!\n");
819         vlog("\t\t         All double results that do not match the reference "
820              "result have their reported\n");
821         vlog("\t\t         error inflated by 0.5 ulps to account for the fact "
822              "that this system\n");
823         vlog("\t\t         can not accurately represent the right result to an "
824              "accuracy closer\n");
825         vlog("\t\t         than half an ulp. See comments in "
826              "Bruteforce_Ulp_Error_Double() for more details.\n\n");
827     }
828 
829     vlog("\tIs Embedded? %s\n", no_yes[0 != gIsEmbedded]);
830     if (gIsEmbedded)
831         vlog("\tRunning in RTZ mode? %s\n", no_yes[0 != gIsInRTZMode]);
832     vlog("\tTininess is detected before rounding? %s\n",
833          no_yes[0 != gCheckTininessBeforeRounding]);
834     vlog("\tWorker threads: %d\n", GetThreadCount());
835     vlog("\tTesting vector sizes:");
836     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
837         vlog("\t%d", sizeValues[i]);
838 
839     vlog("\n");
840     vlog("\tVerbose? %s\n", no_yes[0 != gVerboseBruteForce]);
841     vlog("\n\n");
842 
843     // Check to see if we are using single threaded mode on other than a 1.0
844     // device
845     if (getenv("CL_TEST_SINGLE_THREADED"))
846     {
847 
848         char device_version[1024] = { 0 };
849         clGetDeviceInfo(gDevice, CL_DEVICE_VERSION, sizeof(device_version),
850                         device_version, NULL);
851 
852         if (strcmp("OpenCL 1.0 ", device_version))
853         {
854             vlog("ERROR: CL_TEST_SINGLE_THREADED is set in the environment. "
855                  "Running single threaded.\n");
856         }
857     }
858 
859     return TEST_PASS;
860 }
861 
ReleaseCL(void)862 static void ReleaseCL(void)
863 {
864     uint32_t i;
865     clReleaseMemObject(gInBuffer);
866     clReleaseMemObject(gInBuffer2);
867     clReleaseMemObject(gInBuffer3);
868     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
869     {
870         clReleaseMemObject(gOutBuffer[i]);
871         clReleaseMemObject(gOutBuffer2[i]);
872     }
873     clReleaseCommandQueue(gQueue);
874     clReleaseContext(gContext);
875 
876     align_free(gIn);
877     align_free(gIn2);
878     align_free(gIn3);
879     align_free(gOut_Ref);
880     align_free(gOut_Ref2);
881 
882     for (i = gMinVectorSizeIndex; i < gMaxVectorSizeIndex; i++)
883     {
884         align_free(gOut[i]);
885         align_free(gOut2[i]);
886     }
887 }
888 
_LogBuildError(cl_program p,int line,const char * file)889 void _LogBuildError(cl_program p, int line, const char *file)
890 {
891     char the_log[2048] = "";
892 
893     vlog_error("%s:%d: Build Log:\n", file, line);
894     if (0
895         == clGetProgramBuildInfo(p, gDevice, CL_PROGRAM_BUILD_LOG,
896                                  sizeof(the_log), the_log, NULL))
897         vlog_error("%s", the_log);
898     else
899         vlog_error("*** Error getting build log for program %p\n", p);
900 }
901 
InitILogbConstants(void)902 int InitILogbConstants(void)
903 {
904     int error;
905     const char *kernelSource =
906         R"(__kernel void GetILogBConstants( __global int *out )
907         {
908             out[0] = FP_ILOGB0;
909             out[1] = FP_ILOGBNAN;
910         })";
911 
912     clProgramWrapper query;
913     clKernelWrapper kernel;
914     error = create_single_kernel_helper(gContext, &query, &kernel, 1,
915                                         &kernelSource, "GetILogBConstants");
916     if (error != CL_SUCCESS)
917     {
918         vlog_error("Error: Unable to create kernel to get FP_ILOGB0 and "
919                    "FP_ILOGBNAN for the device. (%d)",
920                    error);
921         return error;
922     }
923 
924     if ((error =
925              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
926                             &gOutBuffer[gMinVectorSizeIndex])))
927     {
928         vlog_error("Error: Unable to set kernel arg to get FP_ILOGB0 and "
929                    "FP_ILOGBNAN for the device. Err = %d",
930                    error);
931         return error;
932     }
933 
934     size_t dim = 1;
935     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
936                                         NULL, NULL)))
937     {
938         vlog_error("Error: Unable to execute kernel to get FP_ILOGB0 and "
939                    "FP_ILOGBNAN for the device. Err = %d",
940                    error);
941         return error;
942     }
943 
944     struct
945     {
946         cl_int ilogb0, ilogbnan;
947     } data;
948     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
949                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
950                                      NULL)))
951     {
952         vlog_error("Error: unable to read FP_ILOGB0 and FP_ILOGBNAN from the "
953                    "device. Err = %d",
954                    error);
955         return error;
956     }
957 
958     gDeviceILogb0 = data.ilogb0;
959     gDeviceILogbNaN = data.ilogbnan;
960 
961     return 0;
962 }
963 
IsTininessDetectedBeforeRounding(void)964 int IsTininessDetectedBeforeRounding(void)
965 {
966     int error;
967     const char *kernelSource =
968         R"(__kernel void IsTininessDetectedBeforeRounding( __global float *out )
969         {
970            volatile float a = 0x1.000002p-126f;
971            volatile float b = 0x1.fffffcp-1f;
972            out[0] = a * b; // product is 0x1.fffffffffff8p-127
973         })";
974 
975     clProgramWrapper query;
976     clKernelWrapper kernel;
977     error =
978         create_single_kernel_helper(gContext, &query, &kernel, 1, &kernelSource,
979                                     "IsTininessDetectedBeforeRounding");
980     if (error != CL_SUCCESS)
981     {
982         vlog_error("Error: Unable to create kernel to detect how tininess is "
983                    "detected for the device. (%d)",
984                    error);
985         return error;
986     }
987 
988     if ((error =
989              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
990                             &gOutBuffer[gMinVectorSizeIndex])))
991     {
992         vlog_error("Error: Unable to set kernel arg to detect how tininess is "
993                    "detected  for the device. Err = %d",
994                    error);
995         return error;
996     }
997 
998     size_t dim = 1;
999     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1000                                         NULL, NULL)))
1001     {
1002         vlog_error("Error: Unable to execute kernel to detect how tininess is "
1003                    "detected  for the device. Err = %d",
1004                    error);
1005         return error;
1006     }
1007 
1008     struct
1009     {
1010         cl_uint f;
1011     } data;
1012     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1013                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1014                                      NULL)))
1015     {
1016         vlog_error("Error: unable to read result from tininess test from the "
1017                    "device. Err = %d",
1018                    error);
1019         return error;
1020     }
1021 
1022     gCheckTininessBeforeRounding = 0 == (data.f & 0x7fffffff);
1023 
1024     return 0;
1025 }
1026 
1027 
MakeKernel(const char ** c,cl_uint count,const char * name,cl_kernel * k,cl_program * p,bool relaxedMode)1028 int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
1029                cl_program *p, bool relaxedMode)
1030 {
1031     int error = 0;
1032     char options[200] = "";
1033 
1034     if (gForceFTZ)
1035     {
1036         strcat(options, " -cl-denorms-are-zero");
1037     }
1038 
1039     if (relaxedMode)
1040     {
1041         strcat(options, " -cl-fast-relaxed-math");
1042     }
1043 
1044     error =
1045         create_single_kernel_helper(gContext, p, k, count, c, name, options);
1046     if (error != CL_SUCCESS)
1047     {
1048         vlog_error("\t\tFAILED -- Failed to create kernel. (%d)\n", error);
1049         return error;
1050     }
1051 
1052     return error;
1053 }
1054 
MakeKernels(const char ** c,cl_uint count,const char * name,cl_uint kernel_count,cl_kernel * k,cl_program * p,bool relaxedMode)1055 int MakeKernels(const char **c, cl_uint count, const char *name,
1056                 cl_uint kernel_count, cl_kernel *k, cl_program *p,
1057                 bool relaxedMode)
1058 {
1059     int error = 0;
1060     cl_uint i;
1061     char options[200] = "";
1062 
1063     if (gForceFTZ)
1064     {
1065         strcat(options, " -cl-denorms-are-zero ");
1066     }
1067 
1068     if (gFloatCapabilities & CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT)
1069     {
1070         strcat(options, " -cl-fp32-correctly-rounded-divide-sqrt ");
1071     }
1072 
1073     if (relaxedMode)
1074     {
1075         strcat(options, " -cl-fast-relaxed-math");
1076     }
1077 
1078     error =
1079         create_single_kernel_helper(gContext, p, NULL, count, c, NULL, options);
1080     if (error != CL_SUCCESS)
1081     {
1082         vlog_error("\t\tFAILED -- Failed to create program. (%d)\n", error);
1083         return error;
1084     }
1085 
1086 
1087     memset(k, 0, kernel_count * sizeof(*k));
1088     for (i = 0; i < kernel_count; i++)
1089     {
1090         k[i] = clCreateKernel(*p, name, &error);
1091         if (NULL == k[i] || error)
1092         {
1093             char buffer[2048] = "";
1094 
1095             vlog_error("\t\tFAILED -- clCreateKernel() failed: (%d)\n", error);
1096             clGetProgramBuildInfo(*p, gDevice, CL_PROGRAM_BUILD_LOG,
1097                                   sizeof(buffer), buffer, NULL);
1098             vlog_error("Log: %s\n", buffer);
1099             clReleaseProgram(*p);
1100             return error;
1101         }
1102     }
1103 
1104     return error;
1105 }
1106 
1107 
IsInRTZMode(void)1108 static int IsInRTZMode(void)
1109 {
1110     int error;
1111     const char *kernelSource =
1112         R"(__kernel void GetRoundingMode( __global int *out )
1113         {
1114             volatile float a = 0x1.0p23f;
1115             volatile float b = -0x1.0p23f;
1116             out[0] = (a + 0x1.fffffep-1f == a) && (b - 0x1.fffffep-1f == b);
1117         })";
1118 
1119     clProgramWrapper query;
1120     clKernelWrapper kernel;
1121     error = create_single_kernel_helper(gContext, &query, &kernel, 1,
1122                                         &kernelSource, "GetRoundingMode");
1123     if (error != CL_SUCCESS)
1124     {
1125         vlog_error("Error: Unable to create kernel to detect RTZ mode for the "
1126                    "device. (%d)",
1127                    error);
1128         return error;
1129     }
1130 
1131     if ((error =
1132              clSetKernelArg(kernel, 0, sizeof(gOutBuffer[gMinVectorSizeIndex]),
1133                             &gOutBuffer[gMinVectorSizeIndex])))
1134     {
1135         vlog_error("Error: Unable to set kernel arg to detect RTZ mode for the "
1136                    "device. Err = %d",
1137                    error);
1138         return error;
1139     }
1140 
1141     size_t dim = 1;
1142     if ((error = clEnqueueNDRangeKernel(gQueue, kernel, 1, NULL, &dim, NULL, 0,
1143                                         NULL, NULL)))
1144     {
1145         vlog_error("Error: Unable to execute kernel to detect RTZ mode for the "
1146                    "device. Err = %d",
1147                    error);
1148         return error;
1149     }
1150 
1151     struct
1152     {
1153         cl_int isRTZ;
1154     } data;
1155     if ((error = clEnqueueReadBuffer(gQueue, gOutBuffer[gMinVectorSizeIndex],
1156                                      CL_TRUE, 0, sizeof(data), &data, 0, NULL,
1157                                      NULL)))
1158     {
1159         vlog_error(
1160             "Error: unable to read RTZ mode data from the device. Err = %d",
1161             error);
1162         return error;
1163     }
1164 
1165     return data.isRTZ;
1166 }
1167 
1168 #pragma mark -
1169 
1170 const char *sizeNames[VECTOR_SIZE_COUNT] = { "", "2", "3", "4", "8", "16" };
1171 const int sizeValues[VECTOR_SIZE_COUNT] = { 1, 2, 3, 4, 8, 16 };
1172 
1173 // TODO: There is another version of Ulp_Error_Double defined in
1174 // test_common/harness/errorHelpers.c
Bruteforce_Ulp_Error_Double(double test,long double reference)1175 float Bruteforce_Ulp_Error_Double(double test, long double reference)
1176 {
1177     // Check for Non-power-of-two and NaN
1178 
1179     // Note: This function presumes that someone has already tested whether the
1180     // result is correctly, rounded before calling this function.  That test:
1181     //
1182     //    if( (float) reference == test )
1183     //        return 0.0f;
1184     //
1185     // would ensure that cases like fabs(reference) > FLT_MAX are weeded out
1186     // before we get here. Otherwise, we'll return inf ulp error here, for what
1187     // are otherwise correctly rounded results.
1188 
1189     // Deal with long double = double
1190     // On most systems long double is a higher precision type than double. They
1191     // provide either a 80-bit or greater floating point type, or they provide a
1192     // head-tail double double format. That is sufficient to represent the
1193     // accuracy of a floating point result to many more bits than double and we
1194     // can calculate sub-ulp errors. This is the standard system for which this
1195     // test suite is designed.
1196     //
1197     // On some systems double and long double are the same thing. Then we run
1198     // into a problem, because our representation of the infinitely precise
1199     // result (passed in as reference above) can be off by as much as a half
1200     // double precision ulp itself.  In this case, we inflate the reported error
1201     // by half an ulp to take this into account.  A more correct and permanent
1202     // fix would be to undertake refactoring the reference code to return
1203     // results in this format:
1204     //
1205     //    typedef struct DoubleReference
1206     //    { // true value = correctlyRoundedResult + ulps *
1207     //    ulp(correctlyRoundedResult)        (infinitely precise)
1208     //        double  correctlyRoundedResult;     // as best we can
1209     //        double  ulps;                       // plus a fractional amount to
1210     //        account for the difference
1211     //    }DoubleReference;                       //     between infinitely
1212     //    precise result and correctlyRoundedResult, in units of ulps.
1213     //
1214     // This would provide a useful higher-than-double precision format for
1215     // everyone that we can use, and would solve a few problems with
1216     // representing absolute errors below DBL_MIN and over DBL_MAX for systems
1217     // that use a head to tail double double for long double.
1218 
1219     int x;
1220     long double testVal = test;
1221 
1222     // First, handle special reference values
1223     if (isinf(reference))
1224     {
1225         if (reference == testVal) return 0.0f;
1226 
1227         return INFINITY;
1228     }
1229 
1230     if (isnan(reference))
1231     {
1232         if (isnan(testVal)) return 0.0f;
1233 
1234         return INFINITY;
1235     }
1236 
1237     if (0.0L != reference && 0.5L != frexpl(reference, &x))
1238     { // Non-zero and Non-power of two
1239 
1240         // allow correctly rounded results to pass through unmolested. (We might
1241         // add error to it below.) There is something of a performance
1242         // optimization here.
1243         if (testVal == reference) return 0.0f;
1244 
1245         // The unbiased exponent of the ulp unit place
1246         int ulp_exp =
1247             DBL_MANT_DIG - 1 - MAX(ilogbl(reference), DBL_MIN_EXP - 1);
1248 
1249         // Scale the exponent of the error
1250         float result = (float)scalbnl(testVal - reference, ulp_exp);
1251 
1252         // account for rounding error in reference result on systems that do not
1253         // have a higher precision floating point type (see above)
1254         if (sizeof(long double) == sizeof(double))
1255             result += copysignf(0.5f, result);
1256 
1257         return result;
1258     }
1259 
1260     // reference is a normal power of two or a zero
1261     // The unbiased exponent of the ulp unit place
1262     int ulp_exp =
1263         DBL_MANT_DIG - 1 - MAX(ilogbl(reference) - 1, DBL_MIN_EXP - 1);
1264 
1265     // allow correctly rounded results to pass through unmolested. (We might add
1266     // error to it below.) There is something of a performance optimization here
1267     // too.
1268     if (testVal == reference) return 0.0f;
1269 
1270     // Scale the exponent of the error
1271     float result = (float)scalbnl(testVal - reference, ulp_exp);
1272 
1273     // account for rounding error in reference result on systems that do not
1274     // have a higher precision floating point type (see above)
1275     if (sizeof(long double) == sizeof(double))
1276         result += copysignf(0.5f, result);
1277 
1278     return result;
1279 }
1280 
Abs_Error(float test,double reference)1281 float Abs_Error(float test, double reference)
1282 {
1283     if (isnan(test) && isnan(reference)) return 0.0f;
1284     return fabs((float)(reference - (double)test));
1285 }
1286 
RoundUpToNextPowerOfTwo(cl_uint x)1287 cl_uint RoundUpToNextPowerOfTwo(cl_uint x)
1288 {
1289     if (0 == (x & (x - 1))) return x;
1290 
1291     while (x & (x - 1)) x &= x - 1;
1292 
1293     return x + x;
1294 }
1295