1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 
17 #include "utility.h"
18 #include "function_list.h"
19 
20 #if defined(__PPC__)
21 // Global varaiable used to hold the FPU control register state. The FPSCR
22 // register can not be used because not all Power implementations retain or
23 // observed the NI (non-IEEE mode) bit.
24 __thread fpu_control_t fpu_control = 0;
25 #endif
26 
MulD(double * rhi,double * rlo,double u,double v)27 void MulD(double *rhi, double *rlo, double u, double v)
28 {
29     const double c = 134217729.0; // 1+2^27
30     double up, u1, u2, vp, v1, v2;
31 
32     up = u * c;
33     u1 = (u - up) + up;
34     u2 = u - u1;
35 
36     vp = v * c;
37     v1 = (v - vp) + vp;
38     v2 = v - v1;
39 
40     double rh = u * v;
41     double rl = (((u1 * v1 - rh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
42 
43     *rhi = rh;
44     *rlo = rl;
45 }
46 
AddD(double * rhi,double * rlo,double a,double b)47 void AddD(double *rhi, double *rlo, double a, double b)
48 {
49     double zhi, zlo;
50     zhi = a + b;
51     if (fabs(a) > fabs(b))
52     {
53         zlo = zhi - a;
54         zlo = b - zlo;
55     }
56     else
57     {
58         zlo = zhi - b;
59         zlo = a - zlo;
60     }
61 
62     *rhi = zhi;
63     *rlo = zlo;
64 }
65 
MulDD(double * rhi,double * rlo,double xh,double xl,double yh,double yl)66 void MulDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
67 {
68     double mh, ml;
69     double c = 134217729.0;
70     double up, u1, u2, vp, v1, v2;
71 
72     up = xh * c;
73     u1 = (xh - up) + up;
74     u2 = xh - u1;
75 
76     vp = yh * c;
77     v1 = (yh - vp) + vp;
78     v2 = yh - v1;
79 
80     mh = xh * yh;
81     ml = (((u1 * v1 - mh) + (u1 * v2)) + (u2 * v1)) + (u2 * v2);
82     ml += xh * yl + xl * yh;
83 
84     *rhi = mh + ml;
85     *rlo = (mh - (*rhi)) + ml;
86 }
87 
AddDD(double * rhi,double * rlo,double xh,double xl,double yh,double yl)88 void AddDD(double *rhi, double *rlo, double xh, double xl, double yh, double yl)
89 {
90     double r, s;
91     r = xh + yh;
92     s = (fabs(xh) > fabs(yh)) ? (xh - r + yh + yl + xl)
93                               : (yh - r + xh + xl + yl);
94     *rhi = r + s;
95     *rlo = (r - (*rhi)) + s;
96 }
97 
DivideDD(double * chi,double * clo,double a,double b)98 void DivideDD(double *chi, double *clo, double a, double b)
99 {
100     *chi = a / b;
101     double rhi, rlo;
102     MulD(&rhi, &rlo, *chi, b);
103     AddDD(&rhi, &rlo, -rhi, -rlo, a, 0.0);
104     *clo = rhi / b;
105 }
106 
107 // These functions comapre two floats/doubles. Since some platforms may choose
108 // to flush denormals to zeros before comparison, comparison like a < b may give
109 // wrong result in "certain cases" where we do need correct compasion result
110 // when operands are denormals .... these functions comapre floats/doubles using
111 // signed integer/long int rep. In other cases, when flushing to zeros is fine,
112 // these should not be used. Also these doesn't check for nans and assume nans
113 // are handled separately as special edge case by the caller which calls these
114 // functions return 0 if both are equal, 1 if x > y and -1 if x < y.
115 
compareFloats(float x,float y)116 inline int compareFloats(float x, float y)
117 {
118     int32f_t a, b;
119 
120     a.f = x;
121     b.f = y;
122 
123     if (a.i & 0x80000000) a.i = 0x80000000 - a.i;
124     if (b.i & 0x80000000) b.i = 0x80000000 - b.i;
125 
126     if (a.i == b.i) return 0;
127 
128     return a.i < b.i ? -1 : 1;
129 }
130 
compareDoubles(double x,double y)131 inline int compareDoubles(double x, double y)
132 {
133     int64d_t a, b;
134 
135     a.d = x;
136     b.d = y;
137 
138     if (a.l & 0x8000000000000000LL) a.l = 0x8000000000000000LL - a.l;
139     if (b.l & 0x8000000000000000LL) b.l = 0x8000000000000000LL - b.l;
140 
141     if (a.l == b.l) return 0;
142 
143     return a.l < b.l ? -1 : 1;
144 }
145 
logFunctionInfo(const char * fname,unsigned int float_size,unsigned int isFastRelaxed)146 void logFunctionInfo(const char *fname, unsigned int float_size,
147                      unsigned int isFastRelaxed)
148 {
149     char const *fpSizeStr = NULL;
150     char const *fpFastRelaxedStr = "";
151     switch (float_size)
152     {
153         case sizeof(cl_double): fpSizeStr = "fp64"; break;
154         case sizeof(cl_float): fpSizeStr = "fp32"; break;
155         case sizeof(cl_half): fpSizeStr = "fp16"; break;
156     }
157     if (isFastRelaxed)
158     {
159         fpFastRelaxedStr = "rlx";
160     }
161     vlog("%15s %4s %4s", fname, fpSizeStr, fpFastRelaxedStr);
162 }
163 
getAllowedUlpError(const Func * f,const bool relaxed)164 float getAllowedUlpError(const Func *f, const bool relaxed)
165 {
166     float ulp;
167 
168     if (relaxed)
169     {
170         if (gIsEmbedded)
171         {
172             ulp = f->relaxed_embedded_error;
173         }
174         else
175         {
176             ulp = f->relaxed_error;
177         }
178     }
179     else
180     {
181         if (gIsEmbedded)
182         {
183             ulp = f->float_embedded_ulps;
184         }
185         else
186         {
187             ulp = f->float_ulps;
188         }
189     }
190 
191     return ulp;
192 }
193