1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef UTILITY_H
17 #define UTILITY_H
18 
19 #include "harness/compat.h"
20 #include "harness/rounding_mode.h"
21 #include "harness/fpcontrol.h"
22 #include "harness/testHarness.h"
23 #include "harness/ThreadPool.h"
24 #include "harness/conversions.h"
25 
26 #define BUFFER_SIZE (1024 * 1024 * 2)
27 #define EMBEDDED_REDUCTION_FACTOR (64)
28 
29 #if defined(__GNUC__)
30 #define UNUSED __attribute__((unused))
31 #else
32 #define UNUSED
33 #endif
34 
35 struct Func;
36 
37 extern int gWimpyReductionFactor;
38 
39 #define VECTOR_SIZE_COUNT 6
40 extern const char *sizeNames[VECTOR_SIZE_COUNT];
41 extern const int sizeValues[VECTOR_SIZE_COUNT];
42 
43 extern cl_device_id gDevice;
44 extern cl_context gContext;
45 extern cl_command_queue gQueue;
46 extern void *gIn;
47 extern void *gIn2;
48 extern void *gIn3;
49 extern void *gOut_Ref;
50 extern void *gOut_Ref2;
51 extern void *gOut[VECTOR_SIZE_COUNT];
52 extern void *gOut2[VECTOR_SIZE_COUNT];
53 extern cl_mem gInBuffer;
54 extern cl_mem gInBuffer2;
55 extern cl_mem gInBuffer3;
56 extern cl_mem gOutBuffer[VECTOR_SIZE_COUNT];
57 extern cl_mem gOutBuffer2[VECTOR_SIZE_COUNT];
58 extern int gSkipCorrectnessTesting;
59 extern int gForceFTZ;
60 extern int gFastRelaxedDerived;
61 extern int gWimpyMode;
62 extern int gIsInRTZMode;
63 extern int gInfNanSupport;
64 extern int gIsEmbedded;
65 extern int gVerboseBruteForce;
66 extern uint32_t gMaxVectorSizeIndex;
67 extern uint32_t gMinVectorSizeIndex;
68 extern cl_device_fp_config gFloatCapabilities;
69 
70 #define LOWER_IS_BETTER 0
71 #define HIGHER_IS_BETTER 1
72 
73 #include "harness/errorHelpers.h"
74 
75 #if defined(_MSC_VER)
76 // Deal with missing scalbn on windows
77 #define scalbnf(_a, _i) ldexpf(_a, _i)
78 #define scalbn(_a, _i) ldexp(_a, _i)
79 #define scalbnl(_a, _i) ldexpl(_a, _i)
80 #endif
81 
82 float Abs_Error(float test, double reference);
83 float Ulp_Error(float test, double reference);
84 float Bruteforce_Ulp_Error_Double(double test, long double reference);
85 
86 int MakeKernel(const char **c, cl_uint count, const char *name, cl_kernel *k,
87                cl_program *p, bool relaxedMode);
88 int MakeKernels(const char **c, cl_uint count, const char *name,
89                 cl_uint kernel_count, cl_kernel *k, cl_program *p,
90                 bool relaxedMode);
91 
92 // used to convert a bucket of bits into a search pattern through double
static inline double DoubleFromUInt32(uint32_t bits);
// Expand 32 bits into a 64-bit double search pattern: the high 20 bits of the
// input land in the top of the double, the low 12 bits in the bottom, and the
// middle is filled with copies of bit 11 so it is all ones or all zeros.
static inline double DoubleFromUInt32(uint32_t bits)
{
    union {
        uint64_t u;
        double d;
    } pun;

    // Scatter the input to both ends of the 64-bit word:
    // 0x89abcdef -> 0x89abc00000000def
    pun.u = ((uint64_t)(bits & ~0xfffU) << 32) | (bits & 0xfffU);

    // Sign extend the leading bit of the low 12-bit field through the middle
    // region so it becomes all 1s or all 0s.
    pun.u -= (uint64_t)(bits & 0x800U) << 1;

    return pun.d;
}
112 
113 void _LogBuildError(cl_program p, int line, const char *file);
114 #define LogBuildError(program) _LogBuildError(program, __LINE__, __FILE__)
115 
116 // The spec is fairly clear that we may enforce a hard cutoff to prevent
117 // premature flushing to zero.
118 // However, to avoid conflict for 1.0, we are letting results at TYPE_MIN +
119 // ulp_limit to be flushed to zero.
// Returns nonzero if |x|, after backing off by ulps * FLT_TRUE_MIN of slack,
// falls below FLT_MIN — i.e. the result may legitimately be flushed to zero
// by an FTZ implementation.
static inline int IsFloatResultSubnormal(double x, float ulps)
{
    const double slack = MAKE_HEX_DOUBLE(0x1.0p-149, 0x1, -149) * (double)ulps;
    const double magnitude = fabs(x) - slack;
    return magnitude < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
125 
// Absolute-error variant of IsFloatResultSubnormal: returns nonzero if x,
// after subtracting abs_err of slack, falls below FLT_MIN. Note x is NOT
// taken by magnitude here, matching the ulp-based variant's documented use.
static inline int IsFloatResultSubnormalAbsError(double x, float abs_err)
{
    const double adjusted = x - abs_err;
    return adjusted < MAKE_HEX_DOUBLE(0x1.0p-126, 0x1, -126);
}
131 
// Returns nonzero if |x|, after backing off by ulps * DBL_TRUE_MIN of slack,
// falls below DBL_MIN — i.e. the double result may be flushed to zero.
static inline int IsDoubleResultSubnormal(long double x, float ulps)
{
    const long double slack =
        MAKE_HEX_LONG(0x1.0p-1074, 0x1, -1074) * (long double)ulps;
    const long double magnitude = fabsl(x) - slack;
    return magnitude < MAKE_HEX_LONG(0x1.0p-1022, 0x1, -1022);
}
137 
IsFloatInfinity(double x)138 static inline int IsFloatInfinity(double x)
139 {
140     union {
141         cl_float d;
142         cl_uint u;
143     } u;
144     u.d = (cl_float)x;
145     return ((u.u & 0x7fffffffU) == 0x7F800000U);
146 }
147 
IsFloatMaxFloat(double x)148 static inline int IsFloatMaxFloat(double x)
149 {
150     union {
151         cl_float d;
152         cl_uint u;
153     } u;
154     u.d = (cl_float)x;
155     return ((u.u & 0x7fffffffU) == 0x7F7FFFFFU);
156 }
157 
IsFloatNaN(double x)158 static inline int IsFloatNaN(double x)
159 {
160     union {
161         cl_float d;
162         cl_uint u;
163     } u;
164     u.d = (cl_float)x;
165     return ((u.u & 0x7fffffffU) > 0x7F800000U);
166 }
167 
168 extern cl_uint RoundUpToNextPowerOfTwo(cl_uint x);
169 
170 // Windows (since long double got deprecated) sets the x87 to 53-bit precision
171 // (that's x87 default state).  This causes problems with the tests that
172 // convert long and ulong to float and double or otherwise deal with values
173 // that need more precision than 53-bit. So, set the x87 to 64-bit precision.
// Sets the x87 FPU Precision Control field to 64-bit significand precision
// (double extended) on the platforms where it defaults to 53-bit. On all
// other platforms this is a no-op.
static inline void Force64BitFPUPrecision(void)
{
#if __MINGW32__
    // The usual method is to use _controlfp as follows:
    //     #include <float.h>
    //     _controlfp(_PC_64, _MCW_PC);
    //
    // _controlfp is available on MinGW32 but not on MinGW64. Instead of having
    // divergent code just use inline assembly which works for both.
    unsigned short int orig_cw = 0;
    unsigned short int new_cw = 0;
    // Read the control word, OR in 11b into the Precision Control field
    // (bits 8-9), and write it back.
    __asm__ __volatile__("fstcw %0" : "=m"(orig_cw));
    new_cw = orig_cw | 0x0300; // set precision to 64-bit
    __asm__ __volatile__("fldcw  %0" ::"m"(new_cw));
#elif defined(_WIN32) && defined(__INTEL_COMPILER)
    // Unfortunately, usual method (`_controlfp( _PC_64, _MCW_PC );') does *not*
    // work on win.x64: > On the x64 architecture, changing the floating point
    // precision is not supported. (Taken from
    // http://msdn.microsoft.com/en-us/library/e9b52ceh%28v=vs.100%29.aspx)
    int cw;
    __asm { fnstcw cw }
    ; // Get current value of FPU control word.
    cw = cw & 0xfffffcff
        | (3 << 8); // Set Precision Control to Double Extended Precision.
    __asm { fldcw cw }
    ; // Set new value of FPU control word.
#else
    /* Implement for other platforms if needed */
#endif
}
204 
205 extern void memset_pattern4(void *dest, const void *src_pattern, size_t bytes);
206 
207 typedef union {
208     int32_t i;
209     float f;
210 } int32f_t;
211 
212 typedef union {
213     int64_t l;
214     double d;
215 } int64d_t;
216 
217 void MulD(double *rhi, double *rlo, double u, double v);
218 void AddD(double *rhi, double *rlo, double a, double b);
219 void MulDD(double *rhi, double *rlo, double xh, double xl, double yh,
220            double yl);
221 void AddDD(double *rhi, double *rlo, double xh, double xl, double yh,
222            double yl);
223 void DivideDD(double *chi, double *clo, double a, double b);
224 int compareFloats(float x, float y);
225 int compareDoubles(double x, double y);
226 
227 void logFunctionInfo(const char *fname, unsigned int float_size,
228                      unsigned int isFastRelaxed);
229 
230 float getAllowedUlpError(const Func *f, const bool relaxed);
231 
getTestScale(size_t typeSize)232 static inline cl_uint getTestScale(size_t typeSize)
233 {
234     if (gWimpyMode)
235     {
236         return (cl_uint)typeSize * 2 * gWimpyReductionFactor;
237     }
238     else if (gIsEmbedded)
239     {
240         return EMBEDDED_REDUCTION_FACTOR;
241     }
242     else
243     {
244         return 1;
245     }
246 }
247 
// Returns how far the input value advances between test iterations for a
// buffer of bufferSize bytes holding elements of typeSize bytes.
static inline uint64_t getTestStep(size_t typeSize, size_t bufferSize)
{
    // Wimpy mode: stride across the 2^32 input space, scaled by the
    // user-controlled reduction factor.
    if (gWimpyMode) return (1ULL << 32) * gWimpyReductionFactor / (512);

    if (gIsEmbedded)
    {
        // NOTE(review): this branch uses the BUFFER_SIZE macro rather than
        // the bufferSize parameter — presumably intentional; confirm against
        // callers before changing.
        return (BUFFER_SIZE / typeSize) * EMBEDDED_REDUCTION_FACTOR;
    }

    // Full coverage: one buffer's worth of elements per step.
    return bufferSize / typeSize;
}
263 
264 #endif /* UTILITY_H */
265