1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #include "compat.h"
17 
18 #if defined(_MSC_VER)
19 
#include <float.h>
#include <limits.h>
#include <stdlib.h>

#include <CL/cl.h>

#include <windows.h>
26 
27 #if _MSC_VER < 1900 && !defined(__INTEL_COMPILER)
28 
29 ///////////////////////////////////////////////////////////////////
30 //
//      copysignf, copysign, copysignl, rintf, rint, rintl
32 //
33 ///////////////////////////////////////////////////////////////////
34 
copysignf(float x,float y)35 float copysignf(float x, float y)
36 {
37     union {
38         cl_uint u;
39         float f;
40     } ux, uy;
41 
42     ux.f = x;
43     uy.f = y;
44 
45     ux.u = (ux.u & 0x7fffffffU) | (uy.u & 0x80000000U);
46 
47     return ux.f;
48 }
49 
copysign(double x,double y)50 double copysign(double x, double y)
51 {
52     union {
53         cl_ulong u;
54         double f;
55     } ux, uy;
56 
57     ux.f = x;
58     uy.f = y;
59 
60     ux.u = (ux.u & 0x7fffffffffffffffULL) | (uy.u & 0x8000000000000000ULL);
61 
62     return ux.f;
63 }
64 
// Returns a long double with the magnitude of x and the sign of y.
//
// The storage layout of long double differs between toolchains (8 bytes
// with the same format as double on MSVC, 80-bit x87 extended elsewhere),
// so the position of the sign bit is not hard-coded.  Instead it is
// discovered by comparing the byte images of +1.0L and -1.0L, which differ
// only in the sign bit (and possibly in padding bytes, which carry no
// value and are therefore harmless to copy).
long double copysignl(long double x, long double y)
{
    union {
        long double f;
        unsigned char b[sizeof(long double)];
    } plus, minus, ux, uy;
    unsigned int i;

    plus.f = 1.0L;
    minus.f = -1.0L;
    ux.f = x;
    uy.f = y;

    for (i = 0; i < sizeof(long double); ++i)
    {
        // bits that differ between +1 and -1 in this byte (the sign bit)
        unsigned char signmask = (unsigned char)(plus.b[i] ^ minus.b[i]);

        ux.b[i] =
            (unsigned char)((ux.b[i] & ~signmask) | (uy.b[i] & signmask));
    }

    return ux.f;
}
83 
// Rounds x to an integral value using the current floating-point rounding
// mode.  Adding and then subtracting 2^23 (with the sign of x) forces the
// FPU to discard the fractional bits; values whose magnitude is already
// >= 2^23 (and NaN) are returned untouched.
float rintf(float x)
{
    const float two23 = 8388608.0f; /* 0x1.0p23f */

    if (fabsf(x) < two23)
    {
        float bias = copysignf(two23, x);
        float snapped = x + bias;
        snapped -= bias;
        // restore the sign so that e.g. -0.25f yields -0.0f
        return copysignf(snapped, x);
    }

    return x;
}
98 
// Rounds x to an integral value using the current floating-point rounding
// mode.  Adding and then subtracting 2^52 (with the sign of x) forces the
// FPU to discard the fractional bits; values whose magnitude is already
// >= 2^52 (and NaN) are returned untouched.
double rint(double x)
{
    const double two52 = 4503599627370496.0; /* 0x1.0p52 */

    if (fabs(x) < two52)
    {
        double bias = copysign(two52, x);
        double snapped = x + bias;
        snapped -= bias;
        // restore the sign so that e.g. -0.25 yields -0.0
        return copysign(snapped, x);
    }

    return x;
}
113 
// Rounds x to an integral value using the current floating-point rounding
// mode.
//
// The add/subtract trick needs the constant 2^(p-1), where p is the number
// of significand bits of long double.  That is 2^52 when long double has
// the same format as double (MSVC) and 2^63 for x87 extended precision, so
// the value is derived from LDBL_EPSILON (= 2^(1-p)) rather than
// hard-coded.  The magnitude test is done in long double precision so that
// values just below the threshold are not rounded up by a narrowing
// conversion and skipped.
long double rintl(long double x)
{
    const long double threshold = 1.0L / LDBL_EPSILON; /* 2^(p-1) */
    long double absx = fabsl(x);

    // values >= 2^(p-1) are already integral; NaN fails the comparison
    if (absx < threshold)
    {
        long double magic = copysignl(threshold, x);
        long double rounded = x + magic;
        rounded -= magic;
        // restore the sign so that e.g. -0.25L yields -0.0L
        x = copysignl(rounded, x);
    }

    return x;
}
129 
130 #if _MSC_VER < 1800
131 
132 ///////////////////////////////////////////////////////////////////
133 //
134 //                   ilogb, ilogbf, ilogbl
135 //
136 ///////////////////////////////////////////////////////////////////
// Fallback definitions for the special return values of the ilogb family.
// Each is only defined here when no earlier header has provided it; both
// fall back to INT_MIN.
#ifndef FP_ILOGB0
#define FP_ILOGB0 INT_MIN
#endif

#ifndef FP_ILOGBNAN
#define FP_ILOGBNAN INT_MIN
#endif
144 
// Returns the unbiased binary exponent of x as an int.
//   x == 0        -> FP_ILOGB0
//   x == +/-inf   -> INT_MAX
//   x is NaN      -> FP_ILOGBNAN
//   subnormal x   -> exponent of the value once normalised (down to -1074)
int ilogb(double x)
{
    union {
        double f;
        unsigned long long u;
    } u;
    u.f = x;

    unsigned long long absx = u.u & 0x7fffffffffffffffULL;

    // One unsigned comparison catches everything outside the normal range:
    // patterns below the smallest normal (0x0010000000000000) wrap around
    // to huge values, patterns at or above infinity are large already.
    if (absx - 0x0010000000000000ULL
        >= 0x7ff0000000000000ULL - 0x0010000000000000ULL)
    {
        if (absx == 0) return FP_ILOGB0;
        if (absx == 0x7ff0000000000000ULL) return INT_MAX;
        if (absx > 0x7ff0000000000000ULL) return FP_ILOGBNAN;

        // subnormal: graft the fraction onto 1.0, subtract 1.0 so the FPU
        // normalises it, then correct for the exponent that was borrowed
        u.u = absx | 0x3ff0000000000000ULL;
        u.f -= 1.0;
        return (int)(u.u >> 52) - (1023 + 1022);
    }

    return (int)(absx >> 52) - 1023;
}
173 
174 
ilogbf(float x)175 int ilogbf(float x)
176 {
177     union {
178         float f;
179         cl_uint u;
180     } u;
181     u.f = x;
182 
183     cl_uint absx = u.u & 0x7fffffff;
184     if (absx - 0x00800000U >= 0x7f800000U - 0x00800000U)
185     {
186         switch (absx)
187         {
188             case 0: return FP_ILOGB0;
189             case 0x7f800000U: return INT_MAX;
190             default:
191                 if (absx > 0x7f800000) return FP_ILOGBNAN;
192 
193                 // subnormal
194                 u.u = absx | 0x3f800000U;
195                 u.f -= 1.0f;
196                 return (u.u >> 23) - (127 + 126);
197         }
198     }
199 
200     return (absx >> 23) - 127;
201 }
202 
// Returns the unbiased binary exponent of x as an int (FP_ILOGB0 for zero,
// INT_MAX for infinity, FP_ILOGBNAN for NaN).
//
// When long double has the same format as double (the case for MSVC) the
// work is delegated to ilogb().  Otherwise the 80-bit x87 extended layout
// (64-bit significand with explicit integer bit, then 15-bit exponent and
// sign) is assumed.
int ilogbl(long double x)
{
#if LDBL_MANT_DIG == DBL_MANT_DIG
    return ilogb((double)x);
#else
    union {
        long double f;
        struct
        {
            unsigned long long m;
            unsigned short sexp;
        } u;
    } u;
    u.f = x;

    int exp = u.u.sexp & 0x7fff;
    if (0 == exp)
    {
        if (0 == u.u.m) return FP_ILOGB0;

        // pseudo-denormal (integer bit set with a zero exponent field):
        // the value is 1.f * 2^-16382
        if (u.u.m >> 63) return -0x3ffe;

        // subnormal: build 1.f with the bias exponent.  The explicit
        // integer bit must be set, otherwise the pattern is an x87
        // "unnormal" and the subtraction below produces NaN.
        u.u.sexp = 0x3fff;
        u.u.m |= 0x8000000000000000ULL;
        u.f -= 1.0L; // FPU normalises the remaining fraction
        exp = u.u.sexp & 0x7fff;

        return exp - (0x3fff + 0x3ffe);
    }
    else if (0x7fff == exp)
    {
        // any fraction bit below the integer bit marks a NaN
        if (u.u.m & 0x7fffffffffffffffULL) return FP_ILOGBNAN;

        return INT_MAX;
    }

    return exp - 0x3fff;
#endif
}
236 
237 #endif // _MSC_VER < 1800
238 
239 ///////////////////////////////////////////////////////////////////
240 //
241 //                 fmax, fmin, fmaxf, fminf
242 //
243 ///////////////////////////////////////////////////////////////////
244 
// Stores the raw 32-bit pattern of the float fx into *ux.
static void GET_BITS_SP32(float fx, unsigned int* ux)
{
    volatile union {
        unsigned int raw;
        float val;
    } pun;

    pun.val = fx;
    *ux = pun.raw;
}
254 /* static void GET_BITS_SP32(float fx, unsigned int* ux) */
255 /* { */
256 /*     volatile union {float f; unsigned int i;} _bitsy; */
257 /*     _bitsy.f = (fx); */
258 /*     *ux = _bitsy.i; */
259 /* } */
// Stores into *fx the float whose raw 32-bit pattern is ux.
static void PUT_BITS_SP32(unsigned int ux, float* fx)
{
    volatile union {
        unsigned int raw;
        float val;
    } pun;

    pun.raw = ux;
    *fx = pun.val;
}
269 /* static void PUT_BITS_SP32(unsigned int ux, float* fx) */
270 /* { */
271 /*     volatile union {float f; unsigned int i;} _bitsy; */
272 /*     _bitsy.i = (ux); */
273 /*     *fx = _bitsy.f; */
274 /* } */
// Stores the raw 64-bit pattern of the double dx into *lx.
static void GET_BITS_DP64(double dx, unsigned __int64* lx)
{
    volatile union {
        unsigned __int64 raw;
        double val;
    } pun;

    pun.val = dx;
    *lx = pun.raw;
}
// Stores into *dx the double whose raw 64-bit pattern is lx.
static void PUT_BITS_DP64(unsigned __int64 lx, double* dx)
{
    volatile union {
        unsigned __int64 raw;
        double val;
    } pun;

    pun.raw = lx;
    *dx = pun.val;
}
293 
#if 0
// Disabled: the enclosing "#if 0" keeps this function out of the build.
// It reads a word of x through _GET_HIGH_WORD (not defined in this file;
// presumably the upper 32 bits of the double -- confirm) and returns that
// word shifted right by 31.
int SIGNBIT_DP64(double x )
{
    int hx;
    _GET_HIGH_WORD(hx,x);
    return((hx>>31));
}
#endif
302 
303 #if _MSC_VER < 1900
304 
/* fmax(x, y) returns the larger (more positive) of x and y.
   A NaN argument is treated as a missing value: when y is NaN, x is
   returned; when only x is NaN, the comparison below is false and y is
   returned. When both are NaN the first argument is returned. */
double fmax(double x, double y)
{
    if (isnan(y) || x >= y) return x;

    return y;
}
318 
319 
/* fmin(x, y) returns the smaller (more negative) of x and y.
   A NaN argument is treated as a missing value: when y is NaN, x is
   returned; when only x is NaN, the comparison below is false and y is
   returned. When both are NaN the first argument is returned. */
double fmin(double x, double y)
{
    if (isnan(y) || x <= y) return x;

    return y;
}
331 
332 
/* fmaxf(x, y) returns the larger (more positive) of x and y.
   A NaN argument is treated as a missing value: when y is NaN, x is
   returned; when only x is NaN, the comparison below is false and y is
   returned. When both are NaN the first argument is returned. */
float fmaxf(float x, float y)
{
    if (isnan(y) || x >= y) return x;

    return y;
}
339 
/* fminf(x, y) returns the smaller (more negative) of x and y.
   A NaN argument is treated as a missing value: when y is NaN, x is
   returned; when only x is NaN, the comparison below is false and y is
   returned. When both are NaN the first argument is returned. */
float fminf(float x, float y)
{
    if (isnan(y) || x <= y) return x;

    return y;
}
351 
// Returns x * 2^n.
//
// The exponent is clamped to the int range and handed to ldexpl(), which
// takes care of zero, infinity, NaN, sign, overflow and gradual underflow
// for whatever format long double has on this toolchain.  This replaces a
// hand-rolled version that built scale factors through an x87-only union
// layout and returned +INFINITY on overflow regardless of the sign of x.
long double scalblnl(long double x, long n)
{
    // any |n| beyond the int range saturates the result anyway
    if (n > (long)INT_MAX)
        n = (long)INT_MAX;
    else if (n < (long)INT_MIN)
        n = (long)INT_MIN;

    return ldexpl(x, (int)n);
}
396 
397 ///////////////////////////////////////////////////////////////////
398 //
399 //                          log2
400 //
401 ///////////////////////////////////////////////////////////////////
// Base-2 logarithms of e and of 10, i.e. the factors that convert a
// natural or base-10 logarithm into a base-2 logarithm.
// NOTE(review): neither constant is referenced in the visible part of this
// file -- confirm whether they are still needed.
const static cl_double log_e_base2 = 1.4426950408889634074;
const static cl_double log_10_base2 = 3.3219280948873623478;
404 
405 // double log10(double x);
406 
log2(double x)407 double log2(double x) { return 1.44269504088896340735992468100189214 * log(x); }
408 
log2l(long double x)409 long double log2l(long double x)
410 {
411     return 1.44269504088896340735992468100189214L * log(x);
412 }
413 
trunc(double x)414 double trunc(double x)
415 {
416     double absx = fabs(x);
417 
418     if (absx < 4503599627370496.0 /* 0x1.0p52f */)
419     {
420         cl_long rounded = x;
421         x = copysign((double)rounded, x);
422     }
423 
424     return x;
425 }
426 
truncf(float x)427 float truncf(float x)
428 {
429     float absx = fabsf(x);
430 
431     if (absx < 8388608.0f /* 0x1.0p23f */)
432     {
433         cl_int rounded = x;
434         x = copysignf((float)rounded, x);
435     }
436 
437     return x;
438 }
439 
// Rounds x to the nearest integer, halfway cases away from zero, and
// returns it as a long.  Results that do not fit are saturated to
// LONG_MAX / LONG_MIN; NaN yields LONG_MIN.  Saturating on both sides
// avoids an out-of-range floating-to-integer conversion, which is
// undefined behaviour.
long lround(double x)
{
    double absx = fabs(x);

    if (absx < 0.5) return 0;

    // below 2^52 the value may have a fraction: add one half and truncate
    if (absx < 4503599627370496.0 /* 0x1.0p52 */)
    {
        absx += 0.5;
        long long rounded = (long long)absx;
        absx = (double)rounded;
        x = copysign(absx, x);
    }

    if (x >= (double)LONG_MAX) return LONG_MAX;

    // written as a negated comparison so that NaN is caught here as well
    if (!(x > (double)LONG_MIN)) return LONG_MIN;

    return (long)x;
}
458 
// Rounds x to the nearest integer, halfway cases away from zero, and
// returns it as a long.  Results that do not fit are saturated to
// LONG_MAX / LONG_MIN; NaN yields LONG_MIN.  Saturating on both sides
// avoids an out-of-range floating-to-integer conversion, which is
// undefined behaviour.
long lroundf(float x)
{
    float absx = fabsf(x);

    if (absx < 0.5f) return 0;

    // below 2^23 the value may have a fraction: add one half and truncate
    if (absx < 8388608.0f /* 0x1.0p23f */)
    {
        absx += 0.5f;
        int rounded = (int)absx;
        absx = (float)rounded;
        x = copysignf(absx, x);
    }

    if (x >= (float)LONG_MAX) return LONG_MAX;

    // written as a negated comparison so that NaN is caught here as well
    if (!(x > (float)LONG_MIN)) return LONG_MIN;

    return (long)x;
}
477 
round(double x)478 double round(double x)
479 {
480     double absx = fabs(x);
481 
482     if (absx < 0.5) return copysign(0.0, x);
483 
484     if (absx < 4503599627370496.0 /* 0x1.0p52 */)
485     {
486         absx += 0.5;
487         cl_long rounded = absx;
488         absx = rounded;
489         x = copysign(absx, x);
490     }
491 
492     return x;
493 }
494 
roundf(float x)495 float roundf(float x)
496 {
497     float absx = fabsf(x);
498 
499     if (absx < 0.5f) return copysignf(0.0f, x);
500 
501     if (absx < 8388608.0f)
502     {
503         absx += 0.5f;
504         cl_int rounded = absx;
505         absx = rounded;
506         x = copysignf(absx, x);
507     }
508 
509     return x;
510 }
511 
// Rounds x to the nearest integral value, halfway cases away from zero.
//
// Values of magnitude >= 2^(p-1), with p the number of significand bits of
// long double, are already integral and must be returned untouched:
// adding 0.5 to them would round in floating point and could change the
// value.  The threshold is derived from LDBL_EPSILON (= 2^(1-p)) so that
// it is 2^52 when long double has the format of double (MSVC) and 2^63 for
// x87 extended precision.
// NOTE: the integer conversion assumes p <= 64, which holds for both of
// those formats.
long double roundl(long double x)
{
    const long double threshold = 1.0L / LDBL_EPSILON; /* 2^(p-1) */
    long double absx = fabsl(x);

    if (absx < 0.5L) return copysignl(0.0L, x);

    if (absx < threshold)
    {
        // add one half, then let the integer conversion truncate
        absx += 0.5L;
        unsigned long long rounded = (unsigned long long)absx;
        absx = (long double)rounded;
        x = copysignl(absx, x);
    }

    return x;
}
528 
cbrtf(float x)529 float cbrtf(float x)
530 {
531     float z = pow(fabs((double)x), 1.0 / 3.0);
532     return copysignf(z, x);
533 }
534 
cbrt(double x)535 double cbrt(double x) { return copysign(pow(fabs(x), 1.0 / 3.0), x); }
536 
// Rounds x to an integer using the current floating-point rounding mode
// and returns it as a long.  Values that do not fit are saturated to
// LONG_MAX / LONG_MIN; NaN yields LONG_MIN.  Saturating on both sides
// avoids an out-of-range floating-to-integer conversion, which is
// undefined behaviour.
long int lrint(double x)
{
    if (x >= (double)LONG_MAX) return LONG_MAX;

    // written as a negated comparison so that NaN is caught here as well
    if (!(x > (double)LONG_MIN)) return LONG_MIN;

    if (fabs(x) < 4503599627370496.0 /* 0x1.0p52 */)
    {
        // adding and removing 2^52 makes the FPU drop the fraction
        double magic = copysign(4503599627370496.0 /* 0x1.0p52 */, x);
        double rounded = x + magic;
        rounded -= magic;
        return (long int)rounded;
    }

    return (long int)x;
}
553 
// Rounds x to an integer using the current floating-point rounding mode
// and returns it as a long.  Values that do not fit are saturated to
// LONG_MAX / LONG_MIN; NaN yields LONG_MIN.  Saturating on both sides
// avoids an out-of-range floating-to-integer conversion, which is
// undefined behaviour.
long int lrintf(float x)
{
    if (x >= (float)LONG_MAX) return LONG_MAX;

    // written as a negated comparison so that NaN is caught here as well
    if (!(x > (float)LONG_MIN)) return LONG_MIN;

    if (fabsf(x) < 8388608.0f /* 0x1.0p23f */)
    {
        // adding and removing 2^23 makes the FPU drop the fraction
        float magic = copysignf(8388608.0f /* 0x1.0p23f */, x);
        float rounded = x + magic;
        rounded -= magic;
        return (long int)rounded;
    }

    return (long int)x;
}
570 
571 #endif // _MSC_VER < 1900
572 
573 ///////////////////////////////////////////////////////////////////
574 //
575 //                  fenv functions
576 //
577 ///////////////////////////////////////////////////////////////////
578 
579 #if _MSC_VER < 1800
fetestexcept(int excepts)580 int fetestexcept(int excepts)
581 {
582     unsigned int status = _statusfp();
583     return excepts
584         & (((status & _SW_INEXACT) ? FE_INEXACT : 0)
585            | ((status & _SW_UNDERFLOW) ? FE_UNDERFLOW : 0)
586            | ((status & _SW_OVERFLOW) ? FE_OVERFLOW : 0)
587            | ((status & _SW_ZERODIVIDE) ? FE_DIVBYZERO : 0)
588            | ((status & _SW_INVALID) ? FE_INVALID : 0));
589 }
590 
// Clears the floating-point status via _clearfp() and always returns 0.
// The `excepts` argument is not consulted: the call is made without any
// per-flag selection.
int feclearexcept(int excepts)
{
    (void)excepts;
    _clearfp();

    return 0;
}
596 #endif
597 
598 #endif // __INTEL_COMPILER
599 
600 #if _MSC_VER < 1900 && (!defined(__INTEL_COMPILER) || __INTEL_COMPILER < 1300)
601 
// Returns a quiet float NaN.  `str` is parsed with atoi() and the result
// is OR-ed into the NaN bit pattern as payload.  The bits are converted
// through a union, as elsewhere in this file, instead of dereferencing a
// cast pointer (which violates the strict-aliasing rules).
float nanf(const char* str)
{
    union {
        float f;
        unsigned int u;
    } pun;

    pun.u = (unsigned int)atoi(str);
    pun.u |= 0x7fc00000U; // all-ones exponent plus the quiet bit

    return pun.f;
}
608 
609 
// Returns a quiet double NaN.  `str` is parsed with atoi() and the result
// is OR-ed into the NaN bit pattern as payload.  The bits are converted
// through a union, as elsewhere in this file, instead of dereferencing a
// cast pointer (which violates the strict-aliasing rules).
double nan(const char* str)
{
    union {
        double f;
        unsigned long long u;
    } pun;

    pun.u = (unsigned long long)atoi(str);
    pun.u |= 0x7ff8000000000000ULL; // all-ones exponent plus the quiet bit

    return pun.f;
}
616 
// Returns a quiet long double NaN with the atoi() value of `str` OR-ed in
// as payload.
//
// Two storage formats are handled:
//  * long double with the same size as double (MSVC): the double NaN
//    pattern is used;
//  * otherwise the 80-bit x87 extended layout is assumed.  Both the
//    explicit integer bit and the quiet bit are set (0xC000...): with only
//    the integer bit set and a zero payload the pattern would encode
//    infinity, not NaN.
long double nanl(const char* str)
{
    const unsigned long long payload = (unsigned long long)atoi(str);

    if (sizeof(long double) == sizeof(double))
    {
        union {
            double f;
            unsigned long long u;
        } d;

        d.u = 0x7ff8000000000000ULL | payload;
        return d.f;
    }

    union {
        long double f;
        struct
        {
            unsigned long long m;
            unsigned short sexp;
        } u;
    } u;
    u.u.sexp = 0x7fff;
    u.u.m = 0xC000000000000000ULL | payload;

    return u.f;
}
633 
634 #endif
635 
636 ///////////////////////////////////////////////////////////////////
637 //
638 //                  misc functions
639 //
640 ///////////////////////////////////////////////////////////////////
641 
642 /*
643 // This function is commented out because the Windows implementation should
644 never call munmap.
645 // If it is calling it, we have a bug. Please file a bugzilla.
646 int munmap(void *addr, size_t len)
647 {
648 // FIXME: this is not correct.  munmap is like free()
649 // http://www.opengroup.org/onlinepubs/7990989775/xsh/munmap.html
650 
651     return (int)VirtualAlloc( (LPVOID)addr, len,
652                   MEM_COMMIT|MEM_RESERVE, PAGE_NOACCESS );
653 }
654 */
655 
ReadTime(void)656 uint64_t ReadTime(void)
657 {
658     LARGE_INTEGER current;
659     QueryPerformanceCounter(&current);
660     return (uint64_t)current.QuadPart;
661 }
662 
// Converts the difference between two ReadTime()-style tick counts into
// nanoseconds: ticks / (ticks per second) * 1e9.  The counter frequency is
// queried on the first call and cached in a function-local static.
double SubtractTime(uint64_t endTime, uint64_t startTime)
{
    static double ticksPerSecond = 0.0;
    const uint64_t elapsedTicks = endTime - startTime;

    // 0.0 means "not queried yet"
    if (ticksPerSecond == 0.0)
    {
        LARGE_INTEGER freq;
        QueryPerformanceFrequency(&freq);
        ticksPerSecond = (double)freq.QuadPart;
    }

    return (double)elapsedTicks / ticksPerSecond * 1e9;
}
676 
cf_signbit(double x)677 int cf_signbit(double x)
678 {
679     union {
680         double f;
681         cl_ulong u;
682     } u;
683     u.f = x;
684     return u.u >> 63;
685 }
686 
cf_signbitf(float x)687 int cf_signbitf(float x)
688 {
689     union {
690         float f;
691         cl_uint u;
692     } u;
693     u.f = x;
694     return u.u >> 31;
695 }
696 
// Reinterprets the 32-bit pattern ix as a float (no numeric conversion).
float int2float(int32_t ix)
{
    union {
        int32_t bits;
        float value;
    } pun;

    pun.bits = ix;

    return pun.value;
}
706 
// Reinterprets the float fx as its 32-bit pattern (no numeric conversion).
int32_t float2int(float fx)
{
    union {
        int32_t bits;
        float value;
    } pun;

    pun.value = fx;

    return pun.bits;
}
716 
717 #if !defined(_WIN64)
/** Returns the number of leading 0-bits in pattern,
    starting at the most significant bit position.
    Implemented with _BitScanReverse; when pattern is 0 (no bit found)
    the full bit width of int is returned.
*/
int __builtin_clz(unsigned int pattern)
{
    unsigned long highest = 0;

    // _BitScanReverse reports failure when no bit is set at all
    if (!_BitScanReverse(&highest, pattern))
    {
        return 8 * sizeof(int);
    }

    // distance from the top bit down to the highest set bit
    return 8 * sizeof(int) - 1 - highest;
}
744 #else
// Returns the number of leading 0-bits in pattern (32 when pattern is 0).
// A binary search narrows down the highest set bit: at each step the
// upper half of the remaining window is tested and, if non-empty, shifted
// down while the count is reduced by the window size.
int __builtin_clz(unsigned int pattern)
{
    int zeros = 31;
    int step;

    if (pattern == 0u)
    {
        return 32;
    }

    for (step = 16; step != 0; step >>= 1)
    {
        if ((pattern >> step) != 0u)
        {
            pattern >>= step;
            zeros -= step;
        }
    }

    return zeros;
}
779 
780 #endif // !defined(_WIN64)
781 
782 #include <intrin.h>
783 #include <emmintrin.h>
784 
// Suspends the calling thread for at least `usec` microseconds, rounded up
// to whole milliseconds because Sleep() has millisecond granularity.
// Always returns 0.
//
// Zero and negative requests result in Sleep(0).  The round-up is computed
// with a division and remainder instead of "usec + 999" so that values
// close to INT_MAX cannot overflow, and negative values can no longer turn
// into a huge unsigned delay when passed to Sleep().
int usleep(int usec)
{
    int millis = 0;

    if (usec > 0)
    {
        millis = usec / 1000 + ((usec % 1000) != 0 ? 1 : 0);
    }

    Sleep(millis);
    return 0;
}
790 
// Suspends the calling thread for `sec` seconds by converting to
// milliseconds for Sleep().  Always returns 0.
unsigned int sleep(unsigned int sec)
{
    const unsigned int millis = sec * 1000;

    Sleep(millis);

    return 0;
}
796 
797 #endif // defined( _MSC_VER )
798