1 //
2 // Copyright (c) 2017 The Khronos Group Inc.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //    http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 #ifndef _fpcontrol_h
17 #define _fpcontrol_h
18 
// In order to get tests for correctly rounded operations (e.g. multiply) to
// work properly we need to be able to set the reference hardware to FTZ mode if
// the device hardware is running in that mode.  We have explored all other
// options short of writing correctly rounded operations in integer code, and
// have found this is the only way to correctly verify operation.
//
// Non-Apple implementations will need to provide their own implementation for
// these features.  If the reference hardware and device are both running in the
// same state (either FTZ or IEEE compliant modes) then these functions may be
// empty.  If the device is running in non-default rounding mode (e.g. round
// toward zero), then these functions should also set the reference device into
// that rounding mode.
31 #if defined(__APPLE__) || defined(_MSC_VER) || defined(__linux__)              \
32     || defined(__MINGW32__)
// Snapshot of the host floating-point control state.  ForceFTZ() and
// DisableFTZ() store the previous control value here, and RestoreFPState()
// writes it back.
typedef int FPU_mode_type;
34 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
35     || defined(__MINGW32__)
36 #include <xmmintrin.h>
37 #elif defined(__PPC__)
38 #include <fpu_control.h>
39 extern __thread fpu_control_t fpu_control;
40 #endif
41 // Set the reference hardware floating point unit to FTZ mode
ForceFTZ(FPU_mode_type * mode)42 static inline void ForceFTZ(FPU_mode_type *mode)
43 {
44 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
45     || defined(__MINGW32__)
46     *mode = _mm_getcsr();
47     _mm_setcsr(*mode | 0x8040);
48 #elif defined(__PPC__)
49     *mode = fpu_control;
50     fpu_control |= _FPU_MASK_NI;
51 #elif defined(__arm__)
52     unsigned fpscr;
53     __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
54     *mode = fpscr;
55     __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr | (1U << 24)));
56     // Add 64 bit support
57 #elif defined(__aarch64__)
58     unsigned fpscr;
59     __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
60     *mode = fpscr;
61     __asm__ volatile("msr fpcr, %0" ::"r"(fpscr | (1U << 24)));
62 #else
63 #error ForceFTZ needs an implentation
64 #endif
65 }
66 
67 // Disable the denorm flush to zero
DisableFTZ(FPU_mode_type * mode)68 static inline void DisableFTZ(FPU_mode_type *mode)
69 {
70 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
71     || defined(__MINGW32__)
72     *mode = _mm_getcsr();
73     _mm_setcsr(*mode & ~0x8040);
74 #elif defined(__PPC__)
75     *mode = fpu_control;
76     fpu_control &= ~_FPU_MASK_NI;
77 #elif defined(__arm__)
78     unsigned fpscr;
79     __asm__ volatile("fmrx %0, fpscr" : "=r"(fpscr));
80     *mode = fpscr;
81     __asm__ volatile("fmxr fpscr, %0" ::"r"(fpscr & ~(1U << 24)));
82     // Add 64 bit support
83 #elif defined(__aarch64__)
84     unsigned fpscr;
85     __asm__ volatile("mrs %0, fpcr" : "=r"(fpscr));
86     *mode = fpscr;
87     __asm__ volatile("msr fpcr, %0" ::"r"(fpscr & ~(1U << 24)));
88 #else
89 #error DisableFTZ needs an implentation
90 #endif
91 }
92 
93 // Restore the reference hardware to floating point state indicated by *mode
RestoreFPState(FPU_mode_type * mode)94 static inline void RestoreFPState(FPU_mode_type *mode)
95 {
96 #if defined(__i386__) || defined(__x86_64__) || defined(_MSC_VER)              \
97     || defined(__MINGW32__)
98     _mm_setcsr(*mode);
99 #elif defined(__PPC__)
100     fpu_control = *mode;
101 #elif defined(__arm__)
102     __asm__ volatile("fmxr fpscr, %0" ::"r"(*mode));
103     // Add 64 bit support
104 #elif defined(__aarch64__)
105     __asm__ volatile("msr fpcr, %0" ::"r"(*mode));
106 #else
107 #error RestoreFPState needs an implementation
108 #endif
109 }
110 #else
111 #error ForceFTZ and RestoreFPState need implentations
112 #endif
113 
114 #endif
115