1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include <tuple>
17 
18 #include "tensorflow/core/platform/byte_order.h"
19 #include "tensorflow/core/platform/cpu_info.h"
20 #include "tensorflow/core/platform/denormal.h"
21 #include "tensorflow/core/platform/logging.h"
22 #include "tensorflow/core/platform/platform.h"
// GCC 4.8 and older has a known bug that prevents the use of intrinsics when
// the target architecture is not enabled in the compiler flags (detected here
// by __SSE3__ being undefined). See
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=57202
// Clang is excluded explicitly; the version test is only evaluated when
// __GNUC__ is actually defined.
#if defined(__GNUC__) && !defined(__clang__) && !defined(__SSE3__)
#if (__GNUC__ < 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ < 9))
#define GCC_WITHOUT_INTRINSICS
#endif
#endif
// The SSE3 intrinsics are only used when all of the following hold: the
// platform is x86, it is not a mobile platform, and the compiler is not one of
// the known-bad GCC versions detected above.
#if defined(PLATFORM_IS_X86) && !defined(IS_MOBILE_PLATFORM)
#ifndef GCC_WITHOUT_INTRINSICS
#define DENORM_USE_INTRINSICS
#endif
#endif
37 
38 #ifdef DENORM_USE_INTRINSICS
39 #include <pmmintrin.h>
40 #endif
41 
42 namespace tensorflow {
43 namespace port {
44 
// Applies the requested flush-zero and denormals-zero modes through the SSE
// control macros.
//
//   flush_zero_mode:     true selects _MM_FLUSH_ZERO_ON, false selects
//                        _MM_FLUSH_ZERO_OFF.
//   denormals_zero_mode: true selects _MM_DENORMALS_ZERO_ON, false selects
//                        _MM_DENORMALS_ZERO_OFF.
//
// This is a no-op when DENORM_USE_INTRINSICS is not defined or when
// TestCPUFeature(SSE3) is false. For now only SSE3 is handled; other
// architectures such as ARM can be added as needed.
static void SetDenormalState(bool flush_zero_mode, bool denormals_zero_mode) {
#ifdef DENORM_USE_INTRINSICS
  if (!TestCPUFeature(SSE3)) {
    return;
  }

  const auto flush_zero_flag =
      flush_zero_mode ? _MM_FLUSH_ZERO_ON : _MM_FLUSH_ZERO_OFF;
  const auto denormals_zero_flag =
      denormals_zero_mode ? _MM_DENORMALS_ZERO_ON : _MM_DENORMALS_ZERO_OFF;

  // Flush-zero is written first, then denormals-zero.
  _MM_SET_FLUSH_ZERO_MODE(flush_zero_flag);
  _MM_SET_DENORMALS_ZERO_MODE(denormals_zero_flag);
#endif
}
59 
// Reads the current flush-zero and denormals-zero modes.
//
// Returns {flush_zero_mode, denormals_zero_mode}, where each element is true
// when the corresponding SSE mode currently reads as *_ON. Returns
// {false, false} when DENORM_USE_INTRINSICS is not defined or when
// TestCPUFeature(SSE3) is false. For now only SSE3 is handled; other
// architectures such as ARM can be added as needed.
//
// NOTE: the name is misspelled ("Dernormal"); it is kept as-is because the
// caller in this file refers to it by this spelling.
static std::pair<bool, bool> GetDernormalState() {
  std::pair<bool, bool> state(false, false);

#ifdef DENORM_USE_INTRINSICS
  if (TestCPUFeature(SSE3)) {
    // Capture the flags as they are right now.
    state.first = (_MM_GET_FLUSH_ZERO_MODE() == _MM_FLUSH_ZERO_ON);
    state.second = (_MM_GET_DENORMALS_ZERO_MODE() == _MM_DENORMALS_ZERO_ON);
  }
#endif

  return state;
}
75 
ScopedRestoreFlushDenormalState()76 ScopedRestoreFlushDenormalState::ScopedRestoreFlushDenormalState() {
77   std::tie(flush_zero_mode_, denormals_zero_mode_) = GetDernormalState();
78 }
79 
~ScopedRestoreFlushDenormalState()80 ScopedRestoreFlushDenormalState::~ScopedRestoreFlushDenormalState() {
81   SetDenormalState(flush_zero_mode_, denormals_zero_mode_);
82 }
83 
ScopedFlushDenormal()84 ScopedFlushDenormal::ScopedFlushDenormal() {
85   SetDenormalState(/*flush_zero_mode=*/true, /*denormals_zero_mode=*/true);
86 }
87 
ScopedDontFlushDenormal()88 ScopedDontFlushDenormal::ScopedDontFlushDenormal() {
89   SetDenormalState(/*flush_zero_mode=*/false, /*denormals_zero_mode=*/false);
90 }
91 
92 }  // namespace port
93 }  // namespace tensorflow
94