1 /* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
#include "tensorflow/lite/kernels/cpu_backend_context.h"

#include <memory>
#include <utility>

#ifdef TFLITE_HAVE_CPUINFO
#include "include/cpuinfo.h"
#endif

#include "public/gemmlowp.h"
#include "ruy/context.h"  // from @ruy
#include "tensorflow/lite/c/common.h"
#include "tensorflow/lite/core/macros.h"
#include "tensorflow/lite/external_cpu_backend_context.h"
#include "tensorflow/lite/kernels/internal/compatibility.h"
#include "tensorflow/lite/kernels/op_macros.h"
31 
namespace {

// Thread count used by CpuBackendContext when the caller has not supplied a
// non-negative thread count of its own.
constexpr int kDefaultNumThreadpoolThreads = 1;

}  // namespace
36 
37 namespace tflite {
38 
// Use weak symbols if possible to dispatch to deprecated paths.
// UseGemmlowpOnX86() is only declared here; because the declaration is weak,
// PreferGemmlowpOnX86() compares the symbol against nullptr before calling it.
#if TFLITE_HAS_ATTRIBUTE_WEAK && !defined(__APPLE__)
extern TFLITE_ATTRIBUTE_WEAK bool UseGemmlowpOnX86();
#endif  // TFLITE_HAS_ATTRIBUTE_WEAK && !defined(__APPLE__)
43 
44 // TODO(b/138922878) Enable when Ruy builds on Apple.
45 #if defined(TFLITE_HAVE_CPUINFO) && !defined(__APPLE__)
~CpuInfo()46 CpuBackendContext::CpuInfo::~CpuInfo() {
47   if (init_status_ == InitStatus::kInitialized) {
48     cpuinfo_deinitialize();
49   }
50 }
51 
EnsureInitialized()52 bool CpuBackendContext::CpuInfo::EnsureInitialized() {
53   if (init_status_ == InitStatus::kNotYetAttempted) {
54     init_status_ = Initialize();
55   }
56   return init_status_ == InitStatus::kInitialized;
57 }
58 
59 CpuBackendContext::CpuInfo::InitStatus
Initialize()60 CpuBackendContext::CpuInfo::Initialize() {
61   TFLITE_DCHECK_EQ(init_status_, InitStatus::kNotYetAttempted);
62   if (!cpuinfo_initialize()) {
63     return InitStatus::kFailed;
64   }
65   return InitStatus::kInitialized;
66 }
67 
Avx2Fma()68 bool CpuBackendContext::CpuInfo::Avx2Fma() {
69   return EnsureInitialized() && cpuinfo_has_x86_avx2() &&
70          cpuinfo_has_x86_fma3();
71 }
72 
Avx()73 bool CpuBackendContext::CpuInfo::Avx() {
74   return EnsureInitialized() && cpuinfo_has_x86_avx();
75 }
76 
Avx512()77 bool CpuBackendContext::CpuInfo::Avx512() {
78   return EnsureInitialized() && cpuinfo_has_x86_avx512f() &&
79          cpuinfo_has_x86_avx512dq() && cpuinfo_has_x86_avx512cd() &&
80          cpuinfo_has_x86_avx512bw() && cpuinfo_has_x86_avx512vl();
81 }
82 #else
83 
~CpuInfo()84 CpuBackendContext::CpuInfo::~CpuInfo() {}
85 
EnsureInitialized()86 bool CpuBackendContext::CpuInfo::EnsureInitialized() {
87   if (init_status_ == InitStatus::kNotYetAttempted) {
88     init_status_ = InitStatus::kInitialized;
89   }
90   TFLITE_DCHECK_EQ(init_status_, InitStatus::kInitialized);
91   return true;
92 }
93 
Avx2Fma()94 bool CpuBackendContext::CpuInfo::Avx2Fma() { return false; }
95 
Avx()96 bool CpuBackendContext::CpuInfo::Avx() { return false; }
97 
Avx512()98 bool CpuBackendContext::CpuInfo::Avx512() { return false; }
99 #endif  // TFLITE_HAVE_CPUINFO
100 
GetFromContext(TfLiteContext * context)101 CpuBackendContext* CpuBackendContext::GetFromContext(TfLiteContext* context) {
102   auto* external_context = static_cast<ExternalCpuBackendContext*>(
103       context->GetExternalContext(context, kTfLiteCpuBackendContext));
104 
105   if (external_context == nullptr) {
106     TF_LITE_FATAL(
107         "ExternalCpuBackendContext isn't properly initialized during TFLite "
108         "interpreter initialization.");
109   }
110 
111   auto* cpu_backend_context = static_cast<CpuBackendContext*>(
112       external_context->internal_backend_context());
113   if (cpu_backend_context == nullptr) {
114     // We do the lazy initialization here for the TfLiteInternalBackendContext
115     // that's wrapped inside ExternalCpuBackendContext.
116     cpu_backend_context = new CpuBackendContext();
117     cpu_backend_context->SetMaxNumThreads(context->recommended_num_threads);
118     external_context->set_internal_backend_context(
119         std::unique_ptr<TfLiteInternalBackendContext>(cpu_backend_context));
120   }
121 
122   return cpu_backend_context;
123 }
124 
CpuBackendContext()125 CpuBackendContext::CpuBackendContext()
126     : TfLiteInternalBackendContext(),
127       ruy_context_(new ruy::Context),
128       gemmlowp_context_(new gemmlowp::GemmContext) {
129   SetMaxNumThreads(kDefaultNumThreadpoolThreads);
130 // TODO(b/148289189) Remove when clients have transitioned to runtime flag.
131 #ifdef TFLITE_WITH_RUY_GEMV
132   SetUseCaching(true);
133 #else
134   SetUseCaching(false);
135 #endif
136 }
137 
~CpuBackendContext()138 CpuBackendContext::~CpuBackendContext() {}
139 
SetMaxNumThreads(int max_num_threads)140 void CpuBackendContext::SetMaxNumThreads(int max_num_threads) {
141   const int target_num_threads =
142       max_num_threads > -1 ? max_num_threads : kDefaultNumThreadpoolThreads;
143   max_num_threads_ = target_num_threads;
144   ruy_context_->set_max_num_threads(target_num_threads);
145   gemmlowp_context_->set_max_num_threads(target_num_threads);
146 }
147 
SetUseCaching(bool flag)148 void CpuBackendContext::SetUseCaching(bool flag) { use_caching_ = flag; }
149 
HasAvxOrAbove()150 bool CpuBackendContext::HasAvxOrAbove() {
151   return cpuinfo_.Avx() || cpuinfo_.Avx2Fma() || cpuinfo_.Avx512();
152 }
153 
PreferGemmlowpOnX86()154 bool CpuBackendContext::PreferGemmlowpOnX86() {
155   bool use_gemmlowp_on_x86 = false;
156 #if defined(TFLITE_X86_PLATFORM) && TFLITE_HAS_ATTRIBUTE_WEAK && \
157     !defined(__APPLE__)
158   if (::tflite::UseGemmlowpOnX86 != nullptr) {
159     use_gemmlowp_on_x86 = ::tflite::UseGemmlowpOnX86();
160   }
161 #endif  // TFLITE_X86_PLATFORM && TFLITE_HAS_ATTRIBUTE_WEAK && !(__APPLE__)
162   return use_gemmlowp_on_x86 || !HasAvxOrAbove();
163 }
164 
165 }  // namespace tflite
166