1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 
3 Licensed under the Apache License, Version 2.0 (the "License");
4 you may not use this file except in compliance with the License.
5 You may obtain a copy of the License at
6 
7     http://www.apache.org/licenses/LICENSE-2.0
8 
9 Unless required by applicable law or agreed to in writing, software
10 distributed under the License is distributed on an "AS IS" BASIS,
11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 See the License for the specific language governing permissions and
13 limitations under the License.
14 ==============================================================================*/
15 
16 #include "tensorflow/compiler/xla/service/cpu/simple_orc_jit.h"
17 
18 #include <stdint.h>
19 
20 #include <algorithm>
21 #include <cstdio>
22 #include <list>
23 #include <utility>
24 
25 #include "absl/memory/memory.h"
26 #include "llvm/ExecutionEngine/ExecutionEngine.h"
27 #include "llvm/ExecutionEngine/JITSymbol.h"
28 #include "llvm/ExecutionEngine/SectionMemoryManager.h"
29 #include "llvm/IR/Mangler.h"
30 #include "llvm/IR/Operator.h"
31 #include "llvm/Support/CodeGen.h"
32 #include "llvm/Support/Host.h"
33 #include "tensorflow/compiler/xla/service/cpu/cpu_runtime.h"
34 #include "tensorflow/compiler/xla/service/cpu/orc_jit_memory_mapper.h"
35 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d.h"
36 #include "tensorflow/compiler/xla/service/cpu/runtime_conv2d_mkl.h"
37 #include "tensorflow/compiler/xla/service/cpu/runtime_fft.h"
38 #include "tensorflow/compiler/xla/service/cpu/runtime_fork_join.h"
39 #include "tensorflow/compiler/xla/service/cpu/runtime_fp16.h"
40 #include "tensorflow/compiler/xla/service/cpu/runtime_key_value_sort.h"
41 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul.h"
42 #include "tensorflow/compiler/xla/service/cpu/runtime_matmul_mkl.h"
43 #include "tensorflow/compiler/xla/service/cpu/runtime_pow.h"
44 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_conv2d.h"
45 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_fft.h"
46 #include "tensorflow/compiler/xla/service/cpu/runtime_single_threaded_matmul.h"
47 #include "tensorflow/compiler/xla/service/cpu/runtime_topk.h"
48 #include "tensorflow/compiler/xla/service/cpu/windows_compatibility.h"
49 #include "tensorflow/compiler/xla/service/custom_call_target_registry.h"
50 #include "tensorflow/compiler/xla/types.h"
51 #include "tensorflow/core/platform/logging.h"
52 
53 namespace xla {
54 namespace cpu {
55 namespace {
56 
DetectMachineAttributes()57 llvm::SmallVector<std::string, 0> DetectMachineAttributes() {
58   llvm::SmallVector<std::string, 0> result;
59   llvm::StringMap<bool> host_features;
60   if (llvm::sys::getHostCPUFeatures(host_features)) {
61     for (auto& feature : host_features) {
62       result.push_back((feature.second ? '+' : '-') +
63                        std::string(feature.first()));
64     }
65   }
66   return result;
67 }
68 
69 }  // namespace
70 
71 /*static*/ std::unique_ptr<llvm::TargetMachine>
InferTargetMachineForJIT(const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level)72 SimpleOrcJIT::InferTargetMachineForJIT(
73     const llvm::TargetOptions& target_options,
74     llvm::CodeGenOpt::Level opt_level) {
75   std::unique_ptr<llvm::TargetMachine> target_machine(
76       llvm::EngineBuilder()
77           .setTargetOptions(target_options)
78           .setOptLevel(opt_level)
79           .selectTarget(
80               /*TargetTriple=*/llvm::Triple(), /*MArch=*/"",
81               /*MCPU=*/llvm::sys::getHostCPUName(),
82               /*MAttrs=*/DetectMachineAttributes()));
83   CHECK(target_machine != nullptr);
84   return target_machine;
85 }
86 
SimpleOrcJIT(std::unique_ptr<llvm::orc::TargetProcessControl> target_process_control,std::unique_ptr<llvm::orc::ExecutionSession> execution_session,const llvm::TargetOptions & target_options,llvm::CodeGenOpt::Level opt_level,bool optimize_for_size,bool disable_expensive_passes,llvm::FastMathFlags fast_math_flags,LLVMCompiler::ModuleHook pre_optimization_hook,LLVMCompiler::ModuleHook post_optimization_hook,std::function<void (const llvm::object::ObjectFile &)> post_codegen_hook)87 SimpleOrcJIT::SimpleOrcJIT(
88     std::unique_ptr<llvm::orc::TargetProcessControl> target_process_control,
89     std::unique_ptr<llvm::orc::ExecutionSession> execution_session,
90     const llvm::TargetOptions& target_options,
91     llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
92     bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
93     LLVMCompiler::ModuleHook pre_optimization_hook,
94     LLVMCompiler::ModuleHook post_optimization_hook,
95     std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook)
96     : target_machine_(InferTargetMachineForJIT(target_options, opt_level)),
97       data_layout_(target_machine_->createDataLayout()),
98       target_process_control_(std::move(target_process_control)),
99       execution_session_(std::move(execution_session)),
100       object_layer_(*execution_session_,
101                     []() {
102                       return std::make_unique<llvm::SectionMemoryManager>(
103                           orc_jit_memory_mapper::GetInstance());
104                     }),
105       compile_layer_(
106           *execution_session_, object_layer_,
107           std::make_unique<CompilerFunctor>(
108               target_machine_.get(), opt_level, optimize_for_size,
109               disable_expensive_passes, fast_math_flags,
110               std::move(pre_optimization_hook),
111               std::move(post_optimization_hook), std::move(post_codegen_hook))),
112       main_jit_dylib_(&execution_session_->createBareJITDylib("<main>")),
113       gdb_jit_event_listener_(
114           llvm::JITEventListener::createGDBRegistrationListener()) {
115   VLOG(1) << "CPU target: " << target_machine_->getTargetCPU().str()
116           << " features: " << target_machine_->getTargetFeatureString().str();
117 
118   // Materialize unknown symbols from the runtime symbol table.
119   class RuntimeSymbolGenerator : public llvm::orc::DefinitionGenerator {
120     SimpleOrcJIT& jit_;
121 
122    public:
RuntimeSymbolGenerator(SimpleOrcJIT & jit)123     explicit RuntimeSymbolGenerator(SimpleOrcJIT& jit) : jit_(jit) {}
tryToGenerate(llvm::orc::LookupState &,llvm::orc::LookupKind,llvm::orc::JITDylib & jit_dylib,llvm::orc::JITDylibLookupFlags,const llvm::orc::SymbolLookupSet & names)124     llvm::Error tryToGenerate(
125         llvm::orc::LookupState&, llvm::orc::LookupKind,
126         llvm::orc::JITDylib& jit_dylib, llvm::orc::JITDylibLookupFlags,
127         const llvm::orc::SymbolLookupSet& names) override {
128       llvm::orc::SymbolMap new_defs;
129 
130       for (const auto& kv : names) {
131         const auto& name = kv.first;
132         if (llvm::JITEvaluatedSymbol symbol =
133                 jit_.ResolveRuntimeSymbol(*name)) {
134           new_defs[name] = symbol;
135         }
136       }
137 
138       cantFail(jit_dylib.define(absoluteSymbols(std::move(new_defs))));
139       return llvm::Error::success();
140     }
141   };
142   main_jit_dylib_->addGenerator(
143       std::make_unique<RuntimeSymbolGenerator>(*this));
144   object_layer_.registerJITEventListener(*this);
145 
146   // Copied from LLJIT, required to find symbols on Windows.
147   if (target_machine_->getTargetTriple().isOSBinFormatCOFF()) {
148     object_layer_.setOverrideObjectFlagsWithResponsibilityFlags(true);
149     object_layer_.setAutoClaimResponsibilityForObjectSymbols(true);
150   }
151 }
152 
~SimpleOrcJIT()153 SimpleOrcJIT::~SimpleOrcJIT() {
154   if (auto err = execution_session_->endSession()) {
155     execution_session_->reportError(std::move(err));
156   }
157 }
158 
// Factory for SimpleOrcJIT. Creates a SelfTargetProcessControl with a fresh
// symbol string pool and a new ExecutionSession, then forwards them together
// with the remaining arguments to the constructor. Returns the error from
// SelfTargetProcessControl::Create if that step fails.
llvm::Expected<std::unique_ptr<SimpleOrcJIT>> SimpleOrcJIT::Create(
    const llvm::TargetOptions& target_options,
    llvm::CodeGenOpt::Level opt_level, bool optimize_for_size,
    bool disable_expensive_passes, llvm::FastMathFlags fast_math_flags,
    LLVMCompiler::ModuleHook pre_optimization_hook,
    LLVMCompiler::ModuleHook post_optimization_hook,
    std::function<void(const llvm::object::ObjectFile&)> post_codegen_hook) {
  auto process_control = llvm::orc::SelfTargetProcessControl::Create(
      std::make_shared<llvm::orc::SymbolStringPool>());
  if (!process_control) {
    return process_control.takeError();
  }

  auto session = std::make_unique<llvm::orc::ExecutionSession>();
  return std::make_unique<SimpleOrcJIT>(
      std::move(*process_control), std::move(session), target_options,
      opt_level, optimize_for_size, disable_expensive_passes, fast_math_flags,
      std::move(pre_optimization_hook), std::move(post_optimization_hook),
      std::move(post_codegen_hook));
}
180 
ResolveRuntimeSymbol(llvm::StringRef name)181 llvm::JITEvaluatedSymbol SimpleOrcJIT::ResolveRuntimeSymbol(
182     llvm::StringRef name) {
183   void* func_addr = nullptr;
184   if (name.size() > 1 && name.front() == data_layout_.getGlobalPrefix()) {
185     // On Mac OS X, 'name' may have a leading underscore prefix, even though the
186     // registered name may not.
187     std::string stripped_name(name.begin() + 1, name.end());
188     func_addr =
189         xla::CustomCallTargetRegistry::Global()->Lookup(stripped_name, "Host");
190   } else {
191     func_addr =
192         xla::CustomCallTargetRegistry::Global()->Lookup(name.str(), "Host");
193   }
194 
195   if (func_addr == nullptr) {
196     LOG(ERROR)
197         << "Unable to resolve runtime symbol: `" << name.str()
198         << "'.  Hint: if the symbol a custom call target, make sure you've "
199            "registered it with the JIT using "
200            "XLA_CPU_REGISTER_CUSTOM_CALL_TARGET.";
201     return nullptr;
202   }
203   llvm::JITEvaluatedSymbol symbol_info(reinterpret_cast<uint64_t>(func_addr),
204                                        llvm::JITSymbolFlags::None);
205   return symbol_info;
206 }
207 
notifyObjectLoaded(llvm::JITEventListener::ObjectKey key,const llvm::object::ObjectFile & object,const llvm::RuntimeDyld::LoadedObjectInfo & object_info)208 void SimpleOrcJIT::notifyObjectLoaded(
209     llvm::JITEventListener::ObjectKey key,
210     const llvm::object::ObjectFile& object,
211     const llvm::RuntimeDyld::LoadedObjectInfo& object_info) {
212   gdb_jit_event_listener_->notifyObjectLoaded(key, object, object_info);
213   size_of_generated_code_in_bytes_ += object.getData().size();
214 }
215 
notifyFreeingObject(llvm::JITEventListener::ObjectKey key)216 void SimpleOrcJIT::notifyFreeingObject(llvm::JITEventListener::ObjectKey key) {
217   gdb_jit_event_listener_->notifyFreeingObject(key);
218 }
219 
AddModule(llvm::orc::ThreadSafeModule module)220 llvm::Error SimpleOrcJIT::AddModule(llvm::orc::ThreadSafeModule module) {
221   return compile_layer_.add(*main_jit_dylib_, std::move(module));
222 }
223 
FindCompiledSymbol(const std::string & name)224 llvm::Expected<llvm::JITEvaluatedSymbol> SimpleOrcJIT::FindCompiledSymbol(
225     const std::string& name) {
226   return execution_session_->lookup({main_jit_dylib_}, name);
227 }
228 
229 #if defined(PLATFORM_WINDOWS)
// This function is used by compiler-generated code on Windows, but it's not
// declared anywhere. The signature does not matter; we just need the address.
232 extern "C" void __chkstk(size_t);
233 #endif
234 
namespace {
// Register some known symbols with the CustomCallTargetRegistry.
//
// Every symbol is registered on the global registry under the "Host" platform,
// which is the platform ResolveRuntimeSymbol queries. The registered set is:
// printf, the XLA CPU runtime entry points, half-precision conversion and powi
// helpers, the float/double libm functions, memcpy/memmove/memset, and a few
// platform- or sanitizer-specific symbols guarded by preprocessor checks.
//
// Always returns true; the return value exists only so the call can be used as
// the initializer of a namespace-scope variable.
bool RegisterKnownJITSymbols() {
  xla::CustomCallTargetRegistry* registry =
      xla::CustomCallTargetRegistry::Global();
  registry->Register("printf", reinterpret_cast<void*>(&printf), "Host");

// Registers the function __xla_cpu_runtime_<base_name> under the name held in
// xla::cpu::runtime::k<base_name>SymbolName, and CHECKs that this constant is
// exactly "__xla_cpu_runtime_<base_name>".
#define REGISTER_CPU_RUNTIME_SYMBOL(base_name)                               \
  do {                                                                       \
    auto* function_address =                                                 \
        reinterpret_cast<void*>(__xla_cpu_runtime_##base_name);              \
    registry->Register(xla::cpu::runtime::k##base_name##SymbolName,          \
                       function_address, "Host");                            \
    CHECK_EQ(absl::string_view(xla::cpu::runtime::k##base_name##SymbolName), \
             "__xla_cpu_runtime_" #base_name);                               \
  } while (false)

  REGISTER_CPU_RUNTIME_SYMBOL(AcquireInfeedBufferForDequeue);
  REGISTER_CPU_RUNTIME_SYMBOL(AcquireOutfeedBufferForPopulation);
  REGISTER_CPU_RUNTIME_SYMBOL(AllReduce);
  REGISTER_CPU_RUNTIME_SYMBOL(CollectivePermute);
  REGISTER_CPU_RUNTIME_SYMBOL(AllToAll);
  REGISTER_CPU_RUNTIME_SYMBOL(ReplicaId);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLConvF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenConvF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenFft);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulC128);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenMatMulS32);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(MKLSingleThreadedMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedConvF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedFft);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF16);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF32);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulF64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC64);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulC128);
  REGISTER_CPU_RUNTIME_SYMBOL(EigenSingleThreadedMatMulS32);
  REGISTER_CPU_RUNTIME_SYMBOL(ParallelForkJoin);
  REGISTER_CPU_RUNTIME_SYMBOL(ReleaseInfeedBufferAfterDequeue);
  REGISTER_CPU_RUNTIME_SYMBOL(ReleaseOutfeedBufferAfterPopulation);
  REGISTER_CPU_RUNTIME_SYMBOL(KeyValueSort);
  REGISTER_CPU_RUNTIME_SYMBOL(TopKF32);
  REGISTER_CPU_RUNTIME_SYMBOL(TracingStart);
  REGISTER_CPU_RUNTIME_SYMBOL(TracingEnd);

  // Half-precision conversion and integer-power helpers, registered under
  // their own names.
  registry->Register("__gnu_f2h_ieee", reinterpret_cast<void*>(__gnu_f2h_ieee),
                     "Host");
  registry->Register("__gnu_h2f_ieee", reinterpret_cast<void*>(__gnu_h2f_ieee),
                     "Host");
  registry->Register("__truncdfhf2", reinterpret_cast<void*>(__truncdfhf2),
                     "Host");
  registry->Register("__powisf2", reinterpret_cast<void*>(__powisf2), "Host");
  registry->Register("__powidf2", reinterpret_cast<void*>(__powidf2), "Host");

#undef REGISTER_CPU_RUNTIME_SYMBOL

// Register both the f32 (float) and f64 (double) versions of a libm symbol.
// Unfortunately the double versions are overloaded on some systems, e.g.
// Mac so we need an explicit cast. This requires passing the function signature
// for that case.
//
// `name` is the double-precision function; the float variant is `name` with an
// "f" suffix. `double_sig` is the function-pointer type used to select the
// double overload.
#define REGISTER_LIBM_SYMBOL(name, double_sig)                                 \
  do {                                                                         \
    registry->Register(#name "f", reinterpret_cast<void*>(name##f), "Host");   \
    registry->Register(#name,                                                  \
                       reinterpret_cast<void*>(static_cast<double_sig>(name)), \
                       "Host");                                                \
  } while (false)

  REGISTER_LIBM_SYMBOL(acos, double (*)(double));
  REGISTER_LIBM_SYMBOL(acosh, double (*)(double));
  REGISTER_LIBM_SYMBOL(asin, double (*)(double));
  REGISTER_LIBM_SYMBOL(asinh, double (*)(double));
  REGISTER_LIBM_SYMBOL(atan, double (*)(double));
  REGISTER_LIBM_SYMBOL(atan2, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(atanh, double (*)(double));
  REGISTER_LIBM_SYMBOL(cbrt, double (*)(double));
  REGISTER_LIBM_SYMBOL(ceil, double (*)(double));
  REGISTER_LIBM_SYMBOL(copysign, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(cos, double (*)(double));
  REGISTER_LIBM_SYMBOL(cosh, double (*)(double));
  REGISTER_LIBM_SYMBOL(erf, double (*)(double));
  REGISTER_LIBM_SYMBOL(erfc, double (*)(double));
  REGISTER_LIBM_SYMBOL(exp, double (*)(double));
  REGISTER_LIBM_SYMBOL(exp2, double (*)(double));
  REGISTER_LIBM_SYMBOL(expm1, double (*)(double));
  REGISTER_LIBM_SYMBOL(fabs, double (*)(double));
  REGISTER_LIBM_SYMBOL(fdim, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(floor, double (*)(double));
  REGISTER_LIBM_SYMBOL(fma, double (*)(double, double, double));
  REGISTER_LIBM_SYMBOL(fmax, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(fmin, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(fmod, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(frexp, double (*)(double, int*));
  REGISTER_LIBM_SYMBOL(hypot, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(ilogb, int (*)(double));
  REGISTER_LIBM_SYMBOL(ldexp, double (*)(double, int));
  REGISTER_LIBM_SYMBOL(lgamma, double (*)(double));
  REGISTER_LIBM_SYMBOL(llrint, long long (*)(double));   // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(llround, long long (*)(double));  // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(log, double (*)(double));
  REGISTER_LIBM_SYMBOL(log10, double (*)(double));
  REGISTER_LIBM_SYMBOL(log1p, double (*)(double));
  REGISTER_LIBM_SYMBOL(log2, double (*)(double));
  REGISTER_LIBM_SYMBOL(logb, double (*)(double));
  REGISTER_LIBM_SYMBOL(lrint, long (*)(double));   // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(lround, long (*)(double));  // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(modf, double (*)(double, double*));
  REGISTER_LIBM_SYMBOL(nan, double (*)(const char*));
  REGISTER_LIBM_SYMBOL(nearbyint, double (*)(double));
  REGISTER_LIBM_SYMBOL(nextafter, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(nexttoward, double (*)(double, long double));
  REGISTER_LIBM_SYMBOL(pow, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(remainder, double (*)(double, double));
  REGISTER_LIBM_SYMBOL(remquo, double (*)(double, double, int*));
  REGISTER_LIBM_SYMBOL(rint, double (*)(double));
  REGISTER_LIBM_SYMBOL(round, double (*)(double));
  REGISTER_LIBM_SYMBOL(scalbln,
                       double (*)(double, long));  // NOLINT(runtime/int)
  REGISTER_LIBM_SYMBOL(scalbn, double (*)(double, int));
  REGISTER_LIBM_SYMBOL(sin, double (*)(double));
  // Apple builds register __sincos/__sincosf and the *_stret variants; all
  // other builds register sincos/sincosf.
#ifdef __APPLE__
  REGISTER_LIBM_SYMBOL(__sincos, void (*)(double, double*, double*));
  registry->Register("__sincosf_stret",
                     reinterpret_cast<void*>(__sincosf_stret), "Host");
  registry->Register("__sincos_stret", reinterpret_cast<void*>(__sincos_stret),
                     "Host");
#else
  REGISTER_LIBM_SYMBOL(sincos, void (*)(double, double*, double*));
#endif
  REGISTER_LIBM_SYMBOL(sinh, double (*)(double));
  REGISTER_LIBM_SYMBOL(sqrt, double (*)(double));
  REGISTER_LIBM_SYMBOL(tan, double (*)(double));
  REGISTER_LIBM_SYMBOL(tanh, double (*)(double));
  REGISTER_LIBM_SYMBOL(tgamma, double (*)(double));
  REGISTER_LIBM_SYMBOL(trunc, double (*)(double));

#undef REGISTER_LIBM_SYMBOL

  // Memory routines, registered under their own names.
  registry->Register("memcpy", reinterpret_cast<void*>(memcpy), "Host");
  registry->Register("memmove", reinterpret_cast<void*>(memmove), "Host");
  registry->Register("memset", reinterpret_cast<void*>(memset), "Host");

  // Apple-only: note that the name "__bzero" is mapped to the address of
  // bzero.
#ifdef __APPLE__
  registry->Register("__bzero", reinterpret_cast<void*>(bzero), "Host");
  registry->Register("memset_pattern16",
                     reinterpret_cast<void*>(memset_pattern16), "Host");
#endif

  // Only registered when MEMORY_SANITIZER is defined.
#ifdef MEMORY_SANITIZER
  registry->Register("__msan_unpoison",
                     reinterpret_cast<void*>(__msan_unpoison), "Host");
#endif

  // Only registered when PLATFORM_WINDOWS is defined.
#if defined(PLATFORM_WINDOWS)
  registry->Register("__chkstk", reinterpret_cast<void*>(__chkstk), "Host");
#endif

  return true;
}

// Initializing this namespace-scope variable runs RegisterKnownJITSymbols()
// during static initialization of this translation unit. The value itself is
// not read anywhere in the visible code.
bool unused = RegisterKnownJITSymbols();
}  // namespace
406 
407 }  // namespace cpu
408 }  // namespace xla
409