1 /**************************************************************************
2  *
3  * Copyright 2010 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 /**
30  * The purpose of this module is to expose LLVM C++ functionality that is not
31  * available through the LLVM C bindings.
32  */
33 
34 
35 // Undef these vars just to silence warnings
36 #undef PACKAGE_BUGREPORT
37 #undef PACKAGE_NAME
38 #undef PACKAGE_STRING
39 #undef PACKAGE_TARNAME
40 #undef PACKAGE_VERSION
41 
42 
43 #include <stddef.h>
44 
45 // Workaround http://llvm.org/PR23628
46 #if HAVE_LLVM >= 0x0307
47 #  pragma push_macro("DEBUG")
48 #  undef DEBUG
49 #endif
50 
51 #include <llvm-c/Core.h>
52 #if HAVE_LLVM >= 0x0306
53 #include <llvm-c/Support.h>
54 #endif
55 #include <llvm-c/ExecutionEngine.h>
56 #include <llvm/Target/TargetOptions.h>
57 #include <llvm/ExecutionEngine/ExecutionEngine.h>
58 #include <llvm/ADT/Triple.h>
59 #if HAVE_LLVM >= 0x0307
60 #include <llvm/Analysis/TargetLibraryInfo.h>
61 #else
62 #include <llvm/Target/TargetLibraryInfo.h>
63 #endif
64 #if HAVE_LLVM < 0x0306
65 #include <llvm/ExecutionEngine/JITMemoryManager.h>
66 #else
67 #include <llvm/ExecutionEngine/SectionMemoryManager.h>
68 #endif
69 #include <llvm/Support/CommandLine.h>
70 #include <llvm/Support/Host.h>
71 #include <llvm/Support/PrettyStackTrace.h>
72 
73 #include <llvm/Support/TargetSelect.h>
74 
75 #if HAVE_LLVM >= 0x0305
76 #include <llvm/IR/CallSite.h>
77 #endif
78 #include <llvm/IR/IRBuilder.h>
79 #include <llvm/IR/Module.h>
80 #include <llvm/Support/CBindingWrapping.h>
81 
82 #include <llvm/Config/llvm-config.h>
83 #if LLVM_USE_INTEL_JITEVENTS
84 #include <llvm/ExecutionEngine/JITEventListener.h>
85 #endif
86 
87 // Workaround http://llvm.org/PR23628
88 #if HAVE_LLVM >= 0x0307
89 #  pragma pop_macro("DEBUG")
90 #endif
91 
92 #include "c11/threads.h"
93 #include "os/os_thread.h"
94 #include "pipe/p_config.h"
95 #include "util/u_debug.h"
96 #include "util/u_cpu_detect.h"
97 
98 #include "lp_bld_misc.h"
99 #include "lp_bld_debug.h"
100 
101 namespace {
102 
103 class LLVMEnsureMultithreaded {
104 public:
LLVMEnsureMultithreaded()105    LLVMEnsureMultithreaded()
106    {
107       if (!LLVMIsMultithreaded()) {
108          LLVMStartMultithreaded();
109       }
110    }
111 };
112 
113 static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;
114 
115 }
116 
117 static once_flag init_native_targets_once_flag = ONCE_FLAG_INIT;
118 
init_native_targets()119 static void init_native_targets()
120 {
121    // If we have a native target, initialize it to ensure it is linked in and
122    // usable by the JIT.
123    llvm::InitializeNativeTarget();
124 
125    llvm::InitializeNativeTargetAsmPrinter();
126 
127    llvm::InitializeNativeTargetDisassembler();
128 #if DEBUG && HAVE_LLVM >= 0x0306
129    {
130       char *env_llc_options = getenv("GALLIVM_LLC_OPTIONS");
131       if (env_llc_options) {
132          char *option;
133          char *options[64] = {(char *) "llc"};      // Warning without cast
134          int   n;
135          for (n = 0, option = strtok(env_llc_options, " "); option; n++, option = strtok(NULL, " ")) {
136             options[n + 1] = option;
137          }
138          if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
139             debug_printf("llc additional options (%d):\n", n);
140             for (int i = 1; i <= n; i++)
141                debug_printf("\t%s\n", options[i]);
142             debug_printf("\n");
143          }
144          LLVMParseCommandLineOptions(n + 1, options, NULL);
145       }
146    }
147 #endif
148 }
149 
150 extern "C" void
lp_set_target_options(void)151 lp_set_target_options(void)
152 {
153 #if HAVE_LLVM < 0x0304
154    /*
155     * By default LLVM adds a signal handler to output a pretty stack trace.
156     * This signal handler is never removed, causing problems when unloading the
157     * shared object where the gallium driver resides.
158     */
159    llvm::DisablePrettyStackTrace = true;
160 #endif
161 
162    /* The llvm target registry is not thread-safe, so drivers and state-trackers
163     * that want to initialize targets should use the lp_set_target_options()
164     * function to safely initialize targets.
165     *
166     * LLVM targets should be initialized before the driver or state-tracker tries
167     * to access the registry.
168     */
169    call_once(&init_native_targets_once_flag, init_native_targets);
170 }
171 
172 extern "C"
173 LLVMTargetLibraryInfoRef
gallivm_create_target_library_info(const char * triple)174 gallivm_create_target_library_info(const char *triple)
175 {
176    return reinterpret_cast<LLVMTargetLibraryInfoRef>(
177 #if HAVE_LLVM < 0x0307
178    new llvm::TargetLibraryInfo(
179 #else
180    new llvm::TargetLibraryInfoImpl(
181 #endif
182    llvm::Triple(triple)));
183 }
184 
185 extern "C"
186 void
gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)187 gallivm_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
188 {
189    delete reinterpret_cast<
190 #if HAVE_LLVM < 0x0307
191    llvm::TargetLibraryInfo
192 #else
193    llvm::TargetLibraryInfoImpl
194 #endif
195    *>(library_info);
196 }
197 
198 
199 #if HAVE_LLVM < 0x0304
200 
201 extern "C"
202 void
LLVMSetAlignmentBackport(LLVMValueRef V,unsigned Bytes)203 LLVMSetAlignmentBackport(LLVMValueRef V,
204                          unsigned Bytes)
205 {
206    switch (LLVMGetInstructionOpcode(V)) {
207    case LLVMLoad:
208       llvm::unwrap<llvm::LoadInst>(V)->setAlignment(Bytes);
209       break;
210    case LLVMStore:
211       llvm::unwrap<llvm::StoreInst>(V)->setAlignment(Bytes);
212       break;
213    default:
214       assert(0);
215       break;
216    }
217 }
218 
219 #endif
220 
221 
222 #if HAVE_LLVM < 0x0306
223 typedef llvm::JITMemoryManager BaseMemoryManager;
224 #else
225 typedef llvm::RTDyldMemoryManager BaseMemoryManager;
226 #endif
227 
228 
229 /*
230  * Delegating is tedious but the default manager class is hidden in an
231  * anonymous namespace in LLVM, so we cannot just derive from it to change
232  * its behavior.
233  */
234 class DelegatingJITMemoryManager : public BaseMemoryManager {
235 
236    protected:
237       virtual BaseMemoryManager *mgr() const = 0;
238 
239    public:
240 #if HAVE_LLVM < 0x0306
241       /*
242        * From JITMemoryManager
243        */
setMemoryWritable()244       virtual void setMemoryWritable() {
245          mgr()->setMemoryWritable();
246       }
setMemoryExecutable()247       virtual void setMemoryExecutable() {
248          mgr()->setMemoryExecutable();
249       }
setPoisonMemory(bool poison)250       virtual void setPoisonMemory(bool poison) {
251          mgr()->setPoisonMemory(poison);
252       }
AllocateGOT()253       virtual void AllocateGOT() {
254          mgr()->AllocateGOT();
255          /*
256           * isManagingGOT() is not virtual in base class so we can't delegate.
257           * Instead we mirror the value of HasGOT in our instance.
258           */
259          HasGOT = mgr()->isManagingGOT();
260       }
getGOTBase() const261       virtual uint8_t *getGOTBase() const {
262          return mgr()->getGOTBase();
263       }
startFunctionBody(const llvm::Function * F,uintptr_t & ActualSize)264       virtual uint8_t *startFunctionBody(const llvm::Function *F,
265                                          uintptr_t &ActualSize) {
266          return mgr()->startFunctionBody(F, ActualSize);
267       }
allocateStub(const llvm::GlobalValue * F,unsigned StubSize,unsigned Alignment)268       virtual uint8_t *allocateStub(const llvm::GlobalValue *F,
269                                     unsigned StubSize,
270                                     unsigned Alignment) {
271          return mgr()->allocateStub(F, StubSize, Alignment);
272       }
endFunctionBody(const llvm::Function * F,uint8_t * FunctionStart,uint8_t * FunctionEnd)273       virtual void endFunctionBody(const llvm::Function *F,
274                                    uint8_t *FunctionStart,
275                                    uint8_t *FunctionEnd) {
276          mgr()->endFunctionBody(F, FunctionStart, FunctionEnd);
277       }
allocateSpace(intptr_t Size,unsigned Alignment)278       virtual uint8_t *allocateSpace(intptr_t Size, unsigned Alignment) {
279          return mgr()->allocateSpace(Size, Alignment);
280       }
allocateGlobal(uintptr_t Size,unsigned Alignment)281       virtual uint8_t *allocateGlobal(uintptr_t Size, unsigned Alignment) {
282          return mgr()->allocateGlobal(Size, Alignment);
283       }
deallocateFunctionBody(void * Body)284       virtual void deallocateFunctionBody(void *Body) {
285          mgr()->deallocateFunctionBody(Body);
286       }
287 #if HAVE_LLVM < 0x0304
startExceptionTable(const llvm::Function * F,uintptr_t & ActualSize)288       virtual uint8_t *startExceptionTable(const llvm::Function *F,
289                                            uintptr_t &ActualSize) {
290          return mgr()->startExceptionTable(F, ActualSize);
291       }
endExceptionTable(const llvm::Function * F,uint8_t * TableStart,uint8_t * TableEnd,uint8_t * FrameRegister)292       virtual void endExceptionTable(const llvm::Function *F,
293                                      uint8_t *TableStart,
294                                      uint8_t *TableEnd,
295                                      uint8_t *FrameRegister) {
296          mgr()->endExceptionTable(F, TableStart, TableEnd,
297                                   FrameRegister);
298       }
deallocateExceptionTable(void * ET)299       virtual void deallocateExceptionTable(void *ET) {
300          mgr()->deallocateExceptionTable(ET);
301       }
302 #endif
CheckInvariants(std::string & s)303       virtual bool CheckInvariants(std::string &s) {
304          return mgr()->CheckInvariants(s);
305       }
GetDefaultCodeSlabSize()306       virtual size_t GetDefaultCodeSlabSize() {
307          return mgr()->GetDefaultCodeSlabSize();
308       }
GetDefaultDataSlabSize()309       virtual size_t GetDefaultDataSlabSize() {
310          return mgr()->GetDefaultDataSlabSize();
311       }
GetDefaultStubSlabSize()312       virtual size_t GetDefaultStubSlabSize() {
313          return mgr()->GetDefaultStubSlabSize();
314       }
GetNumCodeSlabs()315       virtual unsigned GetNumCodeSlabs() {
316          return mgr()->GetNumCodeSlabs();
317       }
GetNumDataSlabs()318       virtual unsigned GetNumDataSlabs() {
319          return mgr()->GetNumDataSlabs();
320       }
GetNumStubSlabs()321       virtual unsigned GetNumStubSlabs() {
322          return mgr()->GetNumStubSlabs();
323       }
324 #endif
325 
326       /*
327        * From RTDyldMemoryManager
328        */
329 #if HAVE_LLVM >= 0x0304
allocateCodeSection(uintptr_t Size,unsigned Alignment,unsigned SectionID,llvm::StringRef SectionName)330       virtual uint8_t *allocateCodeSection(uintptr_t Size,
331                                            unsigned Alignment,
332                                            unsigned SectionID,
333                                            llvm::StringRef SectionName) {
334          return mgr()->allocateCodeSection(Size, Alignment, SectionID,
335                                            SectionName);
336       }
337 #else
allocateCodeSection(uintptr_t Size,unsigned Alignment,unsigned SectionID)338       virtual uint8_t *allocateCodeSection(uintptr_t Size,
339                                            unsigned Alignment,
340                                            unsigned SectionID) {
341          return mgr()->allocateCodeSection(Size, Alignment, SectionID);
342       }
343 #endif
allocateDataSection(uintptr_t Size,unsigned Alignment,unsigned SectionID,llvm::StringRef SectionName,bool IsReadOnly)344       virtual uint8_t *allocateDataSection(uintptr_t Size,
345                                            unsigned Alignment,
346                                            unsigned SectionID,
347 #if HAVE_LLVM >= 0x0304
348                                            llvm::StringRef SectionName,
349 #endif
350                                            bool IsReadOnly) {
351          return mgr()->allocateDataSection(Size, Alignment, SectionID,
352 #if HAVE_LLVM >= 0x0304
353                                            SectionName,
354 #endif
355                                            IsReadOnly);
356       }
357 #if HAVE_LLVM >= 0x0304
registerEHFrames(uint8_t * Addr,uint64_t LoadAddr,size_t Size)358       virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
359          mgr()->registerEHFrames(Addr, LoadAddr, Size);
360       }
361 #else
registerEHFrames(llvm::StringRef SectionData)362       virtual void registerEHFrames(llvm::StringRef SectionData) {
363          mgr()->registerEHFrames(SectionData);
364       }
365 #endif
366 #if HAVE_LLVM >= 0x0500
deregisterEHFrames()367       virtual void deregisterEHFrames() {
368          mgr()->deregisterEHFrames();
369       }
370 #elif HAVE_LLVM >= 0x0304
deregisterEHFrames(uint8_t * Addr,uint64_t LoadAddr,size_t Size)371       virtual void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
372          mgr()->deregisterEHFrames(Addr, LoadAddr, Size);
373       }
374 #endif
getPointerToNamedFunction(const std::string & Name,bool AbortOnFailure=true)375       virtual void *getPointerToNamedFunction(const std::string &Name,
376                                               bool AbortOnFailure=true) {
377          return mgr()->getPointerToNamedFunction(Name, AbortOnFailure);
378       }
379 #if HAVE_LLVM <= 0x0303
applyPermissions(std::string * ErrMsg=0)380       virtual bool applyPermissions(std::string *ErrMsg = 0) {
381          return mgr()->applyPermissions(ErrMsg);
382       }
383 #else
finalizeMemory(std::string * ErrMsg=0)384       virtual bool finalizeMemory(std::string *ErrMsg = 0) {
385          return mgr()->finalizeMemory(ErrMsg);
386       }
387 #endif
388 };
389 
390 
391 /*
392  * Delegate memory management to one shared manager for more efficient use
393  * of memory than creating a separate pool for each LLVM engine.
394  * Keep generated code until freeGeneratedCode() is called, instead of when
395  * memory manager is destroyed, which happens during engine destruction.
396  * This allows additional memory savings as we don't have to keep the engine
397  * around in order to use the code.
398  * All methods are delegated to the shared manager except destruction and
399  * deallocating code.  For the latter we just remember what needs to be
400  * deallocated later.  The shared manager is deleted once it is empty.
401  */
402 class ShaderMemoryManager : public DelegatingJITMemoryManager {
403 
404    BaseMemoryManager *TheMM;
405 
406    struct GeneratedCode {
407       typedef std::vector<void *> Vec;
408       Vec FunctionBody, ExceptionTable;
409       BaseMemoryManager *TheMM;
410 
GeneratedCodeShaderMemoryManager::GeneratedCode411       GeneratedCode(BaseMemoryManager *MM) {
412          TheMM = MM;
413       }
414 
~GeneratedCodeShaderMemoryManager::GeneratedCode415       ~GeneratedCode() {
416          /*
417           * Deallocate things as previously requested and
418           * free shared manager when no longer used.
419           */
420 #if HAVE_LLVM < 0x0306
421          Vec::iterator i;
422 
423          assert(TheMM);
424          for ( i = FunctionBody.begin(); i != FunctionBody.end(); ++i )
425             TheMM->deallocateFunctionBody(*i);
426 #if HAVE_LLVM < 0x0304
427          for ( i = ExceptionTable.begin(); i != ExceptionTable.end(); ++i )
428             TheMM->deallocateExceptionTable(*i);
429 #endif /* HAVE_LLVM < 0x0304 */
430 #endif /* HAVE_LLVM < 0x0306 */
431       }
432    };
433 
434    GeneratedCode *code;
435 
mgr() const436    BaseMemoryManager *mgr() const {
437       return TheMM;
438    }
439 
440    public:
441 
ShaderMemoryManager(BaseMemoryManager * MM)442       ShaderMemoryManager(BaseMemoryManager* MM) {
443          TheMM = MM;
444          code = new GeneratedCode(MM);
445       }
446 
~ShaderMemoryManager()447       virtual ~ShaderMemoryManager() {
448          /*
449           * 'code' is purposely not deleted.  It is the user's responsibility
450           * to call getGeneratedCode() and freeGeneratedCode().
451           */
452       }
453 
getGeneratedCode()454       struct lp_generated_code *getGeneratedCode() {
455          return (struct lp_generated_code *) code;
456       }
457 
freeGeneratedCode(struct lp_generated_code * code)458       static void freeGeneratedCode(struct lp_generated_code *code) {
459          delete (GeneratedCode *) code;
460       }
461 
462 #if HAVE_LLVM < 0x0304
deallocateExceptionTable(void * ET)463       virtual void deallocateExceptionTable(void *ET) {
464          // remember for later deallocation
465          code->ExceptionTable.push_back(ET);
466       }
467 #endif
468 
deallocateFunctionBody(void * Body)469       virtual void deallocateFunctionBody(void *Body) {
470          // remember for later deallocation
471          code->FunctionBody.push_back(Body);
472       }
473 };
474 
475 
476 /**
477  * Same as LLVMCreateJITCompilerForModule, but:
478  * - allows using MCJIT and enabling AVX feature where available.
479  * - set target options
480  *
481  * See also:
482  * - llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
483  * - llvm/tools/lli/lli.cpp
484  * - http://markmail.org/message/ttkuhvgj4cxxy2on#query:+page:1+mid:aju2dggerju3ivd3+state:results
485  */
486 extern "C"
487 LLVMBool
lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef * OutJIT,lp_generated_code ** OutCode,LLVMModuleRef M,LLVMMCJITMemoryManagerRef CMM,unsigned OptLevel,int useMCJIT,char ** OutError)488 lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
489                                         lp_generated_code **OutCode,
490                                         LLVMModuleRef M,
491                                         LLVMMCJITMemoryManagerRef CMM,
492                                         unsigned OptLevel,
493                                         int useMCJIT,
494                                         char **OutError)
495 {
496    using namespace llvm;
497 
498    std::string Error;
499 #if HAVE_LLVM >= 0x0306
500    EngineBuilder builder(std::unique_ptr<Module>(unwrap(M)));
501 #else
502    EngineBuilder builder(unwrap(M));
503 #endif
504 
505    /**
506     * LLVM 3.1+ haven't more "extern unsigned llvm::StackAlignmentOverride" and
507     * friends for configuring code generation options, like stack alignment.
508     */
509    TargetOptions options;
510 #if defined(PIPE_ARCH_X86)
511    options.StackAlignmentOverride = 4;
512 #if HAVE_LLVM < 0x0304
513    options.RealignStack = true;
514 #endif
515 #endif
516 
517 #if defined(DEBUG) && HAVE_LLVM < 0x0307
518    options.JITEmitDebugInfo = true;
519 #endif
520 
521    /* XXX: Workaround http://llvm.org/PR21435 */
522 #if defined(DEBUG) || defined(PROFILE) || \
523     (HAVE_LLVM >= 0x0303 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)))
524 #if HAVE_LLVM < 0x0304
525    options.NoFramePointerElimNonLeaf = true;
526 #endif
527 #if HAVE_LLVM < 0x0307
528    options.NoFramePointerElim = true;
529 #endif
530 #endif
531 
532    builder.setEngineKind(EngineKind::JIT)
533           .setErrorStr(&Error)
534           .setTargetOptions(options)
535           .setOptLevel((CodeGenOpt::Level)OptLevel);
536 
537    if (useMCJIT) {
538 #if HAVE_LLVM < 0x0306
539        builder.setUseMCJIT(true);
540 #endif
541 #ifdef _WIN32
542        /*
543         * MCJIT works on Windows, but currently only through ELF object format.
544         *
545         * XXX: We could use `LLVM_HOST_TRIPLE "-elf"` but LLVM_HOST_TRIPLE has
546         * different strings for MinGW/MSVC, so better play it safe and be
547         * explicit.
548         */
549 #  ifdef _WIN64
550        LLVMSetTarget(M, "x86_64-pc-win32-elf");
551 #  else
552        LLVMSetTarget(M, "i686-pc-win32-elf");
553 #  endif
554 #endif
555    }
556 
557    llvm::SmallVector<std::string, 16> MAttrs;
558 
559 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
560 #if HAVE_LLVM >= 0x0400
561    /* llvm-3.7+ implements sys::getHostCPUFeatures for x86,
562     * which allows us to enable/disable code generation based
563     * on the results of cpuid.
564     */
565    llvm::StringMap<bool> features;
566    llvm::sys::getHostCPUFeatures(features);
567 
568    for (StringMapIterator<bool> f = features.begin();
569         f != features.end();
570         ++f) {
571       MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
572    }
573 #else
574    /*
575     * We need to unset attributes because sometimes LLVM mistakenly assumes
576     * certain features are present given the processor name.
577     *
578     * https://bugs.freedesktop.org/show_bug.cgi?id=92214
579     * http://llvm.org/PR25021
580     * http://llvm.org/PR19429
581     * http://llvm.org/PR16721
582     */
583    MAttrs.push_back(util_cpu_caps.has_sse    ? "+sse"    : "-sse"   );
584    MAttrs.push_back(util_cpu_caps.has_sse2   ? "+sse2"   : "-sse2"  );
585    MAttrs.push_back(util_cpu_caps.has_sse3   ? "+sse3"   : "-sse3"  );
586    MAttrs.push_back(util_cpu_caps.has_ssse3  ? "+ssse3"  : "-ssse3" );
587 #if HAVE_LLVM >= 0x0304
588    MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse4.1" : "-sse4.1");
589 #else
590    MAttrs.push_back(util_cpu_caps.has_sse4_1 ? "+sse41"  : "-sse41" );
591 #endif
592 #if HAVE_LLVM >= 0x0304
593    MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse4.2" : "-sse4.2");
594 #else
595    MAttrs.push_back(util_cpu_caps.has_sse4_2 ? "+sse42"  : "-sse42" );
596 #endif
597    /*
598     * AVX feature is not automatically detected from CPUID by the X86 target
599     * yet, because the old (yet default) JIT engine is not capable of
600     * emitting the opcodes. On newer llvm versions it is and at least some
601     * versions (tested with 3.3) will emit avx opcodes without this anyway.
602     */
603    MAttrs.push_back(util_cpu_caps.has_avx  ? "+avx"  : "-avx");
604    MAttrs.push_back(util_cpu_caps.has_f16c ? "+f16c" : "-f16c");
605    if (HAVE_LLVM >= 0x0304) {
606       MAttrs.push_back(util_cpu_caps.has_fma  ? "+fma"  : "-fma");
607    } else {
608       /*
609        * The old JIT in LLVM 3.3 has a bug encoding llvm.fmuladd.f32 and
610        * llvm.fmuladd.v2f32 intrinsics when FMA is available.
611        */
612       MAttrs.push_back("-fma");
613    }
614    MAttrs.push_back(util_cpu_caps.has_avx2 ? "+avx2" : "-avx2");
615    /* disable avx512 and all subvariants */
616 #if HAVE_LLVM >= 0x0304
617    MAttrs.push_back("-avx512cd");
618    MAttrs.push_back("-avx512er");
619    MAttrs.push_back("-avx512f");
620    MAttrs.push_back("-avx512pf");
621 #endif
622 #if HAVE_LLVM >= 0x0305
623    MAttrs.push_back("-avx512bw");
624    MAttrs.push_back("-avx512dq");
625    MAttrs.push_back("-avx512vl");
626 #endif
627 #endif
628 #endif
629 
630 #if defined(PIPE_ARCH_PPC)
631    MAttrs.push_back(util_cpu_caps.has_altivec ? "+altivec" : "-altivec");
632 #if (HAVE_LLVM >= 0x0304)
633 #if (HAVE_LLVM < 0x0400)
634    /*
635     * Make sure VSX instructions are disabled
636     * See LLVM bugs:
637     * https://llvm.org/bugs/show_bug.cgi?id=25503#c7 (fixed in 3.8.1)
638     * https://llvm.org/bugs/show_bug.cgi?id=26775 (fixed in 3.8.1)
639     * https://llvm.org/bugs/show_bug.cgi?id=33531 (fixed in 4.0)
640     * https://llvm.org/bugs/show_bug.cgi?id=34647 (llc performance on certain unusual shader IR; intro'd in 4.0, pending as of 5.0)
641     */
642    if (util_cpu_caps.has_altivec) {
643       MAttrs.push_back("-vsx");
644    }
645 #else
646    /*
647     * Bug 25503 is fixed, by the same fix that fixed
648     * bug 26775, in versions of LLVM later than 3.8 (starting with 3.8.1).
649     * BZ 33531 actually comprises more than one bug, all of
650     * which are fixed in LLVM 4.0.
651     *
652     * With LLVM 4.0 or higher:
653     * Make sure VSX instructions are ENABLED, unless
654     * a) the entire -mattr option is overridden via GALLIVM_MATTRS, or
655     * b) VSX instructions are explicitly enabled/disabled via GALLIVM_VSX=1 or 0.
656     */
657    if (util_cpu_caps.has_altivec) {
658       char *env_mattrs = getenv("GALLIVM_MATTRS");
659       if (env_mattrs) {
660          MAttrs.push_back(env_mattrs);
661       }
662       else {
663          boolean enable_vsx = true;
664          char *env_vsx = getenv("GALLIVM_VSX");
665          if (env_vsx && env_vsx[0] == '0') {
666             enable_vsx = false;
667          }
668          if (enable_vsx)
669             MAttrs.push_back("+vsx");
670          else
671             MAttrs.push_back("-vsx");
672       }
673    }
674 #endif
675 #endif
676 #endif
677 
678    builder.setMAttrs(MAttrs);
679 
680    if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
681       int n = MAttrs.size();
682       if (n > 0) {
683          debug_printf("llc -mattr option(s): ");
684          for (int i = 0; i < n; i++)
685             debug_printf("%s%s", MAttrs[i].c_str(), (i < n - 1) ? "," : "");
686          debug_printf("\n");
687       }
688    }
689 
690 #if HAVE_LLVM >= 0x0305
691    StringRef MCPU = llvm::sys::getHostCPUName();
692    /*
693     * The cpu bits are no longer set automatically, so need to set mcpu manually.
694     * Note that the MAttrs set above will be sort of ignored (since we should
695     * not set any which would not be set by specifying the cpu anyway).
696     * It ought to be safe though since getHostCPUName() should include bits
697     * not only from the cpu but environment as well (for instance if it's safe
698     * to use avx instructions which need OS support). According to
699     * http://llvm.org/bugs/show_bug.cgi?id=19429 however if I understand this
700     * right it may be necessary to specify older cpu (or disable mattrs) though
701     * when not using MCJIT so no instructions are generated which the old JIT
702     * can't handle. Not entirely sure if we really need to do anything yet.
703     */
704 #if defined(PIPE_ARCH_LITTLE_ENDIAN)  && defined(PIPE_ARCH_PPC_64)
705    /*
706     * Versions of LLVM prior to 4.0 lacked a table entry for "POWER8NVL",
707     * resulting in (big-endian) "generic" being returned on
708     * little-endian Power8NVL systems.  The result was that code that
709     * attempted to load the least significant 32 bits of a 64-bit quantity
710     * from memory loaded the wrong half.  This resulted in failures in some
711     * Piglit tests, e.g.
712     * .../arb_gpu_shader_fp64/execution/conversion/frag-conversion-explicit-double-uint
713     */
714    if (MCPU == "generic")
715       MCPU = "pwr8";
716 #endif
717    builder.setMCPU(MCPU);
718    if (gallivm_debug & (GALLIVM_DEBUG_IR | GALLIVM_DEBUG_ASM | GALLIVM_DEBUG_DUMP_BC)) {
719       debug_printf("llc -mcpu option: %s\n", MCPU.str().c_str());
720    }
721 #endif
722 
723    ShaderMemoryManager *MM = NULL;
724    if (useMCJIT) {
725        BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
726        MM = new ShaderMemoryManager(JMM);
727        *OutCode = MM->getGeneratedCode();
728 
729 #if HAVE_LLVM >= 0x0306
730        builder.setMCJITMemoryManager(std::unique_ptr<RTDyldMemoryManager>(MM));
731        MM = NULL; // ownership taken by std::unique_ptr
732 #elif HAVE_LLVM > 0x0303
733        builder.setMCJITMemoryManager(MM);
734 #else
735        builder.setJITMemoryManager(MM);
736 #endif
737    } else {
738 #if HAVE_LLVM < 0x0306
739        BaseMemoryManager* JMM = reinterpret_cast<BaseMemoryManager*>(CMM);
740        MM = new ShaderMemoryManager(JMM);
741        *OutCode = MM->getGeneratedCode();
742 
743        builder.setJITMemoryManager(MM);
744 #else
745        assert(0);
746 #endif
747    }
748 
749    ExecutionEngine *JIT;
750 
751    JIT = builder.create();
752 #if LLVM_USE_INTEL_JITEVENTS
753    JITEventListener *JEL = JITEventListener::createIntelJITEventListener();
754    JIT->RegisterJITEventListener(JEL);
755 #endif
756    if (JIT) {
757       *OutJIT = wrap(JIT);
758       return 0;
759    }
760    lp_free_generated_code(*OutCode);
761    *OutCode = 0;
762    delete MM;
763    *OutError = strdup(Error.c_str());
764    return 1;
765 }
766 
767 
768 extern "C"
769 void
lp_free_generated_code(struct lp_generated_code * code)770 lp_free_generated_code(struct lp_generated_code *code)
771 {
772    ShaderMemoryManager::freeGeneratedCode(code);
773 }
774 
775 extern "C"
776 LLVMMCJITMemoryManagerRef
lp_get_default_memory_manager()777 lp_get_default_memory_manager()
778 {
779    BaseMemoryManager *mm;
780 #if HAVE_LLVM < 0x0306
781    mm = llvm::JITMemoryManager::CreateDefaultMemManager();
782 #else
783    mm = new llvm::SectionMemoryManager();
784 #endif
785    return reinterpret_cast<LLVMMCJITMemoryManagerRef>(mm);
786 }
787 
788 extern "C"
789 void
lp_free_memory_manager(LLVMMCJITMemoryManagerRef memorymgr)790 lp_free_memory_manager(LLVMMCJITMemoryManagerRef memorymgr)
791 {
792    delete reinterpret_cast<BaseMemoryManager*>(memorymgr);
793 }
794 
795 extern "C" LLVMValueRef
lp_get_called_value(LLVMValueRef call)796 lp_get_called_value(LLVMValueRef call)
797 {
798 #if HAVE_LLVM >= 0x0309
799 	return LLVMGetCalledValue(call);
800 #elif HAVE_LLVM >= 0x0305
801 	return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue());
802 #else
803 	return NULL; /* radeonsi doesn't support so old LLVM. */
804 #endif
805 }
806 
807 extern "C" bool
lp_is_function(LLVMValueRef v)808 lp_is_function(LLVMValueRef v)
809 {
810 #if HAVE_LLVM >= 0x0309
811 	return LLVMGetValueKind(v) == LLVMFunctionValueKind;
812 #else
813 	return llvm::isa<llvm::Function>(llvm::unwrap(v));
814 #endif
815 }
816