1 //===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Top-level implementation for the PTX target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "PTX.h"
15 #include "PTXTargetMachine.h"
16 #include "llvm/PassManager.h"
17 #include "llvm/Analysis/Passes.h"
18 #include "llvm/Analysis/Verifier.h"
19 #include "llvm/Assembly/PrintModulePass.h"
20 #include "llvm/ADT/OwningPtr.h"
21 #include "llvm/CodeGen/AsmPrinter.h"
22 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/Passes.h"
25 #include "llvm/MC/MCAsmInfo.h"
26 #include "llvm/MC/MCInstrInfo.h"
27 #include "llvm/MC/MCStreamer.h"
28 #include "llvm/MC/MCSubtargetInfo.h"
29 #include "llvm/Support/TargetRegistry.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include "llvm/Target/TargetData.h"
32 #include "llvm/Target/TargetInstrInfo.h"
33 #include "llvm/Target/TargetLowering.h"
34 #include "llvm/Target/TargetLoweringObjectFile.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/Target/TargetOptions.h"
37 #include "llvm/Target/TargetRegisterInfo.h"
38 #include "llvm/Target/TargetSubtargetInfo.h"
39 #include "llvm/Transforms/Scalar.h"
40 #include "llvm/Support/Debug.h"
41 #include "llvm/Support/TargetRegistry.h"
42 
43 
44 using namespace llvm;
45 
46 namespace llvm {
47   MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
48                                    bool isVerboseAsm, bool useLoc,
49                                    bool useCFI,
50                                    MCInstPrinter *InstPrint,
51                                    MCCodeEmitter *CE,
52                                    MCAsmBackend *MAB,
53                                    bool ShowInst);
54 }
55 
LLVMInitializePTXTarget()56 extern "C" void LLVMInitializePTXTarget() {
57 
58   RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
59   RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
60 
61   TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
62   TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
63 }
64 
65 namespace {
66   const char* DataLayout32 =
67     "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
68   const char* DataLayout64 =
69     "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
70 
71   // Copied from LLVMTargetMachine.cpp
printNoVerify(PassManagerBase & PM,const char * Banner)72   void printNoVerify(PassManagerBase &PM, const char *Banner) {
73     if (PrintMachineCode)
74       PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
75   }
76 
printAndVerify(PassManagerBase & PM,const char * Banner)77   void printAndVerify(PassManagerBase &PM,
78                       const char *Banner) {
79     if (PrintMachineCode)
80       PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
81 
82     //if (VerifyMachineCode)
83     //  PM.add(createMachineVerifierPass(Banner));
84   }
85 }
86 
87 // DataLayout and FrameLowering are filled with dummy data
PTXTargetMachine(const Target & T,StringRef TT,StringRef CPU,StringRef FS,Reloc::Model RM,CodeModel::Model CM,bool is64Bit)88 PTXTargetMachine::PTXTargetMachine(const Target &T,
89                                    StringRef TT, StringRef CPU, StringRef FS,
90                                    Reloc::Model RM, CodeModel::Model CM,
91                                    bool is64Bit)
92   : LLVMTargetMachine(T, TT, CPU, FS, RM, CM),
93     DataLayout(is64Bit ? DataLayout64 : DataLayout32),
94     Subtarget(TT, CPU, FS, is64Bit),
95     FrameLowering(Subtarget),
96     InstrInfo(*this),
97     TSInfo(*this),
98     TLInfo(*this) {
99 }
100 
PTX32TargetMachine(const Target & T,StringRef TT,StringRef CPU,StringRef FS,Reloc::Model RM,CodeModel::Model CM)101 PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
102                                        StringRef CPU, StringRef FS,
103                                        Reloc::Model RM, CodeModel::Model CM)
104   : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) {
105 }
106 
PTX64TargetMachine(const Target & T,StringRef TT,StringRef CPU,StringRef FS,Reloc::Model RM,CodeModel::Model CM)107 PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
108                                        StringRef CPU, StringRef FS,
109                                        Reloc::Model RM, CodeModel::Model CM)
110   : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) {
111 }
112 
addInstSelector(PassManagerBase & PM,CodeGenOpt::Level OptLevel)113 bool PTXTargetMachine::addInstSelector(PassManagerBase &PM,
114                                        CodeGenOpt::Level OptLevel) {
115   PM.add(createPTXISelDag(*this, OptLevel));
116   return false;
117 }
118 
addPostRegAlloc(PassManagerBase & PM,CodeGenOpt::Level OptLevel)119 bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM,
120                                        CodeGenOpt::Level OptLevel) {
121   // PTXMFInfoExtract must after register allocation!
122   //PM.add(createPTXMFInfoExtract(*this, OptLevel));
123   return false;
124 }
125 
addPassesToEmitFile(PassManagerBase & PM,formatted_raw_ostream & Out,CodeGenFileType FileType,CodeGenOpt::Level OptLevel,bool DisableVerify)126 bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
127                                            formatted_raw_ostream &Out,
128                                            CodeGenFileType FileType,
129                                            CodeGenOpt::Level OptLevel,
130                                            bool DisableVerify) {
131   // This is mostly based on LLVMTargetMachine::addPassesToEmitFile
132 
133   // Add common CodeGen passes.
134   MCContext *Context = 0;
135   if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context))
136     return true;
137   assert(Context != 0 && "Failed to get MCContext");
138 
139   if (hasMCSaveTempLabels())
140     Context->setAllowTemporaryLabels(false);
141 
142   const MCAsmInfo &MAI = *getMCAsmInfo();
143   const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>();
144   OwningPtr<MCStreamer> AsmStreamer;
145 
146   switch (FileType) {
147   default: return true;
148   case CGFT_AssemblyFile: {
149     MCInstPrinter *InstPrinter =
150       getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI);
151 
152     // Create a code emitter if asked to show the encoding.
153     MCCodeEmitter *MCE = 0;
154     MCAsmBackend *MAB = 0;
155 
156     MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
157                                                   true, /* verbose asm */
158                                                   hasMCUseLoc(),
159                                                   hasMCUseCFI(),
160                                                   InstPrinter,
161                                                   MCE, MAB,
162                                                   false /* show MC encoding */);
163     AsmStreamer.reset(S);
164     break;
165   }
166   case CGFT_ObjectFile: {
167     llvm_unreachable("Object file emission is not supported with PTX");
168   }
169   case CGFT_Null:
170     // The Null output is intended for use for performance analysis and testing,
171     // not real users.
172     AsmStreamer.reset(createNullStreamer(*Context));
173     break;
174   }
175 
176   // MC Logging
177   //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs()));
178 
179   // Create the AsmPrinter, which takes ownership of AsmStreamer if successful.
180   FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer);
181   if (Printer == 0)
182     return true;
183 
184   // If successful, createAsmPrinter took ownership of AsmStreamer.
185   AsmStreamer.take();
186 
187   PM.add(Printer);
188 
189   PM.add(createGCInfoDeleter());
190   return false;
191 }
192 
addCommonCodeGenPasses(PassManagerBase & PM,CodeGenOpt::Level OptLevel,bool DisableVerify,MCContext * & OutContext)193 bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
194                                               CodeGenOpt::Level OptLevel,
195                                               bool DisableVerify,
196                                               MCContext *&OutContext) {
197   // Add standard LLVM codegen passes.
198   // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some
199   // modifications for the PTX target.
200 
201   // Standard LLVM-Level Passes.
202 
203   // Basic AliasAnalysis support.
204   // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that
205   // BasicAliasAnalysis wins if they disagree. This is intended to help
206   // support "obvious" type-punning idioms.
207   PM.add(createTypeBasedAliasAnalysisPass());
208   PM.add(createBasicAliasAnalysisPass());
209 
210   // Before running any passes, run the verifier to determine if the input
211   // coming from the front-end and/or optimizer is valid.
212   if (!DisableVerify)
213     PM.add(createVerifierPass());
214 
215   // Run loop strength reduction before anything else.
216   if (OptLevel != CodeGenOpt::None) {
217     PM.add(createLoopStrengthReducePass(getTargetLowering()));
218     //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
219   }
220 
221   PM.add(createGCLoweringPass());
222 
223   // Make sure that no unreachable blocks are instruction selected.
224   PM.add(createUnreachableBlockEliminationPass());
225 
226   PM.add(createLowerInvokePass(getTargetLowering()));
227   // The lower invoke pass may create unreachable code. Remove it.
228   PM.add(createUnreachableBlockEliminationPass());
229 
230   if (OptLevel != CodeGenOpt::None)
231     PM.add(createCodeGenPreparePass(getTargetLowering()));
232 
233   PM.add(createStackProtectorPass(getTargetLowering()));
234 
235   addPreISel(PM, OptLevel);
236 
237   //PM.add(createPrintFunctionPass("\n\n"
238   //                               "*** Final LLVM Code input to ISel ***\n",
239   //                               &dbgs()));
240 
241   // All passes which modify the LLVM IR are now complete; run the verifier
242   // to ensure that the IR is valid.
243   if (!DisableVerify)
244     PM.add(createVerifierPass());
245 
246   // Standard Lower-Level Passes.
247 
248   // Install a MachineModuleInfo class, which is an immutable pass that holds
249   // all the per-module stuff we're generating, including MCContext.
250   MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
251                                                  *getRegisterInfo(),
252                                     &getTargetLowering()->getObjFileLowering());
253   PM.add(MMI);
254   OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
255 
256   // Set up a MachineFunction for the rest of CodeGen to work on.
257   PM.add(new MachineFunctionAnalysis(*this, OptLevel));
258 
259   // Ask the target for an isel.
260   if (addInstSelector(PM, OptLevel))
261     return true;
262 
263   // Print the instruction selected machine code...
264   printAndVerify(PM, "After Instruction Selection");
265 
266   // Expand pseudo-instructions emitted by ISel.
267   PM.add(createExpandISelPseudosPass());
268 
269   // Pre-ra tail duplication.
270   if (OptLevel != CodeGenOpt::None) {
271     PM.add(createTailDuplicatePass(true));
272     printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
273   }
274 
275   // Optimize PHIs before DCE: removing dead PHI cycles may make more
276   // instructions dead.
277   if (OptLevel != CodeGenOpt::None)
278     PM.add(createOptimizePHIsPass());
279 
280   // If the target requests it, assign local variables to stack slots relative
281   // to one another and simplify frame index references where possible.
282   PM.add(createLocalStackSlotAllocationPass());
283 
284   if (OptLevel != CodeGenOpt::None) {
285     // With optimization, dead code should already be eliminated. However
286     // there is one known exception: lowered code for arguments that are only
287     // used by tail calls, where the tail calls reuse the incoming stack
288     // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
289     PM.add(createDeadMachineInstructionElimPass());
290     printAndVerify(PM, "After codegen DCE pass");
291 
292     PM.add(createMachineLICMPass());
293     PM.add(createMachineCSEPass());
294     PM.add(createMachineSinkingPass());
295     printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
296 
297     PM.add(createPeepholeOptimizerPass());
298     printAndVerify(PM, "After codegen peephole optimization pass");
299   }
300 
301   // Run pre-ra passes.
302   if (addPreRegAlloc(PM, OptLevel))
303     printAndVerify(PM, "After PreRegAlloc passes");
304 
305   // Perform register allocation.
306   PM.add(createPTXRegisterAllocator());
307   printAndVerify(PM, "After Register Allocation");
308 
309   // Perform stack slot coloring and post-ra machine LICM.
310   if (OptLevel != CodeGenOpt::None) {
311     // FIXME: Re-enable coloring with register when it's capable of adding
312     // kill markers.
313     PM.add(createStackSlotColoringPass(false));
314 
315     // FIXME: Post-RA LICM has asserts that fire on virtual registers.
316     // Run post-ra machine LICM to hoist reloads / remats.
317     //if (!DisablePostRAMachineLICM)
318     //  PM.add(createMachineLICMPass(false));
319 
320     printAndVerify(PM, "After StackSlotColoring and postra Machine LICM");
321   }
322 
323   // Run post-ra passes.
324   if (addPostRegAlloc(PM, OptLevel))
325     printAndVerify(PM, "After PostRegAlloc passes");
326 
327   PM.add(createExpandPostRAPseudosPass());
328   printAndVerify(PM, "After ExpandPostRAPseudos");
329 
330   // Insert prolog/epilog code.  Eliminate abstract frame index references...
331   PM.add(createPrologEpilogCodeInserter());
332   printAndVerify(PM, "After PrologEpilogCodeInserter");
333 
334   // Run pre-sched2 passes.
335   if (addPreSched2(PM, OptLevel))
336     printAndVerify(PM, "After PreSched2 passes");
337 
338   // Second pass scheduler.
339   if (OptLevel != CodeGenOpt::None) {
340     PM.add(createPostRAScheduler(OptLevel));
341     printAndVerify(PM, "After PostRAScheduler");
342   }
343 
344   // Branch folding must be run after regalloc and prolog/epilog insertion.
345   if (OptLevel != CodeGenOpt::None) {
346     PM.add(createBranchFoldingPass(getEnableTailMergeDefault()));
347     printNoVerify(PM, "After BranchFolding");
348   }
349 
350   // Tail duplication.
351   if (OptLevel != CodeGenOpt::None) {
352     PM.add(createTailDuplicatePass(false));
353     printNoVerify(PM, "After TailDuplicate");
354   }
355 
356   PM.add(createGCMachineCodeAnalysisPass());
357 
358   //if (PrintGCInfo)
359   //  PM.add(createGCInfoPrinter(dbgs()));
360 
361   if (OptLevel != CodeGenOpt::None) {
362     PM.add(createCodePlacementOptPass());
363     printNoVerify(PM, "After CodePlacementOpt");
364   }
365 
366   if (addPreEmitPass(PM, OptLevel))
367     printNoVerify(PM, "After PreEmit passes");
368 
369   PM.add(createPTXMFInfoExtract(*this, OptLevel));
370   PM.add(createPTXFPRoundingModePass(*this, OptLevel));
371 
372   return false;
373 }
374