1 //===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains support for writing line tables info into COFF files.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "WinCodeViewLineTables.h"
15 #include "llvm/MC/MCExpr.h"
16 #include "llvm/MC/MCSymbol.h"
17 #include "llvm/Support/COFF.h"
18 
19 namespace llvm {
20 
getFullFilepath(const MDNode * S)21 StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
22   assert(S);
23   assert((isa<MDCompileUnit>(S) || isa<MDFile>(S) || isa<MDSubprogram>(S) ||
24           isa<MDLexicalBlockBase>(S)) &&
25          "Unexpected scope info");
26 
27   auto *Scope = cast<MDScope>(S);
28   StringRef Dir = Scope->getDirectory(),
29             Filename = Scope->getFilename();
30   char *&Result = DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
31   if (Result)
32     return Result;
33 
34   // Clang emits directory and relative filename info into the IR, but CodeView
35   // operates on full paths.  We could change Clang to emit full paths too, but
36   // that would increase the IR size and probably not needed for other users.
37   // For now, just concatenate and canonicalize the path here.
38   std::string Filepath;
39   if (Filename.find(':') == 1)
40     Filepath = Filename;
41   else
42     Filepath = (Dir + "\\" + Filename).str();
43 
44   // Canonicalize the path.  We have to do it textually because we may no longer
45   // have access the file in the filesystem.
46   // First, replace all slashes with backslashes.
47   std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
48 
49   // Remove all "\.\" with "\".
50   size_t Cursor = 0;
51   while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
52     Filepath.erase(Cursor, 2);
53 
54   // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
55   // path should be well-formatted, e.g. start with a drive letter, etc.
56   Cursor = 0;
57   while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
58     // Something's wrong if the path starts with "\..\", abort.
59     if (Cursor == 0)
60       break;
61 
62     size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
63     if (PrevSlash == std::string::npos)
64       // Something's wrong, abort.
65       break;
66 
67     Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
68     // The next ".." might be following the one we've just erased.
69     Cursor = PrevSlash;
70   }
71 
72   // Remove all duplicate backslashes.
73   Cursor = 0;
74   while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
75     Filepath.erase(Cursor, 1);
76 
77   Result = strdup(Filepath.c_str());
78   return StringRef(Result);
79 }
80 
maybeRecordLocation(DebugLoc DL,const MachineFunction * MF)81 void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
82                                                 const MachineFunction *MF) {
83   const MDNode *Scope = DL.getScope();
84   if (!Scope)
85     return;
86   StringRef Filename = getFullFilepath(Scope);
87 
88   // Skip this instruction if it has the same file:line as the previous one.
89   assert(CurFn);
90   if (!CurFn->Instrs.empty()) {
91     const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()];
92     if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine())
93       return;
94   }
95   FileNameRegistry.add(Filename);
96 
97   MCSymbol *MCL = Asm->MMI->getContext().CreateTempSymbol();
98   Asm->OutStreamer.EmitLabel(MCL);
99   CurFn->Instrs.push_back(MCL);
100   InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine());
101 }
102 
WinCodeViewLineTables(AsmPrinter * AP)103 WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
104     : Asm(nullptr), CurFn(nullptr) {
105   MachineModuleInfo *MMI = AP->MMI;
106 
107   // If module doesn't have named metadata anchors or COFF debug section
108   // is not available, skip any debug info related stuff.
109   if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
110       !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
111     return;
112 
113   // Tell MMI that we have debug info.
114   MMI->setDebugInfoAvailability(true);
115   Asm = AP;
116 }
117 
endModule()118 void WinCodeViewLineTables::endModule() {
119   if (FnDebugInfo.empty())
120     return;
121 
122   assert(Asm != nullptr);
123   Asm->OutStreamer.SwitchSection(
124       Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
125   Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
126 
127   // The COFF .debug$S section consists of several subsections, each starting
128   // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
129   // of the payload followed by the payload itself.  The subsections are 4-byte
130   // aligned.
131 
132   // Emit per-function debug information.  This code is extracted into a
133   // separate function for readability.
134   for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
135     emitDebugInfoForFunction(VisitedFunctions[I]);
136 
137   // This subsection holds a file index to offset in string table table.
138   Asm->OutStreamer.AddComment("File index to string table offset subsection");
139   Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
140   size_t NumFilenames = FileNameRegistry.Infos.size();
141   Asm->EmitInt32(8 * NumFilenames);
142   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
143     StringRef Filename = FileNameRegistry.Filenames[I];
144     // For each unique filename, just write its offset in the string table.
145     Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
146     // The function name offset is not followed by any additional data.
147     Asm->EmitInt32(0);
148   }
149 
150   // This subsection holds the string table.
151   Asm->OutStreamer.AddComment("String table");
152   Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
153   Asm->EmitInt32(FileNameRegistry.LastOffset);
154   // The payload starts with a null character.
155   Asm->EmitInt8(0);
156 
157   for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
158     // Just emit unique filenames one by one, separated by a null character.
159     Asm->OutStreamer.EmitBytes(FileNameRegistry.Filenames[I]);
160     Asm->EmitInt8(0);
161   }
162 
163   // No more subsections. Fill with zeros to align the end of the section by 4.
164   Asm->OutStreamer.EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
165 
166   clear();
167 }
168 
EmitLabelDiff(MCStreamer & Streamer,const MCSymbol * From,const MCSymbol * To,unsigned int Size=4)169 static void EmitLabelDiff(MCStreamer &Streamer,
170                           const MCSymbol *From, const MCSymbol *To,
171                           unsigned int Size = 4) {
172   MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
173   MCContext &Context = Streamer.getContext();
174   const MCExpr *FromRef = MCSymbolRefExpr::Create(From, Variant, Context),
175                *ToRef   = MCSymbolRefExpr::Create(To, Variant, Context);
176   const MCExpr *AddrDelta =
177       MCBinaryExpr::Create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
178   Streamer.EmitValue(AddrDelta, Size);
179 }
180 
// Emits the debug info subsections for a single function GV into the current
// section:
//   1. a symbol subsection carrying the function's size, location and name;
//   2. a line table subsection mapping code offsets to line numbers, grouped
//      into segments of consecutive locations that share a filename.
// Nothing is emitted if no instruction locations were recorded for GV.
//
// NOTE: the order of the CreateTempSymbol() and Emit*() calls below determines
// the layout of the output, so statements must not be reordered casually.
void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
  // For each function there is a separate subsection
  // which holds the PC to file:line table.
  const MCSymbol *Fn = Asm->getSymbol(GV);
  assert(Fn);

  // NOTE(review): operator[] is used for the lookup; presumably GV is always
  // present in FnDebugInfo since it comes from VisitedFunctions -- confirm.
  const FunctionInfo &FI = FnDebugInfo[GV];
  if (FI.Instrs.empty())
    return;
  assert(FI.End && "Don't know where the function ends?");

  // Prefer the display name from the function's debug info, if there is one.
  StringRef GVName = GV->getName();
  StringRef FuncName;
  if (DISubprogram SP = getDISubprogram(GV))
    FuncName = SP->getDisplayName();

  // FIXME Clang currently sets DisplayName to "bar" for a C++
  // "namespace_foo::bar" function, see PR21528.  Luckily, dbghelp.dll is trying
  // to demangle display names anyways, so let's just put a mangled name into
  // the symbols subsection until Clang gives us what we need.
  // substr(1) drops the leading '\01' byte and keeps the name starting at '?'.
  if (GVName.startswith("\01?"))
    FuncName = GVName.substr(1);
  // Emit a symbol subsection, required by VS2012+ to find function boundaries.
  // The subsection length is emitted as the difference between two labels that
  // bracket the payload.
  MCSymbol *SymbolsBegin = Asm->MMI->getContext().CreateTempSymbol(),
           *SymbolsEnd = Asm->MMI->getContext().CreateTempSymbol();
  Asm->OutStreamer.AddComment("Symbol subsection for " + Twine(FuncName));
  Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
  EmitLabelDiff(Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
  Asm->OutStreamer.EmitLabel(SymbolsBegin);
  {
    // The "procedure start" record is prefixed with its own 2-byte length,
    // again computed as a label difference.
    MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().CreateTempSymbol(),
             *ProcSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
    EmitLabelDiff(Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
    Asm->OutStreamer.EmitLabel(ProcSegmentBegin);

    Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
    // Some bytes of this segment don't seem to be required for basic debugging,
    // so just fill them with zeroes.
    Asm->OutStreamer.EmitFill(12, 0);
    // This is the important bit that tells the debugger where the function
    // code is located and what's its size:
    EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);
    Asm->OutStreamer.EmitFill(12, 0);
    Asm->OutStreamer.EmitCOFFSecRel32(Fn);
    Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
    Asm->EmitInt8(0);
    // Emit the function display name as a null-terminated string.
    Asm->OutStreamer.EmitBytes(FuncName);
    Asm->EmitInt8(0);
    Asm->OutStreamer.EmitLabel(ProcSegmentEnd);

    // We're done with this function.
    // NOTE(review): 0x0002 is presumably the length of the "procedure end"
    // record that follows (just its 2-byte type field) -- confirm.
    Asm->EmitInt16(0x0002);
    Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
  }
  Asm->OutStreamer.EmitLabel(SymbolsEnd);
  // Every subsection must be aligned to a 4-byte boundary.
  // The fixed-size fields emitted above add up to a multiple of 4 bytes
  // (assuming a 4-byte SecRel32 and a 2-byte section index), so the padding
  // depends only on the length of the function name.
  Asm->OutStreamer.EmitFill((-FuncName.size()) % 4, 0);

  // PCs/Instructions are grouped into segments sharing the same filename.
  // Pre-calculate the lengths (in instructions) of these segments and store
  // them in a map for convenience.  Each index in the map is the sequential
  // number of the respective instruction that starts a new segment.
  DenseMap<size_t, size_t> FilenameSegmentLengths;
  size_t LastSegmentEnd = 0;
  StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename;
  for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) {
    if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename)
      continue;
    // The filename changed at J: close the segment that started at
    // LastSegmentEnd and open a new one here.
    FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd;
    LastSegmentEnd = J;
    PrevFilename = InstrInfo[FI.Instrs[J]].Filename;
  }
  // Close the final segment, which runs to the end of the instruction list.
  FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;

  // Emit a line table subsection, required to do PC-to-file:line lookup.
  Asm->OutStreamer.AddComment("Line table subsection for " + Twine(FuncName));
  Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
  MCSymbol *LineTableBegin = Asm->MMI->getContext().CreateTempSymbol(),
           *LineTableEnd = Asm->MMI->getContext().CreateTempSymbol();
  EmitLabelDiff(Asm->OutStreamer, LineTableBegin, LineTableEnd);
  Asm->OutStreamer.EmitLabel(LineTableBegin);

  // Identify the function this subsection is for.
  Asm->OutStreamer.EmitCOFFSecRel32(Fn);
  Asm->OutStreamer.EmitCOFFSectionIndex(Fn);
  // Insert padding after a 16-bit section index.
  Asm->EmitInt16(0);

  // Length of the function's code, in bytes.
  EmitLabelDiff(Asm->OutStreamer, Fn, FI.End);

  // PC-to-linenumber lookup table:
  // FileSegmentEnd is the still-unplaced end label of the segment currently
  // being emitted (null before the first segment has been opened).
  MCSymbol *FileSegmentEnd = nullptr;
  for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
    MCSymbol *Instr = FI.Instrs[J];
    assert(InstrInfo.count(Instr));

    if (FilenameSegmentLengths.count(J)) {
      // We came to a beginning of a new filename segment.
      // Place the end label of the previous segment, if there was one.
      if (FileSegmentEnd)
        Asm->OutStreamer.EmitLabel(FileSegmentEnd);
      StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
      assert(FileNameRegistry.Infos.count(CurFilename));
      size_t IndexInStringTable =
          FileNameRegistry.Infos[CurFilename].FilenameID;
      // Each segment starts with the offset of the filename
      // in the string table.
      // The emitted value is 8 * FilenameID; the file index subsection
      // written by endModule() uses 8 bytes per file entry.
      Asm->OutStreamer.AddComment(
          "Segment for file '" + Twine(CurFilename) + "' begins");
      MCSymbol *FileSegmentBegin = Asm->MMI->getContext().CreateTempSymbol();
      Asm->OutStreamer.EmitLabel(FileSegmentBegin);
      Asm->EmitInt32(8 * IndexInStringTable);

      // Number of PC records in the lookup table.
      size_t SegmentLength = FilenameSegmentLengths[J];
      Asm->EmitInt32(SegmentLength);

      // Full size of the segment for this filename, including the prev two
      // records.
      FileSegmentEnd = Asm->MMI->getContext().CreateTempSymbol();
      EmitLabelDiff(Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
    }

    // The first PC with the given linenumber and the linenumber itself.
    // The PC is emitted as an offset from the start of the function.
    EmitLabelDiff(Asm->OutStreamer, Fn, Instr);
    Asm->EmitInt32(InstrInfo[Instr].LineNumber);
  }

  // Place the end label of the last segment, then close the subsection.
  if (FileSegmentEnd)
    Asm->OutStreamer.EmitLabel(FileSegmentEnd);
  Asm->OutStreamer.EmitLabel(LineTableEnd);
}
314 
beginFunction(const MachineFunction * MF)315 void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
316   assert(!CurFn && "Can't process two functions at once!");
317 
318   if (!Asm || !Asm->MMI->hasDebugInfo())
319     return;
320 
321   const Function *GV = MF->getFunction();
322   assert(FnDebugInfo.count(GV) == false);
323   VisitedFunctions.push_back(GV);
324   CurFn = &FnDebugInfo[GV];
325 
326   // Find the end of the function prolog.
327   // FIXME: is there a simpler a way to do this? Can we just search
328   // for the first instruction of the function, not the last of the prolog?
329   DebugLoc PrologEndLoc;
330   bool EmptyPrologue = true;
331   for (const auto &MBB : *MF) {
332     if (PrologEndLoc)
333       break;
334     for (const auto &MI : MBB) {
335       if (MI.isDebugValue())
336         continue;
337 
338       // First known non-DBG_VALUE and non-frame setup location marks
339       // the beginning of the function body.
340       // FIXME: do we need the first subcondition?
341       if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
342         PrologEndLoc = MI.getDebugLoc();
343         break;
344       }
345       EmptyPrologue = false;
346     }
347   }
348   // Record beginning of function if we have a non-empty prologue.
349   if (PrologEndLoc && !EmptyPrologue) {
350     DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
351     maybeRecordLocation(FnStartDL, MF);
352   }
353 }
354 
endFunction(const MachineFunction * MF)355 void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
356   if (!Asm || !CurFn)  // We haven't created any debug info for this function.
357     return;
358 
359   const Function *GV = MF->getFunction();
360   assert(FnDebugInfo.count(GV));
361   assert(CurFn == &FnDebugInfo[GV]);
362 
363   if (CurFn->Instrs.empty()) {
364     FnDebugInfo.erase(GV);
365     VisitedFunctions.pop_back();
366   } else {
367     CurFn->End = Asm->getFunctionEnd();
368   }
369   CurFn = nullptr;
370 }
371 
beginInstruction(const MachineInstr * MI)372 void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
373   // Ignore DBG_VALUE locations and function prologue.
374   if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
375     return;
376   DebugLoc DL = MI->getDebugLoc();
377   if (DL == PrevInstLoc || !DL)
378     return;
379   maybeRecordLocation(DL, Asm->MF);
380 }
381 }
382