1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
31 // a line/address map for use with BasicSourceLineResolver.
32 
33 #ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
34 #define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
35 
36 #include <atlcomcli.h>
37 
38 #include <string>
39 #include <unordered_map>
40 
41 #include "common/windows/module_info.h"
42 #include "common/windows/omap.h"
43 
44 struct IDiaEnumLineNumbers;
45 struct IDiaSession;
46 struct IDiaSymbol;
47 
48 namespace google_breakpad {
49 
50 using std::wstring;
51 using std::unordered_map;
52 
53 class PDBSourceLineWriter {
54  public:
55   enum FileFormat {
56     PDB_FILE,  // a .pdb file containing debug symbols
57     EXE_FILE,  // a .exe or .dll file
58     ANY_FILE   // try PDB_FILE and then EXE_FILE
59   };
60 
61   explicit PDBSourceLineWriter();
62   ~PDBSourceLineWriter();
63 
64   // Opens the given file.  For executable files, the corresponding pdb
65   // file must be available; Open will be if it is not.
66   // If there is already a pdb file open, it is automatically closed.
67   // Returns true on success.
68   bool Open(const wstring &file, FileFormat format);
69 
70   // Closes the current pdb file and its associated resources.
71   void Close();
72 
73   // Sets the code file full path.  This is optional for 32-bit modules.  It is
74   // also optional for 64-bit modules when there is an executable file stored
75   // in the same directory as the PDB file.  It is only required for 64-bit
76   // modules when the executable file is not in the same location as the PDB
77   // file and it must be called after Open() and before WriteMap().
78   // If Open() was called for an executable file, then it is an error to call
79   // SetCodeFile() with a different file path and it will return false.
80   bool SetCodeFile(const wstring &exe_file);
81 
82   // Writes a Breakpad symbol file from the current pdb file to |symbol_file|.
83   // Returns true on success.
84   bool WriteSymbols(FILE *symbol_file);
85 
86   // Retrieves information about the module's debugging file.  Returns
87   // true on success and false on failure.
88   bool GetModuleInfo(PDBModuleInfo *info);
89 
90   // Retrieves information about the module's PE file.  Returns
91   // true on success and false on failure.
92   bool GetPEInfo(PEModuleInfo *info);
93 
94   // Sets uses_guid to true if the opened file uses a new-style CodeView
95   // record with a 128-bit GUID, or false if the opened file uses an old-style
96   // CodeView record.  When no GUID is available, a 32-bit signature should be
97   // used to identify the module instead.  If the information cannot be
98   // determined, this method returns false.
99   bool UsesGUID(bool *uses_guid);
100 
101  private:
102   // Outputs the line/address pairs for each line in the enumerator.
103   // Returns true on success.
104   bool PrintLines(IDiaEnumLineNumbers *lines);
105 
106   // Outputs a function address and name, followed by its source line list.
107   // block can be the same object as function, or it can be a reference to a
108   // code block that is lexically part of this function, but resides at a
109   // separate address. If has_multiple_symbols is true, this function's
110   // instructions correspond to multiple symbols. Returns true on success.
111   bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block,
112                      bool has_multiple_symbols);
113 
114   // Outputs all functions as described above.  Returns true on success.
115   bool PrintFunctions();
116 
117   // Outputs all of the source files in the session's pdb file.
118   // Returns true on success.
119   bool PrintSourceFiles();
120 
121   // Outputs all of the frame information necessary to construct stack
122   // backtraces in the absence of frame pointers. For x86 data stored in
123   // .pdb files. Returns true on success.
124   bool PrintFrameDataUsingPDB();
125 
126   // Outputs all of the frame information necessary to construct stack
127   // backtraces in the absence of frame pointers. For x64 data stored in
128   // .exe, .dll files. Returns true on success.
129   bool PrintFrameDataUsingEXE();
130 
131   // Outputs all of the frame information necessary to construct stack
132   // backtraces in the absence of frame pointers.  Returns true on success.
133   bool PrintFrameData();
134 
135   // Outputs a single public symbol address and name, if the symbol corresponds
136   // to a code address.  Returns true on success.  If symbol is does not
137   // correspond to code, returns true without outputting anything. If
138   // has_multiple_symbols is true, the symbol corresponds to a code address and
139   // the instructions correspond to multiple symbols.
140   bool PrintCodePublicSymbol(IDiaSymbol *symbol, bool has_multiple_symbols);
141 
142   // Outputs a line identifying the PDB file that is being dumped, along with
143   // its uuid and age.
144   bool PrintPDBInfo();
145 
146   // Outputs a line identifying the PE file corresponding to the PDB
147   // file that is being dumped, along with its code identifier,
148   // which consists of its timestamp and file size.
149   bool PrintPEInfo();
150 
151   // Returns true if this filename has already been seen,
152   // and an ID is stored for it, or false if it has not.
FileIDIsCached(const wstring & file)153   bool FileIDIsCached(const wstring &file) {
154     return unique_files_.find(file) != unique_files_.end();
155   }
156 
157   // Cache this filename and ID for later reuse.
CacheFileID(const wstring & file,DWORD id)158   void CacheFileID(const wstring &file, DWORD id) {
159     unique_files_[file] = id;
160   }
161 
162   // Store this ID in the cache as a duplicate for this filename.
StoreDuplicateFileID(const wstring & file,DWORD id)163   void StoreDuplicateFileID(const wstring &file, DWORD id) {
164     unordered_map<wstring, DWORD>::iterator iter = unique_files_.find(file);
165     if (iter != unique_files_.end()) {
166       // map this id to the previously seen one
167       file_ids_[id] = iter->second;
168     }
169   }
170 
171   // Given a file's unique ID, return the ID that should be used to
172   // reference it. There may be multiple files with identical filenames
173   // but different unique IDs. The cache attempts to coalesce these into
174   // one ID per unique filename.
GetRealFileID(DWORD id)175   DWORD GetRealFileID(DWORD id) {
176     unordered_map<DWORD, DWORD>::iterator iter = file_ids_.find(id);
177     if (iter == file_ids_.end())
178       return id;
179     return iter->second;
180   }
181 
182   // Find the PE file corresponding to the loaded PDB file, and
183   // set the code_file_ member. Returns false on failure.
184   bool FindPEFile();
185 
186   // Returns the function name for a symbol.  If possible, the name is
187   // undecorated.  If the symbol's decorated form indicates the size of
188   // parameters on the stack, this information is returned in stack_param_size.
189   // Returns true on success.  If the symbol doesn't encode parameter size
190   // information, stack_param_size is set to -1.
191   static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name,
192                                     int *stack_param_size);
193 
194   // Returns the number of bytes of stack space used for a function's
195   // parameters.  function must have the tag SymTagFunction.  In the event of
196   // a failure, returns 0, which is also a valid number of bytes.
197   static int GetFunctionStackParamSize(IDiaSymbol *function);
198 
199   // The filename of the PE file corresponding to the currently-open
200   // pdb file.
201   wstring code_file_;
202 
203   // The session for the currently-open pdb file.
204   CComPtr<IDiaSession> session_;
205 
206   // The current output file for this WriteMap invocation.
207   FILE *output_;
208 
209   // There may be many duplicate filenames with different IDs.
210   // This maps from the DIA "unique ID" to a single ID per unique
211   // filename.
212   unordered_map<DWORD, DWORD> file_ids_;
213   // This maps unique filenames to file IDs.
214   unordered_map<wstring, DWORD> unique_files_;
215 
216   // This is used for calculating post-transform symbol addresses and lengths.
217   ImageMap image_map_;
218 
219   // Disallow copy ctor and operator=
220   PDBSourceLineWriter(const PDBSourceLineWriter&);
221   void operator=(const PDBSourceLineWriter&);
222 };
223 
224 }  // namespace google_breakpad
225 
226 #endif  // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
227