1 // Copyright (c) 2006, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // PDBSourceLineWriter uses a pdb file produced by Visual C++ to output
31 // a line/address map for use with BasicSourceLineResolver.
32 
33 #ifndef COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
34 #define COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
35 
36 #include <atlcomcli.h>
37 
38 #include <unordered_map>
39 #include <string>
40 
41 #include "common/windows/omap.h"
42 
43 struct IDiaEnumLineNumbers;
44 struct IDiaSession;
45 struct IDiaSymbol;
46 
47 namespace google_breakpad {
48 
49 using std::wstring;
50 using std::unordered_map;
51 
// Identifying information for a pdb file.
struct PDBModuleInfo {
  // Basename of the pdb file the information was loaded from.
  std::wstring debug_file;

  // Identifier of the pdb.
  //   - Recent pdb files: the pdb's guid in uppercase hexadecimal with no
  //     dashes or separators, immediately followed by the pdb's age, also in
  //     uppercase hexadecimal.
  //   - Older pdb files that have no guid: the pdb's 32-bit signature value
  //     in zero-padded hexadecimal, immediately followed by the pdb's age in
  //     lowercase hexadecimal.
  std::wstring debug_identifier;

  // Names the cpu the pdb is associated with.  At present this is either
  // "x86" or "unknown".
  std::wstring cpu;
};
71 
// Identifying information for a PE file, i.e. an EXE or a DLL.
struct PEModuleInfo {
  // Basename of the PE file.
  std::wstring code_file;

  // Code identifier of the PE file: its timestamp and file size
  // concatenated into one hex string.  (These are the fields
  // IMAGE_FILE_HEADER::TimeDateStamp and IMAGE_OPTIONAL_HEADER::SizeOfImage,
  // as defined in the ImageHlp documentation.)  This format is not well
  // documented, if it's documented at all, but it's what symstore does and
  // what DbgHelp supports.
  std::wstring code_identifier;
};
86 
87 class PDBSourceLineWriter {
88  public:
89   enum FileFormat {
90     PDB_FILE,  // a .pdb file containing debug symbols
91     EXE_FILE,  // a .exe or .dll file
92     ANY_FILE   // try PDB_FILE and then EXE_FILE
93   };
94 
95   explicit PDBSourceLineWriter();
96   ~PDBSourceLineWriter();
97 
98   // Opens the given file.  For executable files, the corresponding pdb
99   // file must be available; Open will be if it is not.
100   // If there is already a pdb file open, it is automatically closed.
101   // Returns true on success.
102   bool Open(const wstring &file, FileFormat format);
103 
104   // Sets the code file full path.  This is optional for 32-bit modules.  It is
105   // also optional for 64-bit modules when there is an executable file stored
106   // in the same directory as the PDB file.  It is only required for 64-bit
107   // modules when the executable file is not in the same location as the PDB
108   // file and it must be called after Open() and before WriteMap().
109   // If Open() was called for an executable file, then it is an error to call
110   // SetCodeFile() with a different file path and it will return false.
111   bool SetCodeFile(const wstring &exe_file);
112 
113   // Writes a map file from the current pdb file to the given file stream.
114   // Returns true on success.
115   bool WriteMap(FILE *map_file);
116 
117   // Closes the current pdb file and its associated resources.
118   void Close();
119 
120   // Retrieves information about the module's debugging file.  Returns
121   // true on success and false on failure.
122   bool GetModuleInfo(PDBModuleInfo *info);
123 
124   // Retrieves information about the module's PE file.  Returns
125   // true on success and false on failure.
126   bool GetPEInfo(PEModuleInfo *info);
127 
128   // Sets uses_guid to true if the opened file uses a new-style CodeView
129   // record with a 128-bit GUID, or false if the opened file uses an old-style
130   // CodeView record.  When no GUID is available, a 32-bit signature should be
131   // used to identify the module instead.  If the information cannot be
132   // determined, this method returns false.
133   bool UsesGUID(bool *uses_guid);
134 
135  private:
136   // Outputs the line/address pairs for each line in the enumerator.
137   // Returns true on success.
138   bool PrintLines(IDiaEnumLineNumbers *lines);
139 
140   // Outputs a function address and name, followed by its source line list.
141   // block can be the same object as function, or it can be a reference
142   // to a code block that is lexically part of this function, but
143   // resides at a separate address.
144   // Returns true on success.
145   bool PrintFunction(IDiaSymbol *function, IDiaSymbol *block);
146 
147   // Outputs all functions as described above.  Returns true on success.
148   bool PrintFunctions();
149 
150   // Outputs all of the source files in the session's pdb file.
151   // Returns true on success.
152   bool PrintSourceFiles();
153 
154   // Outputs all of the frame information necessary to construct stack
155   // backtraces in the absence of frame pointers. For x86 data stored in
156   // .pdb files. Returns true on success.
157   bool PrintFrameDataUsingPDB();
158 
159   // Outputs all of the frame information necessary to construct stack
160   // backtraces in the absence of frame pointers. For x64 data stored in
161   // .exe, .dll files. Returns true on success.
162   bool PrintFrameDataUsingEXE();
163 
164   // Outputs all of the frame information necessary to construct stack
165   // backtraces in the absence of frame pointers.  Returns true on success.
166   bool PrintFrameData();
167 
168   // Outputs a single public symbol address and name, if the symbol corresponds
169   // to a code address.  Returns true on success.  If symbol is does not
170   // correspond to code, returns true without outputting anything.
171   bool PrintCodePublicSymbol(IDiaSymbol *symbol);
172 
173   // Outputs a line identifying the PDB file that is being dumped, along with
174   // its uuid and age.
175   bool PrintPDBInfo();
176 
177   // Outputs a line identifying the PE file corresponding to the PDB
178   // file that is being dumped, along with its code identifier,
179   // which consists of its timestamp and file size.
180   bool PrintPEInfo();
181 
182   // Returns true if this filename has already been seen,
183   // and an ID is stored for it, or false if it has not.
FileIDIsCached(const wstring & file)184   bool FileIDIsCached(const wstring &file) {
185     return unique_files_.find(file) != unique_files_.end();
186   }
187 
188   // Cache this filename and ID for later reuse.
CacheFileID(const wstring & file,DWORD id)189   void CacheFileID(const wstring &file, DWORD id) {
190     unique_files_[file] = id;
191   }
192 
193   // Store this ID in the cache as a duplicate for this filename.
StoreDuplicateFileID(const wstring & file,DWORD id)194   void StoreDuplicateFileID(const wstring &file, DWORD id) {
195     unordered_map<wstring, DWORD>::iterator iter = unique_files_.find(file);
196     if (iter != unique_files_.end()) {
197       // map this id to the previously seen one
198       file_ids_[id] = iter->second;
199     }
200   }
201 
202   // Given a file's unique ID, return the ID that should be used to
203   // reference it. There may be multiple files with identical filenames
204   // but different unique IDs. The cache attempts to coalesce these into
205   // one ID per unique filename.
GetRealFileID(DWORD id)206   DWORD GetRealFileID(DWORD id) {
207     unordered_map<DWORD, DWORD>::iterator iter = file_ids_.find(id);
208     if (iter == file_ids_.end())
209       return id;
210     return iter->second;
211   }
212 
213   // Find the PE file corresponding to the loaded PDB file, and
214   // set the code_file_ member. Returns false on failure.
215   bool FindPEFile();
216 
217   // Returns the function name for a symbol.  If possible, the name is
218   // undecorated.  If the symbol's decorated form indicates the size of
219   // parameters on the stack, this information is returned in stack_param_size.
220   // Returns true on success.  If the symbol doesn't encode parameter size
221   // information, stack_param_size is set to -1.
222   static bool GetSymbolFunctionName(IDiaSymbol *function, BSTR *name,
223                                     int *stack_param_size);
224 
225   // Returns the number of bytes of stack space used for a function's
226   // parameters.  function must have the tag SymTagFunction.  In the event of
227   // a failure, returns 0, which is also a valid number of bytes.
228   static int GetFunctionStackParamSize(IDiaSymbol *function);
229 
230   // The filename of the PE file corresponding to the currently-open
231   // pdb file.
232   wstring code_file_;
233 
234   // The session for the currently-open pdb file.
235   CComPtr<IDiaSession> session_;
236 
237   // The current output file for this WriteMap invocation.
238   FILE *output_;
239 
240   // There may be many duplicate filenames with different IDs.
241   // This maps from the DIA "unique ID" to a single ID per unique
242   // filename.
243   unordered_map<DWORD, DWORD> file_ids_;
244   // This maps unique filenames to file IDs.
245   unordered_map<wstring, DWORD> unique_files_;
246 
247   // This is used for calculating post-transform symbol addresses and lengths.
248   ImageMap image_map_;
249 
250   // Disallow copy ctor and operator=
251   PDBSourceLineWriter(const PDBSourceLineWriter&);
252   void operator=(const PDBSourceLineWriter&);
253 };
254 
255 }  // namespace google_breakpad
256 
257 #endif  // COMMON_WINDOWS_PDB_SOURCE_LINE_WRITER_H_
258