// -*- mode: c++ -*-
2 
// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 
// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
32 
// stabs_reader.h: Define StabsReader, a parser for STABS debugging
// information. A description of the STABS debugging format can be
// found at:
//
//    http://sourceware.org/gdb/current/onlinedocs/stabs_toc.html
//
// The comments here assume you understand the format.
//
// This parser can handle big-endian and little-endian data, and the symbol
// values may be either 32 or 64 bits long. It handles both STABS in
// sections (as used on Linux) and STABS appearing directly in an
// a.out-like symbol table (as used in Darwin / Mac OS X Mach-O files).
45 
46 #ifndef COMMON_STABS_READER_H__
47 #define COMMON_STABS_READER_H__
48 
49 #include <stddef.h>
50 #include <stdint.h>
51 
52 #ifdef HAVE_CONFIG_H
53 #include <config.h>
54 #endif
55 
56 #ifdef HAVE_MACH_O_NLIST_H
57 #include <mach-o/nlist.h>
58 #elif defined(HAVE_A_OUT_H)
59 #include <a.out.h>
60 #endif
61 
62 #include <string>
63 #include <vector>
64 
65 #include "common/byte_cursor.h"
66 #include "common/using_std_string.h"
67 
68 namespace google_breakpad {
69 
70 class StabsHandler;
71 
72 class StabsReader {
73  public:
74   // Create a reader for the STABS debug information whose .stab section is
75   // being traversed by ITERATOR, and whose .stabstr section is referred to
76   // by STRINGS. The reader will call the member functions of HANDLER to
77   // report the information it finds, when the reader's 'Process' member
78   // function is called.
79   //
80   // BIG_ENDIAN should be true if the entries in the .stab section are in
81   // big-endian form, or false if they are in little-endian form.
82   //
83   // VALUE_SIZE should be either 4 or 8, indicating the size of the 'value'
84   // field in each entry in bytes.
85   //
86   // UNITIZED should be true if the STABS data is stored in units with
87   // N_UNDF headers. This is usually the case for STABS stored in sections,
88   // like .stab/.stabstr, and usually not the case for STABS stored in the
89   // actual symbol table; UNITIZED should be true when parsing Linux stabs,
90   // false when parsing Mac OS X STABS. For details, see:
91   // http://sourceware.org/gdb/current/onlinedocs/stabs/Stab-Section-Basics.html
92   //
93   // Note that, in ELF, the .stabstr section should be found using the
94   // 'sh_link' field of the .stab section header, not by name.
95   StabsReader(const uint8_t *stab,    size_t stab_size,
96               const uint8_t *stabstr, size_t stabstr_size,
97               bool big_endian, size_t value_size, bool unitized,
98               StabsHandler *handler);
99 
100   // Process the STABS data, calling the handler's member functions to
101   // report what we find.  While the handler functions return true,
102   // continue to process until we reach the end of the section.  If we
103   // processed the entire section and all handlers returned true,
104   // return true.  If any handler returned false, return false.
105   //
106   // This is only meant to be called once per StabsReader instance;
107   // resuming a prior processing pass that stopped abruptly isn't supported.
108   bool Process();
109 
110  private:
111 
112   // An class for walking arrays of STABS entries. This isolates the main
113   // STABS reader from the exact format (size; endianness) of the entries
114   // themselves.
115   class EntryIterator {
116    public:
117     // The contents of a STABS entry, adjusted for the host's endianness,
118     // word size, 'struct nlist' layout, and so on.
119     struct Entry {
120       // True if this iterator has reached the end of the entry array. When
121       // this is set, the other members of this structure are not valid.
122       bool at_end;
123 
124       // The number of this entry within the list.
125       size_t index;
126 
127       // The current entry's name offset. This is the offset within the
128       // current compilation unit's strings, as establish by the N_UNDF entries.
129       size_t name_offset;
130 
131       // The current entry's type, 'other' field, descriptor, and value.
132       unsigned char type;
133       unsigned char other;
134       short descriptor;
135       uint64_t value;
136     };
137 
138     // Create a EntryIterator walking the entries in BUFFER. Treat the
139     // entries as big-endian if BIG_ENDIAN is true, as little-endian
140     // otherwise. Assume each entry has a 'value' field whose size is
141     // VALUE_SIZE.
142     //
143     // This would not be terribly clean to extend to other format variations,
144     // but it's enough to handle Linux and Mac, and we'd like STABS to die
145     // anyway.
146     //
147     // For the record: on Linux, STABS entry values are always 32 bits,
148     // regardless of the architecture address size (don't ask me why); on
149     // Mac, they are 32 or 64 bits long. Oddly, the section header's entry
150     // size for a Linux ELF .stab section varies according to the ELF class
151     // from 12 to 20 even as the actual entries remain unchanged.
152     EntryIterator(const ByteBuffer *buffer, bool big_endian, size_t value_size);
153 
154     // Move to the next entry. This function's behavior is undefined if
155     // at_end() is true when it is called.
156     EntryIterator &operator++() { Fetch(); entry_.index++; return *this; }
157 
158     // Dereferencing this iterator produces a reference to an Entry structure
159     // that holds the current entry's values. The entry is owned by this
160     // EntryIterator, and will be invalidated at the next call to operator++.
161     const Entry &operator*() const { return entry_; }
162     const Entry *operator->() const { return &entry_; }
163 
164    private:
165     // Read the STABS entry at cursor_, and set entry_ appropriately.
166     void Fetch();
167 
168     // The size of entries' value field, in bytes.
169     size_t value_size_;
170 
171     // A byte cursor traversing buffer_.
172     ByteCursor cursor_;
173 
174     // Values for the entry this iterator refers to.
175     Entry entry_;
176   };
177 
178   // A source line, saved to be reported later.
179   struct Line {
180     uint64_t address;
181     const char *filename;
182     int number;
183   };
184 
185   // Return the name of the current symbol.
186   const char *SymbolString();
187 
188   // Process a compilation unit starting at symbol_.  Return true
189   // to continue processing, or false to abort.
190   bool ProcessCompilationUnit();
191 
192   // Process a function in current_source_file_ starting at symbol_.
193   // Return true to continue processing, or false to abort.
194   bool ProcessFunction();
195 
196   // Process an exported function symbol.
197   // Return true to continue processing, or false to abort.
198   bool ProcessExtern();
199 
200   // The STABS entries being parsed.
201   ByteBuffer entries_;
202 
203   // The string section to which the entries refer.
204   ByteBuffer strings_;
205 
206   // The iterator walking the STABS entries.
207   EntryIterator iterator_;
208 
209   // True if the data is "unitized"; see the explanation in the comment for
210   // StabsReader::StabsReader.
211   bool unitized_;
212 
213   StabsHandler *handler_;
214 
215   // The offset of the current compilation unit's strings within stabstr_.
216   size_t string_offset_;
217 
218   // The value string_offset_ should have for the next compilation unit,
219   // as established by N_UNDF entries.
220   size_t next_cu_string_offset_;
221 
222   // The current source file name.
223   const char *current_source_file_;
224 
225   // Mac OS X STABS place SLINE records before functions; we accumulate a
226   // vector of these until we see the FUN record, and then report them
227   // after the StartFunction call.
228   std::vector<Line> queued_lines_;
229 };
230 
// Consumer-provided callback structure for the STABS reader.  Clients
// of the STABS reader provide an instance of a class derived from this
// one.  The reader then invokes the member functions of that instance
// to report the information it finds.
//
// Except for Warning, which is pure virtual and must be supplied by the
// client, the default definitions of the member functions do nothing,
// and return true so processing will continue.
class StabsHandler {
 public:
  StabsHandler() { }
  virtual ~StabsHandler() { }

  // Some general notes about the handler callback functions:

  // Processing proceeds until the end of the .stab section, or until
  // one of these functions returns false.

  // The addresses given are as reported in the STABS info, without
  // regard for whether the module may be loaded at different
  // addresses at different times (a shared library, say).  When
  // processing STABS from an ELF shared library, the addresses given
  // all assume the library is loaded at its nominal load address.
  // They are *not* offsets from the nominal load address.  If you
  // want offsets, you must subtract off the library's nominal load
  // address.

  // The arguments to these functions named FILENAME are all
  // references to strings stored in the .stabstr section.  Because
  // both the Linux and Solaris linkers factor out duplicate strings
  // from the .stabstr section, the consumer can assume that if two
  // FILENAME values are different addresses, they represent different
  // file names.
  //
  // Thus, it's safe to use (say) std::map<char *, ...>, which does
  // string address comparisons, not string content comparisons.
  // Since all the strings are in the same array of characters --- the
  // .stabstr section --- comparing their addresses produces
  // predictable, if not lexicographically meaningful, results.

  // Begin processing a compilation unit whose main source file is
  // named FILENAME, and whose base address is ADDRESS.  If
  // BUILD_DIRECTORY is non-NULL, it is the name of the build
  // directory in which the compilation occurred.
  //
  // Return true to continue processing, or false to stop.  The default
  // definition does nothing and returns true.
  virtual bool StartCompilationUnit(const char *filename, uint64_t address,
                                    const char *build_directory) {
    return true;
  }

  // Finish processing the compilation unit.  If ADDRESS is non-zero,
  // it is the ending address of the compilation unit.  If ADDRESS is
  // zero, then the compilation unit's ending address is not
  // available, and the consumer must infer it by other means.
  //
  // Return true to continue processing, or false to stop.  The default
  // definition does nothing and returns true.
  virtual bool EndCompilationUnit(uint64_t address) { return true; }

  // Begin processing a function named NAME, whose starting address is
  // ADDRESS.  This function belongs to the compilation unit that was
  // most recently started but not ended.
  //
  // Note that, unlike filenames, NAME is not a pointer into the
  // .stabstr section; this is because the name as it appears in the
  // STABS data is followed by type information.  The value passed to
  // StartFunction is the function name alone.
  //
  // In languages that use name mangling, like C++, NAME is mangled.
  //
  // Return true to continue processing, or false to stop.  The default
  // definition does nothing and returns true.
  virtual bool StartFunction(const string &name, uint64_t address) {
    return true;
  }

  // Finish processing the function.  If ADDRESS is non-zero, it is
  // the ending address for the function.  If ADDRESS is zero, then
  // the function's ending address is not available, and the consumer
  // must infer it by other means.
  //
  // Return true to continue processing, or false to stop.  The default
  // definition does nothing and returns true.
  virtual bool EndFunction(uint64_t address) { return true; }

  // Report that the code at ADDRESS is attributable to line NUMBER of
  // the source file named FILENAME.  The caller must infer the ending
  // address of the line.
  //
  // Return true to continue processing, or false to stop.  The default
  // definition does nothing and returns true.
  virtual bool Line(uint64_t address, const char *filename, int number) {
    return true;
  }

  // Report that an exported function NAME is present at ADDRESS.
  // The size of the function is unknown.
  //
  // Return true to continue processing, or false to stop.  The default
  // definition does nothing and returns true.
  virtual bool Extern(const string &name, uint64_t address) {
    return true;
  }

  // Report a warning.  FORMAT is a printf-like format string,
  // specifying how to format the subsequent arguments.
  //
  // This is the only member function without a default definition:
  // it is pure virtual, so every concrete handler must override it.
  virtual void Warning(const char *format, ...) = 0;
};
322 
323 } // namespace google_breakpad
324 
325 #endif  // COMMON_STABS_READER_H__
326