1 // Copyright (c) 2010 Google Inc. All Rights Reserved.
2 //
3 // Redistribution and use in source and binary forms, with or without
4 // modification, are permitted provided that the following conditions are
5 // met:
6 //
7 //     * Redistributions of source code must retain the above copyright
8 // notice, this list of conditions and the following disclaimer.
9 //     * Redistributions in binary form must reproduce the above
10 // copyright notice, this list of conditions and the following disclaimer
11 // in the documentation and/or other materials provided with the
12 // distribution.
13 //     * Neither the name of Google Inc. nor the names of its
14 // contributors may be used to endorse or promote products derived from
15 // this software without specific prior written permission.
16 //
17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 
29 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
30 
31 // This file implements the google_breakpad::StabsReader class.
32 // See stabs_reader.h.
33 
34 #include "common/stabs_reader.h"
35 
36 #include <assert.h>
37 #include <stab.h>
38 #include <string.h>
39 
40 #include <string>
41 
42 #include "common/using_std_string.h"
43 
44 using std::vector;
45 
46 namespace google_breakpad {
47 
EntryIterator(const ByteBuffer * buffer,bool big_endian,size_t value_size)48 StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer,
49                                           bool big_endian, size_t value_size)
50     : value_size_(value_size), cursor_(buffer, big_endian) {
51   // Actually, we could handle weird sizes just fine, but they're
52   // probably mistakes --- expressed in bits, say.
53   assert(value_size == 4 || value_size == 8);
54   entry_.index = 0;
55   Fetch();
56 }
57 
Fetch()58 void StabsReader::EntryIterator::Fetch() {
59   cursor_
60       .Read(4, false, &entry_.name_offset)
61       .Read(1, false, &entry_.type)
62       .Read(1, false, &entry_.other)
63       .Read(2, false, &entry_.descriptor)
64       .Read(value_size_, false, &entry_.value);
65   entry_.at_end = !cursor_;
66 }
67 
StabsReader(const uint8_t * stab,size_t stab_size,const uint8_t * stabstr,size_t stabstr_size,bool big_endian,size_t value_size,bool unitized,StabsHandler * handler)68 StabsReader::StabsReader(const uint8_t *stab,    size_t stab_size,
69                          const uint8_t *stabstr, size_t stabstr_size,
70                          bool big_endian, size_t value_size, bool unitized,
71                          StabsHandler *handler)
72     : entries_(stab, stab_size),
73       strings_(stabstr, stabstr_size),
74       iterator_(&entries_, big_endian, value_size),
75       unitized_(unitized),
76       handler_(handler),
77       string_offset_(0),
78       next_cu_string_offset_(0),
79       current_source_file_(NULL) { }
80 
SymbolString()81 const char *StabsReader::SymbolString() {
82   ptrdiff_t offset = string_offset_ + iterator_->name_offset;
83   if (offset < 0 || (size_t) offset >= strings_.Size()) {
84     handler_->Warning("symbol %d: name offset outside the string section\n",
85                       iterator_->index);
86     // Return our null string, to keep our promise about all names being
87     // taken from the string section.
88     offset = 0;
89   }
90   return reinterpret_cast<const char *>(strings_.start + offset);
91 }
92 
Process()93 bool StabsReader::Process() {
94   while (!iterator_->at_end) {
95     if (iterator_->type == N_SO) {
96       if (! ProcessCompilationUnit())
97         return false;
98     } else if (iterator_->type == N_UNDF && unitized_) {
99       // In unitized STABS (including Linux STABS, and pretty much anything
100       // else that puts STABS data in sections), at the head of each
101       // compilation unit's entries there is an N_UNDF stab giving the
102       // number of symbols in the compilation unit, and the number of bytes
103       // that compilation unit's strings take up in the .stabstr section.
104       // Each CU's strings are separate; the n_strx values are offsets
105       // within the current CU's portion of the .stabstr section.
106       //
107       // As an optimization, the GNU linker combines all the
108       // compilation units into one, with a single N_UNDF at the
109       // beginning. However, other linkers, like Gold, do not perform
110       // this optimization.
111       string_offset_ = next_cu_string_offset_;
112       next_cu_string_offset_ = iterator_->value;
113       ++iterator_;
114     }
115 #if defined(HAVE_MACH_O_NLIST_H)
116     // Export symbols in Mach-O binaries look like this.
117     // This is necessary in order to be able to dump symbols
118     // from OS X system libraries.
119     else if ((iterator_->type & N_STAB) == 0 &&
120                (iterator_->type & N_TYPE) == N_SECT) {
121       ProcessExtern();
122     }
123 #endif
124     else {
125       ++iterator_;
126     }
127   }
128   return true;
129 }
130 
ProcessCompilationUnit()131 bool StabsReader::ProcessCompilationUnit() {
132   assert(!iterator_->at_end && iterator_->type == N_SO);
133 
134   // There may be an N_SO entry whose name ends with a slash,
135   // indicating the directory in which the compilation occurred.
136   // The build directory defaults to NULL.
137   const char *build_directory = NULL;
138   {
139     const char *name = SymbolString();
140     if (name[0] && name[strlen(name) - 1] == '/') {
141       build_directory = name;
142       ++iterator_;
143     }
144   }
145 
146   // We expect to see an N_SO entry with a filename next, indicating
147   // the start of the compilation unit.
148   {
149     if (iterator_->at_end || iterator_->type != N_SO)
150       return true;
151     const char *name = SymbolString();
152     if (name[0] == '\0') {
153       // This seems to be a stray end-of-compilation-unit marker;
154       // consume it, but don't report the end, since we didn't see a
155       // beginning.
156       ++iterator_;
157       return true;
158     }
159     current_source_file_ = name;
160   }
161 
162   if (! handler_->StartCompilationUnit(current_source_file_,
163                                        iterator_->value,
164                                        build_directory))
165     return false;
166 
167   ++iterator_;
168 
169   // The STABS documentation says that some compilers may emit
170   // additional N_SO entries with names immediately following the
171   // first, and that they should be ignored.  However, the original
172   // Breakpad STABS reader doesn't ignore them, so we won't either.
173 
174   // Process the body of the compilation unit, up to the next N_SO.
175   while (!iterator_->at_end && iterator_->type != N_SO) {
176     if (iterator_->type == N_FUN) {
177       if (! ProcessFunction())
178         return false;
179     } else if (iterator_->type == N_SLINE) {
180       // Mac OS X STABS place SLINE records before functions.
181       Line line;
182       // The value of an N_SLINE entry that appears outside a function is
183       // the absolute address of the line.
184       line.address = iterator_->value;
185       line.filename = current_source_file_;
186       // The n_desc of a N_SLINE entry is the line number.  It's a
187       // signed 16-bit field; line numbers from 32768 to 65535 are
188       // stored as n-65536.
189       line.number = (uint16_t) iterator_->descriptor;
190       queued_lines_.push_back(line);
191       ++iterator_;
192     } else if (iterator_->type == N_SOL) {
193       current_source_file_ = SymbolString();
194       ++iterator_;
195     } else {
196       // Ignore anything else.
197       ++iterator_;
198     }
199   }
200 
201   // An N_SO with an empty name indicates the end of the compilation
202   // unit.  Default to zero.
203   uint64_t ending_address = 0;
204   if (!iterator_->at_end) {
205     assert(iterator_->type == N_SO);
206     const char *name = SymbolString();
207     if (name[0] == '\0') {
208       ending_address = iterator_->value;
209       ++iterator_;
210     }
211   }
212 
213   if (! handler_->EndCompilationUnit(ending_address))
214     return false;
215 
216   queued_lines_.clear();
217 
218   return true;
219 }
220 
ProcessFunction()221 bool StabsReader::ProcessFunction() {
222   assert(!iterator_->at_end && iterator_->type == N_FUN);
223 
224   uint64_t function_address = iterator_->value;
225   // The STABS string for an N_FUN entry is the name of the function,
226   // followed by a colon, followed by type information for the
227   // function.  We want to pass the name alone to StartFunction.
228   const char *stab_string = SymbolString();
229   const char *name_end = strchr(stab_string, ':');
230   if (! name_end)
231     name_end = stab_string + strlen(stab_string);
232   string name(stab_string, name_end - stab_string);
233   if (! handler_->StartFunction(name, function_address))
234     return false;
235   ++iterator_;
236 
237   // If there were any SLINE records given before the function, report them now.
238   for (vector<Line>::const_iterator it = queued_lines_.begin();
239        it != queued_lines_.end(); it++) {
240     if (!handler_->Line(it->address, it->filename, it->number))
241       return false;
242   }
243   queued_lines_.clear();
244 
245   while (!iterator_->at_end) {
246     if (iterator_->type == N_SO || iterator_->type == N_FUN)
247       break;
248     else if (iterator_->type == N_SLINE) {
249       // The value of an N_SLINE entry is the offset of the line from
250       // the function's start address.
251       uint64_t line_address = function_address + iterator_->value;
252       // The n_desc of a N_SLINE entry is the line number.  It's a
253       // signed 16-bit field; line numbers from 32768 to 65535 are
254       // stored as n-65536.
255       uint16_t line_number = iterator_->descriptor;
256       if (! handler_->Line(line_address, current_source_file_, line_number))
257         return false;
258       ++iterator_;
259     } else if (iterator_->type == N_SOL) {
260       current_source_file_ = SymbolString();
261       ++iterator_;
262     } else
263       // Ignore anything else.
264       ++iterator_;
265   }
266 
267   // We've reached the end of the function. See if we can figure out its
268   // ending address.
269   uint64_t ending_address = 0;
270   if (!iterator_->at_end) {
271     assert(iterator_->type == N_SO || iterator_->type == N_FUN);
272     if (iterator_->type == N_FUN) {
273       const char *symbol_name = SymbolString();
274       if (symbol_name[0] == '\0') {
275         // An N_FUN entry with no name is a terminator for this function;
276         // its value is the function's size.
277         ending_address = function_address + iterator_->value;
278         ++iterator_;
279       } else {
280         // An N_FUN entry with a name is the next function, and we can take
281         // its value as our ending address. Don't advance the iterator, as
282         // we'll use this symbol to start the next function as well.
283         ending_address = iterator_->value;
284       }
285     } else {
286       // An N_SO entry could be an end-of-compilation-unit marker, or the
287       // start of the next compilation unit, but in either case, its value
288       // is our ending address. We don't advance the iterator;
289       // ProcessCompilationUnit will decide what to do with this symbol.
290       ending_address = iterator_->value;
291     }
292   }
293 
294   if (! handler_->EndFunction(ending_address))
295     return false;
296 
297   return true;
298 }
299 
ProcessExtern()300 bool StabsReader::ProcessExtern() {
301 #if defined(HAVE_MACH_O_NLIST_H)
302   assert(!iterator_->at_end &&
303          (iterator_->type & N_STAB) == 0 &&
304          (iterator_->type & N_TYPE) == N_SECT);
305 #endif
306 
307   // TODO(mark): only do symbols in the text section?
308   if (!handler_->Extern(SymbolString(), iterator_->value))
309     return false;
310 
311   ++iterator_;
312   return true;
313 }
314 
315 } // namespace google_breakpad
316