1 // Copyright (c) 2010, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 //     * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 //     * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 //     * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 
30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31 
// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
// google_breakpad::mach_o::Reader. See macho_reader.h for details.
34 
35 #include "common/mac/macho_reader.h"
36 
37 #include <assert.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4, so provide
// fallback definitions for the ARM CPU type constants when the system
// headers have not already supplied them.
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM 12
#endif

#ifndef CPU_TYPE_ARM_64
#define CPU_TYPE_ARM_64 0x0100000c  // 16777228 in decimal
#endif
49 
50 namespace google_breakpad {
51 namespace mach_o {
52 
// ASSERT_ALWAYS_EVAL(x) evaluates x in every build configuration, and
// additionally asserts that x is true when NDEBUG is not #defined.
//
// Plain 'assert' is unsuitable for expressions that do necessary work:
// when NDEBUG is #defined it does not evaluate its argument at all.
// Storing the result in a variable and asserting on that variable is
// no better, since the variable then triggers unused-variable warnings
// in NDEBUG builds.
#ifndef NDEBUG
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#else
#define ASSERT_ALWAYS_EVAL(x) (x)
#endif
66 
BadHeader()67 void FatReader::Reporter::BadHeader() {
68   fprintf(stderr, "%s: file is neither a fat binary file"
69           " nor a Mach-O object file\n", filename_.c_str());
70 }
71 
TooShort()72 void FatReader::Reporter::TooShort() {
73   fprintf(stderr, "%s: file too short for the data it claims to contain\n",
74           filename_.c_str());
75 }
76 
MisplacedObjectFile()77 void FatReader::Reporter::MisplacedObjectFile() {
78   fprintf(stderr, "%s: file too short for the object files it claims"
79           " to contain\n", filename_.c_str());
80 }
81 
Read(const uint8_t * buffer,size_t size)82 bool FatReader::Read(const uint8_t *buffer, size_t size) {
83   buffer_.start = buffer;
84   buffer_.end = buffer + size;
85   ByteCursor cursor(&buffer_);
86 
87   // Fat binaries always use big-endian, so read the magic number in
88   // that endianness. To recognize Mach-O magic numbers, which can use
89   // either endianness, check for both the proper and reversed forms
90   // of the magic numbers.
91   cursor.set_big_endian(true);
92   if (cursor >> magic_) {
93     if (magic_ == FAT_MAGIC) {
94       // How many object files does this fat binary contain?
95       uint32_t object_files_count;
96       if (!(cursor >> object_files_count)) {  // nfat_arch
97         reporter_->TooShort();
98         return false;
99       }
100 
101       // Read the list of object files.
102       object_files_.resize(object_files_count);
103       for (size_t i = 0; i < object_files_count; i++) {
104         struct fat_arch *objfile = &object_files_[i];
105 
106         // Read this object file entry, byte-swapping as appropriate.
107         cursor >> objfile->cputype
108                >> objfile->cpusubtype
109                >> objfile->offset
110                >> objfile->size
111                >> objfile->align;
112         if (!cursor) {
113           reporter_->TooShort();
114           return false;
115         }
116         // Does the file actually have the bytes this entry refers to?
117         size_t fat_size = buffer_.Size();
118         if (objfile->offset > fat_size ||
119             objfile->size > fat_size - objfile->offset) {
120           reporter_->MisplacedObjectFile();
121           return false;
122         }
123       }
124 
125       return true;
126     } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
127                magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
128       // If this is a little-endian Mach-O file, fix the cursor's endianness.
129       if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
130         cursor.set_big_endian(false);
131       // Record the entire file as a single entry in the object file list.
132       object_files_.resize(1);
133 
134       // Get the cpu type and subtype from the Mach-O header.
135       if (!(cursor >> object_files_[0].cputype
136                    >> object_files_[0].cpusubtype)) {
137         reporter_->TooShort();
138         return false;
139       }
140 
141       object_files_[0].offset = 0;
142       object_files_[0].size = static_cast<uint32_t>(buffer_.Size());
143       // This alignment is correct for 32 and 64-bit x86 and ppc.
144       // See get_align in the lipo source for other architectures:
145       // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
146       object_files_[0].align = 12;  // 2^12 == 4096
147 
148       return true;
149     }
150   }
151 
152   reporter_->BadHeader();
153   return false;
154 }
155 
BadHeader()156 void Reader::Reporter::BadHeader() {
157   fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
158 }
159 
CPUTypeMismatch(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)160 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
161                                        cpu_subtype_t cpu_subtype,
162                                        cpu_type_t expected_cpu_type,
163                                        cpu_subtype_t expected_cpu_subtype) {
164   fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
165           " type %d, subtype %d\n",
166           filename_.c_str(), cpu_type, cpu_subtype,
167           expected_cpu_type, expected_cpu_subtype);
168 }
169 
HeaderTruncated()170 void Reader::Reporter::HeaderTruncated() {
171   fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
172           filename_.c_str());
173 }
174 
LoadCommandRegionTruncated()175 void Reader::Reporter::LoadCommandRegionTruncated() {
176   fprintf(stderr, "%s: file too short to hold load command region"
177           " given in Mach-O header\n", filename_.c_str());
178 }
179 
LoadCommandsOverrun(size_t claimed,size_t i,LoadCommandType type)180 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
181                                            LoadCommandType type) {
182   fprintf(stderr, "%s: file's header claims there are %ld"
183           " load commands, but load command #%ld",
184           filename_.c_str(), claimed, i);
185   if (type) fprintf(stderr, ", of type %d,", type);
186   fprintf(stderr, " extends beyond the end of the load command region\n");
187 }
188 
LoadCommandTooShort(size_t i,LoadCommandType type)189 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
190   fprintf(stderr, "%s: the contents of load command #%ld, of type %d,"
191           " extend beyond the size given in the load command's header\n",
192           filename_.c_str(), i, type);
193 }
194 
SectionsMissing(const string & name)195 void Reader::Reporter::SectionsMissing(const string &name) {
196   fprintf(stderr, "%s: the load command for segment '%s'"
197           " is too short to hold the section headers it claims to have\n",
198           filename_.c_str(), name.c_str());
199 }
200 
MisplacedSegmentData(const string & name)201 void Reader::Reporter::MisplacedSegmentData(const string &name) {
202   fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
203           " the end of the file\n", filename_.c_str(), name.c_str());
204 }
205 
MisplacedSectionData(const string & section,const string & segment)206 void Reader::Reporter::MisplacedSectionData(const string &section,
207                                             const string &segment) {
208   fprintf(stderr, "%s: the section '%s' in segment '%s'"
209           " claims its contents lie outside the segment's contents\n",
210           filename_.c_str(), section.c_str(), segment.c_str());
211 }
212 
MisplacedSymbolTable()213 void Reader::Reporter::MisplacedSymbolTable() {
214   fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
215           " table's contents are located beyond the end of the file\n",
216           filename_.c_str());
217 }
218 
UnsupportedCPUType(cpu_type_t cpu_type)219 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
220   fprintf(stderr, "%s: CPU type %d is not supported\n",
221           filename_.c_str(), cpu_type);
222 }
223 
Read(const uint8_t * buffer,size_t size,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)224 bool Reader::Read(const uint8_t *buffer,
225                   size_t size,
226                   cpu_type_t expected_cpu_type,
227                   cpu_subtype_t expected_cpu_subtype) {
228   assert(!buffer_.start);
229   buffer_.start = buffer;
230   buffer_.end = buffer + size;
231   ByteCursor cursor(&buffer_, true);
232   uint32_t magic;
233   if (!(cursor >> magic)) {
234     reporter_->HeaderTruncated();
235     return false;
236   }
237 
238   if (expected_cpu_type != CPU_TYPE_ANY) {
239     uint32_t expected_magic;
240     // validate that magic matches the expected cpu type
241     switch (expected_cpu_type) {
242       case CPU_TYPE_ARM:
243       case CPU_TYPE_I386:
244         expected_magic = MH_CIGAM;
245         break;
246       case CPU_TYPE_POWERPC:
247         expected_magic = MH_MAGIC;
248         break;
249       case CPU_TYPE_ARM_64:
250       case CPU_TYPE_X86_64:
251         expected_magic = MH_CIGAM_64;
252         break;
253       case CPU_TYPE_POWERPC64:
254         expected_magic = MH_MAGIC_64;
255         break;
256       default:
257         reporter_->UnsupportedCPUType(expected_cpu_type);
258         return false;
259     }
260 
261     if (expected_magic != magic) {
262       reporter_->BadHeader();
263       return false;
264     }
265   }
266 
267   // Since the byte cursor is in big-endian mode, a reversed magic number
268   // always indicates a little-endian file, regardless of our own endianness.
269   switch (magic) {
270     case MH_MAGIC:    big_endian_ = true;  bits_64_ = false; break;
271     case MH_CIGAM:    big_endian_ = false; bits_64_ = false; break;
272     case MH_MAGIC_64: big_endian_ = true;  bits_64_ = true;  break;
273     case MH_CIGAM_64: big_endian_ = false; bits_64_ = true;  break;
274     default:
275       reporter_->BadHeader();
276       return false;
277   }
278   cursor.set_big_endian(big_endian_);
279   uint32_t commands_size, reserved;
280   cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
281          >> commands_size >> flags_;
282   if (bits_64_)
283     cursor >> reserved;
284   if (!cursor) {
285     reporter_->HeaderTruncated();
286     return false;
287   }
288 
289   if (expected_cpu_type != CPU_TYPE_ANY &&
290       (expected_cpu_type != cpu_type_ ||
291        expected_cpu_subtype != cpu_subtype_)) {
292     reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
293                               expected_cpu_type, expected_cpu_subtype);
294     return false;
295   }
296 
297   cursor
298       .PointTo(&load_commands_.start, commands_size)
299       .PointTo(&load_commands_.end, 0);
300   if (!cursor) {
301     reporter_->LoadCommandRegionTruncated();
302     return false;
303   }
304 
305   return true;
306 }
307 
WalkLoadCommands(Reader::LoadCommandHandler * handler) const308 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
309   ByteCursor list_cursor(&load_commands_, big_endian_);
310 
311   for (size_t index = 0; index < load_command_count_; ++index) {
312     // command refers to this load command alone, so that cursor will
313     // refuse to read past the load command's end. But since we haven't
314     // read the size yet, let command initially refer to the entire
315     // remainder of the load command series.
316     ByteBuffer command(list_cursor.here(), list_cursor.Available());
317     ByteCursor cursor(&command, big_endian_);
318 
319     // Read the command type and size --- fields common to all commands.
320     uint32_t type, size;
321     if (!(cursor >> type)) {
322       reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
323       return false;
324     }
325     if (!(cursor >> size) || size > command.Size()) {
326       reporter_->LoadCommandsOverrun(load_command_count_, index, type);
327       return false;
328     }
329 
330     // Now that we've read the length, restrict command's range to this
331     // load command only.
332     command.end = command.start + size;
333 
334     switch (type) {
335       case LC_SEGMENT:
336       case LC_SEGMENT_64: {
337         Segment segment;
338         segment.bits_64 = (type == LC_SEGMENT_64);
339         size_t word_size = segment.bits_64 ? 8 : 4;
340         cursor.CString(&segment.name, 16);
341         size_t file_offset, file_size;
342         cursor
343             .Read(word_size, false, &segment.vmaddr)
344             .Read(word_size, false, &segment.vmsize)
345             .Read(word_size, false, &file_offset)
346             .Read(word_size, false, &file_size);
347         cursor >> segment.maxprot
348                >> segment.initprot
349                >> segment.nsects
350                >> segment.flags;
351         if (!cursor) {
352           reporter_->LoadCommandTooShort(index, type);
353           return false;
354         }
355         if (file_offset > buffer_.Size() ||
356             file_size > buffer_.Size() - file_offset) {
357           reporter_->MisplacedSegmentData(segment.name);
358           return false;
359         }
360         // Mach-O files in .dSYM bundles have the contents of the loaded
361         // segments removed, and their file offsets and file sizes zeroed
362         // out. To help us handle this special case properly, give such
363         // segments' contents NULL starting and ending pointers.
364         if (file_offset == 0 && file_size == 0) {
365           segment.contents.start = segment.contents.end = NULL;
366         } else {
367           segment.contents.start = buffer_.start + file_offset;
368           segment.contents.end = segment.contents.start + file_size;
369         }
370         // The section list occupies the remainder of this load command's space.
371         segment.section_list.start = cursor.here();
372         segment.section_list.end = command.end;
373 
374         if (!handler->SegmentCommand(segment))
375           return false;
376         break;
377       }
378 
379       case LC_SYMTAB: {
380         uint32_t symoff, nsyms, stroff, strsize;
381         cursor >> symoff >> nsyms >> stroff >> strsize;
382         if (!cursor) {
383           reporter_->LoadCommandTooShort(index, type);
384           return false;
385         }
386         // How big are the entries in the symbol table?
387         // sizeof(struct nlist_64) : sizeof(struct nlist),
388         // but be paranoid about alignment vs. target architecture.
389         size_t symbol_size = bits_64_ ? 16 : 12;
390         // How big is the entire symbol array?
391         size_t symbols_size = nsyms * symbol_size;
392         if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
393             stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
394           reporter_->MisplacedSymbolTable();
395           return false;
396         }
397         ByteBuffer entries(buffer_.start + symoff, symbols_size);
398         ByteBuffer names(buffer_.start + stroff, strsize);
399         if (!handler->SymtabCommand(entries, names))
400           return false;
401         break;
402       }
403 
404       default: {
405         if (!handler->UnknownCommand(type, command))
406           return false;
407         break;
408       }
409     }
410 
411     list_cursor.set_here(command.end);
412   }
413 
414   return true;
415 }
416 
417 // A load command handler that looks for a segment of a given name.
418 class Reader::SegmentFinder : public LoadCommandHandler {
419  public:
420   // Create a load command handler that looks for a segment named NAME,
421   // and sets SEGMENT to describe it if found.
SegmentFinder(const string & name,Segment * segment)422   SegmentFinder(const string &name, Segment *segment)
423       : name_(name), segment_(segment), found_() { }
424 
425   // Return true if the traversal found the segment, false otherwise.
found() const426   bool found() const { return found_; }
427 
SegmentCommand(const Segment & segment)428   bool SegmentCommand(const Segment &segment) {
429     if (segment.name == name_) {
430       *segment_ = segment;
431       found_ = true;
432       return false;
433     }
434     return true;
435   }
436 
437  private:
438   // The name of the segment our creator is looking for.
439   const string &name_;
440 
441   // Where we should store the segment if found. (WEAK)
442   Segment *segment_;
443 
444   // True if we found the segment.
445   bool found_;
446 };
447 
FindSegment(const string & name,Segment * segment) const448 bool Reader::FindSegment(const string &name, Segment *segment) const {
449   SegmentFinder finder(name, segment);
450   WalkLoadCommands(&finder);
451   return finder.found();
452 }
453 
WalkSegmentSections(const Segment & segment,SectionHandler * handler) const454 bool Reader::WalkSegmentSections(const Segment &segment,
455                                  SectionHandler *handler) const {
456   size_t word_size = segment.bits_64 ? 8 : 4;
457   ByteCursor cursor(&segment.section_list, big_endian_);
458 
459   for (size_t i = 0; i < segment.nsects; i++) {
460     Section section;
461     section.bits_64 = segment.bits_64;
462     uint64_t size;
463     uint32_t offset, dummy32;
464     cursor
465         .CString(&section.section_name, 16)
466         .CString(&section.segment_name, 16)
467         .Read(word_size, false, &section.address)
468         .Read(word_size, false, &size)
469         >> offset
470         >> section.align
471         >> dummy32
472         >> dummy32
473         >> section.flags
474         >> dummy32
475         >> dummy32;
476     if (section.bits_64)
477       cursor >> dummy32;
478     if (!cursor) {
479       reporter_->SectionsMissing(segment.name);
480       return false;
481     }
482     if ((section.flags & SECTION_TYPE) == S_ZEROFILL) {
483       // Zero-fill sections have a size, but no contents.
484       section.contents.start = section.contents.end = NULL;
485     } else if (segment.contents.start == NULL &&
486                segment.contents.end == NULL) {
487       // Mach-O files in .dSYM bundles have the contents of the loaded
488       // segments removed, and their file offsets and file sizes zeroed
489       // out.  However, the sections within those segments still have
490       // non-zero sizes.  There's no reason to call MisplacedSectionData in
491       // this case; the caller may just need the section's load
492       // address. But do set the contents' limits to NULL, for safety.
493       section.contents.start = section.contents.end = NULL;
494     } else {
495       if (offset < size_t(segment.contents.start - buffer_.start) ||
496           offset > size_t(segment.contents.end - buffer_.start) ||
497           size > size_t(segment.contents.end - buffer_.start - offset)) {
498         reporter_->MisplacedSectionData(section.section_name,
499                                         section.segment_name);
500         return false;
501       }
502       section.contents.start = buffer_.start + offset;
503       section.contents.end = section.contents.start + size;
504     }
505     if (!handler->HandleSection(section))
506       return false;
507   }
508   return true;
509 }
510 
511 // A SectionHandler that builds a SectionMap for the sections within a
512 // given segment.
513 class Reader::SectionMapper: public SectionHandler {
514  public:
515   // Create a SectionHandler that populates MAP with an entry for
516   // each section it is given.
SectionMapper(SectionMap * map)517   SectionMapper(SectionMap *map) : map_(map) { }
HandleSection(const Section & section)518   bool HandleSection(const Section &section) {
519     (*map_)[section.section_name] = section;
520     return true;
521   }
522  private:
523   // The map under construction. (WEAK)
524   SectionMap *map_;
525 };
526 
MapSegmentSections(const Segment & segment,SectionMap * section_map) const527 bool Reader::MapSegmentSections(const Segment &segment,
528                                 SectionMap *section_map) const {
529   section_map->clear();
530   SectionMapper mapper(section_map);
531   return WalkSegmentSections(segment, &mapper);
532 }
533 
534 }  // namespace mach_o
535 }  // namespace google_breakpad
536