1 // Copyright (c) 2010, Google Inc.
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are
6 // met:
7 //
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above
11 // copyright notice, this list of conditions and the following disclaimer
12 // in the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google Inc. nor the names of its
15 // contributors may be used to endorse or promote products derived from
16 // this software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 // Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>
31
// macho_reader.cc: Implementation of google_breakpad::mach_o::FatReader and
// google_breakpad::mach_o::Reader. See macho_reader.h for details.
34
35 #include "common/mac/macho_reader.h"
36
37 #include <assert.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40
// Unfortunately, CPU_TYPE_ARM is not defined for 10.4, so supply a fallback
// when the headers included above did not provide one.
#ifndef CPU_TYPE_ARM
#define CPU_TYPE_ARM 12
#endif

// Fallback for CPU_TYPE_ARM_64 when the headers do not provide it.
// 0x0100000c == 16777228, i.e. the ARM value (12) with bit 24 set.
#ifndef CPU_TYPE_ARM_64
#define CPU_TYPE_ARM_64 0x0100000c
#endif
49
50 namespace google_breakpad {
51 namespace mach_o {
52
// If NDEBUG is #defined, then the 'assert' macro doesn't evaluate its
// arguments, so you can't place expressions that do necessary work in
// the argument of an assert. Nor can you assign the result of the
// expression to a variable and assert that the variable's value is
// true: you'll get unused variable warnings when NDEBUG is #defined.
//
// ASSERT_ALWAYS_EVAL always evaluates its argument, and asserts that
// the result is true if NDEBUG is not #defined.
//
// The NDEBUG form casts the result to void so that the macro is a void
// expression in both configurations: the discarded value does not provoke
// an unused-value warning, and code cannot come to depend on a result
// that only exists when NDEBUG is #defined.
#if defined(NDEBUG)
#define ASSERT_ALWAYS_EVAL(x) ((void)(x))
#else
#define ASSERT_ALWAYS_EVAL(x) assert(x)
#endif
66
BadHeader()67 void FatReader::Reporter::BadHeader() {
68 fprintf(stderr, "%s: file is neither a fat binary file"
69 " nor a Mach-O object file\n", filename_.c_str());
70 }
71
TooShort()72 void FatReader::Reporter::TooShort() {
73 fprintf(stderr, "%s: file too short for the data it claims to contain\n",
74 filename_.c_str());
75 }
76
MisplacedObjectFile()77 void FatReader::Reporter::MisplacedObjectFile() {
78 fprintf(stderr, "%s: file too short for the object files it claims"
79 " to contain\n", filename_.c_str());
80 }
81
Read(const uint8_t * buffer,size_t size)82 bool FatReader::Read(const uint8_t *buffer, size_t size) {
83 buffer_.start = buffer;
84 buffer_.end = buffer + size;
85 ByteCursor cursor(&buffer_);
86
87 // Fat binaries always use big-endian, so read the magic number in
88 // that endianness. To recognize Mach-O magic numbers, which can use
89 // either endianness, check for both the proper and reversed forms
90 // of the magic numbers.
91 cursor.set_big_endian(true);
92 if (cursor >> magic_) {
93 if (magic_ == FAT_MAGIC) {
94 // How many object files does this fat binary contain?
95 uint32_t object_files_count;
96 if (!(cursor >> object_files_count)) { // nfat_arch
97 reporter_->TooShort();
98 return false;
99 }
100
101 // Read the list of object files.
102 object_files_.resize(object_files_count);
103 for (size_t i = 0; i < object_files_count; i++) {
104 struct fat_arch *objfile = &object_files_[i];
105
106 // Read this object file entry, byte-swapping as appropriate.
107 cursor >> objfile->cputype
108 >> objfile->cpusubtype
109 >> objfile->offset
110 >> objfile->size
111 >> objfile->align;
112 if (!cursor) {
113 reporter_->TooShort();
114 return false;
115 }
116 // Does the file actually have the bytes this entry refers to?
117 size_t fat_size = buffer_.Size();
118 if (objfile->offset > fat_size ||
119 objfile->size > fat_size - objfile->offset) {
120 reporter_->MisplacedObjectFile();
121 return false;
122 }
123 }
124
125 return true;
126 } else if (magic_ == MH_MAGIC || magic_ == MH_MAGIC_64 ||
127 magic_ == MH_CIGAM || magic_ == MH_CIGAM_64) {
128 // If this is a little-endian Mach-O file, fix the cursor's endianness.
129 if (magic_ == MH_CIGAM || magic_ == MH_CIGAM_64)
130 cursor.set_big_endian(false);
131 // Record the entire file as a single entry in the object file list.
132 object_files_.resize(1);
133
134 // Get the cpu type and subtype from the Mach-O header.
135 if (!(cursor >> object_files_[0].cputype
136 >> object_files_[0].cpusubtype)) {
137 reporter_->TooShort();
138 return false;
139 }
140
141 object_files_[0].offset = 0;
142 object_files_[0].size = static_cast<uint32_t>(buffer_.Size());
143 // This alignment is correct for 32 and 64-bit x86 and ppc.
144 // See get_align in the lipo source for other architectures:
145 // http://www.opensource.apple.com/source/cctools/cctools-773/misc/lipo.c
146 object_files_[0].align = 12; // 2^12 == 4096
147
148 return true;
149 }
150 }
151
152 reporter_->BadHeader();
153 return false;
154 }
155
BadHeader()156 void Reader::Reporter::BadHeader() {
157 fprintf(stderr, "%s: file is not a Mach-O object file\n", filename_.c_str());
158 }
159
CPUTypeMismatch(cpu_type_t cpu_type,cpu_subtype_t cpu_subtype,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)160 void Reader::Reporter::CPUTypeMismatch(cpu_type_t cpu_type,
161 cpu_subtype_t cpu_subtype,
162 cpu_type_t expected_cpu_type,
163 cpu_subtype_t expected_cpu_subtype) {
164 fprintf(stderr, "%s: CPU type %d, subtype %d does not match expected"
165 " type %d, subtype %d\n",
166 filename_.c_str(), cpu_type, cpu_subtype,
167 expected_cpu_type, expected_cpu_subtype);
168 }
169
HeaderTruncated()170 void Reader::Reporter::HeaderTruncated() {
171 fprintf(stderr, "%s: file does not contain a complete Mach-O header\n",
172 filename_.c_str());
173 }
174
LoadCommandRegionTruncated()175 void Reader::Reporter::LoadCommandRegionTruncated() {
176 fprintf(stderr, "%s: file too short to hold load command region"
177 " given in Mach-O header\n", filename_.c_str());
178 }
179
LoadCommandsOverrun(size_t claimed,size_t i,LoadCommandType type)180 void Reader::Reporter::LoadCommandsOverrun(size_t claimed, size_t i,
181 LoadCommandType type) {
182 fprintf(stderr, "%s: file's header claims there are %ld"
183 " load commands, but load command #%ld",
184 filename_.c_str(), claimed, i);
185 if (type) fprintf(stderr, ", of type %d,", type);
186 fprintf(stderr, " extends beyond the end of the load command region\n");
187 }
188
LoadCommandTooShort(size_t i,LoadCommandType type)189 void Reader::Reporter::LoadCommandTooShort(size_t i, LoadCommandType type) {
190 fprintf(stderr, "%s: the contents of load command #%ld, of type %d,"
191 " extend beyond the size given in the load command's header\n",
192 filename_.c_str(), i, type);
193 }
194
SectionsMissing(const string & name)195 void Reader::Reporter::SectionsMissing(const string &name) {
196 fprintf(stderr, "%s: the load command for segment '%s'"
197 " is too short to hold the section headers it claims to have\n",
198 filename_.c_str(), name.c_str());
199 }
200
MisplacedSegmentData(const string & name)201 void Reader::Reporter::MisplacedSegmentData(const string &name) {
202 fprintf(stderr, "%s: the segment '%s' claims its contents lie beyond"
203 " the end of the file\n", filename_.c_str(), name.c_str());
204 }
205
MisplacedSectionData(const string & section,const string & segment)206 void Reader::Reporter::MisplacedSectionData(const string §ion,
207 const string &segment) {
208 fprintf(stderr, "%s: the section '%s' in segment '%s'"
209 " claims its contents lie outside the segment's contents\n",
210 filename_.c_str(), section.c_str(), segment.c_str());
211 }
212
MisplacedSymbolTable()213 void Reader::Reporter::MisplacedSymbolTable() {
214 fprintf(stderr, "%s: the LC_SYMTAB load command claims that the symbol"
215 " table's contents are located beyond the end of the file\n",
216 filename_.c_str());
217 }
218
UnsupportedCPUType(cpu_type_t cpu_type)219 void Reader::Reporter::UnsupportedCPUType(cpu_type_t cpu_type) {
220 fprintf(stderr, "%s: CPU type %d is not supported\n",
221 filename_.c_str(), cpu_type);
222 }
223
Read(const uint8_t * buffer,size_t size,cpu_type_t expected_cpu_type,cpu_subtype_t expected_cpu_subtype)224 bool Reader::Read(const uint8_t *buffer,
225 size_t size,
226 cpu_type_t expected_cpu_type,
227 cpu_subtype_t expected_cpu_subtype) {
228 assert(!buffer_.start);
229 buffer_.start = buffer;
230 buffer_.end = buffer + size;
231 ByteCursor cursor(&buffer_, true);
232 uint32_t magic;
233 if (!(cursor >> magic)) {
234 reporter_->HeaderTruncated();
235 return false;
236 }
237
238 if (expected_cpu_type != CPU_TYPE_ANY) {
239 uint32_t expected_magic;
240 // validate that magic matches the expected cpu type
241 switch (expected_cpu_type) {
242 case CPU_TYPE_ARM:
243 case CPU_TYPE_I386:
244 expected_magic = MH_CIGAM;
245 break;
246 case CPU_TYPE_POWERPC:
247 expected_magic = MH_MAGIC;
248 break;
249 case CPU_TYPE_ARM_64:
250 case CPU_TYPE_X86_64:
251 expected_magic = MH_CIGAM_64;
252 break;
253 case CPU_TYPE_POWERPC64:
254 expected_magic = MH_MAGIC_64;
255 break;
256 default:
257 reporter_->UnsupportedCPUType(expected_cpu_type);
258 return false;
259 }
260
261 if (expected_magic != magic) {
262 reporter_->BadHeader();
263 return false;
264 }
265 }
266
267 // Since the byte cursor is in big-endian mode, a reversed magic number
268 // always indicates a little-endian file, regardless of our own endianness.
269 switch (magic) {
270 case MH_MAGIC: big_endian_ = true; bits_64_ = false; break;
271 case MH_CIGAM: big_endian_ = false; bits_64_ = false; break;
272 case MH_MAGIC_64: big_endian_ = true; bits_64_ = true; break;
273 case MH_CIGAM_64: big_endian_ = false; bits_64_ = true; break;
274 default:
275 reporter_->BadHeader();
276 return false;
277 }
278 cursor.set_big_endian(big_endian_);
279 uint32_t commands_size, reserved;
280 cursor >> cpu_type_ >> cpu_subtype_ >> file_type_ >> load_command_count_
281 >> commands_size >> flags_;
282 if (bits_64_)
283 cursor >> reserved;
284 if (!cursor) {
285 reporter_->HeaderTruncated();
286 return false;
287 }
288
289 if (expected_cpu_type != CPU_TYPE_ANY &&
290 (expected_cpu_type != cpu_type_ ||
291 expected_cpu_subtype != cpu_subtype_)) {
292 reporter_->CPUTypeMismatch(cpu_type_, cpu_subtype_,
293 expected_cpu_type, expected_cpu_subtype);
294 return false;
295 }
296
297 cursor
298 .PointTo(&load_commands_.start, commands_size)
299 .PointTo(&load_commands_.end, 0);
300 if (!cursor) {
301 reporter_->LoadCommandRegionTruncated();
302 return false;
303 }
304
305 return true;
306 }
307
WalkLoadCommands(Reader::LoadCommandHandler * handler) const308 bool Reader::WalkLoadCommands(Reader::LoadCommandHandler *handler) const {
309 ByteCursor list_cursor(&load_commands_, big_endian_);
310
311 for (size_t index = 0; index < load_command_count_; ++index) {
312 // command refers to this load command alone, so that cursor will
313 // refuse to read past the load command's end. But since we haven't
314 // read the size yet, let command initially refer to the entire
315 // remainder of the load command series.
316 ByteBuffer command(list_cursor.here(), list_cursor.Available());
317 ByteCursor cursor(&command, big_endian_);
318
319 // Read the command type and size --- fields common to all commands.
320 uint32_t type, size;
321 if (!(cursor >> type)) {
322 reporter_->LoadCommandsOverrun(load_command_count_, index, 0);
323 return false;
324 }
325 if (!(cursor >> size) || size > command.Size()) {
326 reporter_->LoadCommandsOverrun(load_command_count_, index, type);
327 return false;
328 }
329
330 // Now that we've read the length, restrict command's range to this
331 // load command only.
332 command.end = command.start + size;
333
334 switch (type) {
335 case LC_SEGMENT:
336 case LC_SEGMENT_64: {
337 Segment segment;
338 segment.bits_64 = (type == LC_SEGMENT_64);
339 size_t word_size = segment.bits_64 ? 8 : 4;
340 cursor.CString(&segment.name, 16);
341 size_t file_offset, file_size;
342 cursor
343 .Read(word_size, false, &segment.vmaddr)
344 .Read(word_size, false, &segment.vmsize)
345 .Read(word_size, false, &file_offset)
346 .Read(word_size, false, &file_size);
347 cursor >> segment.maxprot
348 >> segment.initprot
349 >> segment.nsects
350 >> segment.flags;
351 if (!cursor) {
352 reporter_->LoadCommandTooShort(index, type);
353 return false;
354 }
355 if (file_offset > buffer_.Size() ||
356 file_size > buffer_.Size() - file_offset) {
357 reporter_->MisplacedSegmentData(segment.name);
358 return false;
359 }
360 // Mach-O files in .dSYM bundles have the contents of the loaded
361 // segments removed, and their file offsets and file sizes zeroed
362 // out. To help us handle this special case properly, give such
363 // segments' contents NULL starting and ending pointers.
364 if (file_offset == 0 && file_size == 0) {
365 segment.contents.start = segment.contents.end = NULL;
366 } else {
367 segment.contents.start = buffer_.start + file_offset;
368 segment.contents.end = segment.contents.start + file_size;
369 }
370 // The section list occupies the remainder of this load command's space.
371 segment.section_list.start = cursor.here();
372 segment.section_list.end = command.end;
373
374 if (!handler->SegmentCommand(segment))
375 return false;
376 break;
377 }
378
379 case LC_SYMTAB: {
380 uint32_t symoff, nsyms, stroff, strsize;
381 cursor >> symoff >> nsyms >> stroff >> strsize;
382 if (!cursor) {
383 reporter_->LoadCommandTooShort(index, type);
384 return false;
385 }
386 // How big are the entries in the symbol table?
387 // sizeof(struct nlist_64) : sizeof(struct nlist),
388 // but be paranoid about alignment vs. target architecture.
389 size_t symbol_size = bits_64_ ? 16 : 12;
390 // How big is the entire symbol array?
391 size_t symbols_size = nsyms * symbol_size;
392 if (symoff > buffer_.Size() || symbols_size > buffer_.Size() - symoff ||
393 stroff > buffer_.Size() || strsize > buffer_.Size() - stroff) {
394 reporter_->MisplacedSymbolTable();
395 return false;
396 }
397 ByteBuffer entries(buffer_.start + symoff, symbols_size);
398 ByteBuffer names(buffer_.start + stroff, strsize);
399 if (!handler->SymtabCommand(entries, names))
400 return false;
401 break;
402 }
403
404 default: {
405 if (!handler->UnknownCommand(type, command))
406 return false;
407 break;
408 }
409 }
410
411 list_cursor.set_here(command.end);
412 }
413
414 return true;
415 }
416
417 // A load command handler that looks for a segment of a given name.
418 class Reader::SegmentFinder : public LoadCommandHandler {
419 public:
420 // Create a load command handler that looks for a segment named NAME,
421 // and sets SEGMENT to describe it if found.
SegmentFinder(const string & name,Segment * segment)422 SegmentFinder(const string &name, Segment *segment)
423 : name_(name), segment_(segment), found_() { }
424
425 // Return true if the traversal found the segment, false otherwise.
found() const426 bool found() const { return found_; }
427
SegmentCommand(const Segment & segment)428 bool SegmentCommand(const Segment &segment) {
429 if (segment.name == name_) {
430 *segment_ = segment;
431 found_ = true;
432 return false;
433 }
434 return true;
435 }
436
437 private:
438 // The name of the segment our creator is looking for.
439 const string &name_;
440
441 // Where we should store the segment if found. (WEAK)
442 Segment *segment_;
443
444 // True if we found the segment.
445 bool found_;
446 };
447
FindSegment(const string & name,Segment * segment) const448 bool Reader::FindSegment(const string &name, Segment *segment) const {
449 SegmentFinder finder(name, segment);
450 WalkLoadCommands(&finder);
451 return finder.found();
452 }
453
WalkSegmentSections(const Segment & segment,SectionHandler * handler) const454 bool Reader::WalkSegmentSections(const Segment &segment,
455 SectionHandler *handler) const {
456 size_t word_size = segment.bits_64 ? 8 : 4;
457 ByteCursor cursor(&segment.section_list, big_endian_);
458
459 for (size_t i = 0; i < segment.nsects; i++) {
460 Section section;
461 section.bits_64 = segment.bits_64;
462 uint64_t size;
463 uint32_t offset, dummy32;
464 cursor
465 .CString(§ion.section_name, 16)
466 .CString(§ion.segment_name, 16)
467 .Read(word_size, false, §ion.address)
468 .Read(word_size, false, &size)
469 >> offset
470 >> section.align
471 >> dummy32
472 >> dummy32
473 >> section.flags
474 >> dummy32
475 >> dummy32;
476 if (section.bits_64)
477 cursor >> dummy32;
478 if (!cursor) {
479 reporter_->SectionsMissing(segment.name);
480 return false;
481 }
482 if ((section.flags & SECTION_TYPE) == S_ZEROFILL) {
483 // Zero-fill sections have a size, but no contents.
484 section.contents.start = section.contents.end = NULL;
485 } else if (segment.contents.start == NULL &&
486 segment.contents.end == NULL) {
487 // Mach-O files in .dSYM bundles have the contents of the loaded
488 // segments removed, and their file offsets and file sizes zeroed
489 // out. However, the sections within those segments still have
490 // non-zero sizes. There's no reason to call MisplacedSectionData in
491 // this case; the caller may just need the section's load
492 // address. But do set the contents' limits to NULL, for safety.
493 section.contents.start = section.contents.end = NULL;
494 } else {
495 if (offset < size_t(segment.contents.start - buffer_.start) ||
496 offset > size_t(segment.contents.end - buffer_.start) ||
497 size > size_t(segment.contents.end - buffer_.start - offset)) {
498 reporter_->MisplacedSectionData(section.section_name,
499 section.segment_name);
500 return false;
501 }
502 section.contents.start = buffer_.start + offset;
503 section.contents.end = section.contents.start + size;
504 }
505 if (!handler->HandleSection(section))
506 return false;
507 }
508 return true;
509 }
510
511 // A SectionHandler that builds a SectionMap for the sections within a
512 // given segment.
513 class Reader::SectionMapper: public SectionHandler {
514 public:
515 // Create a SectionHandler that populates MAP with an entry for
516 // each section it is given.
SectionMapper(SectionMap * map)517 SectionMapper(SectionMap *map) : map_(map) { }
HandleSection(const Section & section)518 bool HandleSection(const Section §ion) {
519 (*map_)[section.section_name] = section;
520 return true;
521 }
522 private:
523 // The map under construction. (WEAK)
524 SectionMap *map_;
525 };
526
MapSegmentSections(const Segment & segment,SectionMap * section_map) const527 bool Reader::MapSegmentSections(const Segment &segment,
528 SectionMap *section_map) const {
529 section_map->clear();
530 SectionMapper mapper(section_map);
531 return WalkSegmentSections(segment, &mapper);
532 }
533
534 } // namespace mach_o
535 } // namespace google_breakpad
536