1#!/usr/bin/env python 2from __future__ import print_function 3 4import cmd 5import dict_utils 6import file_extract 7import optparse 8import re 9import struct 10import string 11import io 12import sys 13import uuid 14 15# Mach header "magic" constants 16MH_MAGIC = 0xfeedface 17MH_CIGAM = 0xcefaedfe 18MH_MAGIC_64 = 0xfeedfacf 19MH_CIGAM_64 = 0xcffaedfe 20FAT_MAGIC = 0xcafebabe 21FAT_CIGAM = 0xbebafeca 22 23# Mach haeder "filetype" constants 24MH_OBJECT = 0x00000001 25MH_EXECUTE = 0x00000002 26MH_FVMLIB = 0x00000003 27MH_CORE = 0x00000004 28MH_PRELOAD = 0x00000005 29MH_DYLIB = 0x00000006 30MH_DYLINKER = 0x00000007 31MH_BUNDLE = 0x00000008 32MH_DYLIB_STUB = 0x00000009 33MH_DSYM = 0x0000000a 34MH_KEXT_BUNDLE = 0x0000000b 35 36# Mach haeder "flag" constant bits 37MH_NOUNDEFS = 0x00000001 38MH_INCRLINK = 0x00000002 39MH_DYLDLINK = 0x00000004 40MH_BINDATLOAD = 0x00000008 41MH_PREBOUND = 0x00000010 42MH_SPLIT_SEGS = 0x00000020 43MH_LAZY_INIT = 0x00000040 44MH_TWOLEVEL = 0x00000080 45MH_FORCE_FLAT = 0x00000100 46MH_NOMULTIDEFS = 0x00000200 47MH_NOFIXPREBINDING = 0x00000400 48MH_PREBINDABLE = 0x00000800 49MH_ALLMODSBOUND = 0x00001000 50MH_SUBSECTIONS_VIA_SYMBOLS = 0x00002000 51MH_CANONICAL = 0x00004000 52MH_WEAK_DEFINES = 0x00008000 53MH_BINDS_TO_WEAK = 0x00010000 54MH_ALLOW_STACK_EXECUTION = 0x00020000 55MH_ROOT_SAFE = 0x00040000 56MH_SETUID_SAFE = 0x00080000 57MH_NO_REEXPORTED_DYLIBS = 0x00100000 58MH_PIE = 0x00200000 59MH_DEAD_STRIPPABLE_DYLIB = 0x00400000 60MH_HAS_TLV_DESCRIPTORS = 0x00800000 61MH_NO_HEAP_EXECUTION = 0x01000000 62 63# Mach load command constants 64LC_REQ_DYLD = 0x80000000 65LC_SEGMENT = 0x00000001 66LC_SYMTAB = 0x00000002 67LC_SYMSEG = 0x00000003 68LC_THREAD = 0x00000004 69LC_UNIXTHREAD = 0x00000005 70LC_LOADFVMLIB = 0x00000006 71LC_IDFVMLIB = 0x00000007 72LC_IDENT = 0x00000008 73LC_FVMFILE = 0x00000009 74LC_PREPAGE = 0x0000000a 75LC_DYSYMTAB = 0x0000000b 76LC_LOAD_DYLIB = 0x0000000c 77LC_ID_DYLIB = 0x0000000d 78LC_LOAD_DYLINKER = 0x0000000e 79LC_ID_DYLINKER = 0x0000000f 80LC_PREBOUND_DYLIB = 0x00000010 81LC_ROUTINES = 0x00000011 82LC_SUB_FRAMEWORK = 0x00000012 83LC_SUB_UMBRELLA = 0x00000013 84LC_SUB_CLIENT = 0x00000014 85LC_SUB_LIBRARY = 0x00000015 86LC_TWOLEVEL_HINTS = 0x00000016 87LC_PREBIND_CKSUM = 0x00000017 88LC_LOAD_WEAK_DYLIB = 0x00000018 | LC_REQ_DYLD 89LC_SEGMENT_64 = 0x00000019 90LC_ROUTINES_64 = 0x0000001a 91LC_UUID = 0x0000001b 92LC_RPATH = 0x0000001c | LC_REQ_DYLD 93LC_CODE_SIGNATURE = 0x0000001d 94LC_SEGMENT_SPLIT_INFO = 0x0000001e 95LC_REEXPORT_DYLIB = 0x0000001f | LC_REQ_DYLD 96LC_LAZY_LOAD_DYLIB = 0x00000020 97LC_ENCRYPTION_INFO = 0x00000021 98LC_DYLD_INFO = 0x00000022 99LC_DYLD_INFO_ONLY = 0x00000022 | LC_REQ_DYLD 100LC_LOAD_UPWARD_DYLIB = 0x00000023 | LC_REQ_DYLD 101LC_VERSION_MIN_MACOSX = 0x00000024 102LC_VERSION_MIN_IPHONEOS = 0x00000025 103LC_FUNCTION_STARTS = 0x00000026 104LC_DYLD_ENVIRONMENT = 0x00000027 105 106# Mach CPU constants 107CPU_ARCH_MASK = 0xff000000 108CPU_ARCH_ABI64 = 0x01000000 109CPU_TYPE_ANY = 0xffffffff 110CPU_TYPE_VAX = 1 111CPU_TYPE_MC680x0 = 6 112CPU_TYPE_I386 = 7 113CPU_TYPE_X86_64 = CPU_TYPE_I386 | CPU_ARCH_ABI64 114CPU_TYPE_MIPS = 8 115CPU_TYPE_MC98000 = 10 116CPU_TYPE_HPPA = 11 117CPU_TYPE_ARM = 12 118CPU_TYPE_MC88000 = 13 119CPU_TYPE_SPARC = 14 120CPU_TYPE_I860 = 15 121CPU_TYPE_ALPHA = 16 122CPU_TYPE_POWERPC = 18 123CPU_TYPE_POWERPC64 = CPU_TYPE_POWERPC | CPU_ARCH_ABI64 124 125# VM protection constants 126VM_PROT_READ = 1 127VM_PROT_WRITE = 2 128VM_PROT_EXECUTE = 4 129 130# VM protection constants 131N_STAB = 0xe0 132N_PEXT = 0x10 133N_TYPE = 0x0e 134N_EXT = 0x01 135 136# Values for nlist N_TYPE bits of the "Mach.NList.type" field. 137N_UNDF = 0x0 138N_ABS = 0x2 139N_SECT = 0xe 140N_PBUD = 0xc 141N_INDR = 0xa 142 143# Section indexes for the "Mach.NList.sect_idx" fields 144NO_SECT = 0 145MAX_SECT = 255 146 147# Stab defines 148N_GSYM = 0x20 149N_FNAME = 0x22 150N_FUN = 0x24 151N_STSYM = 0x26 152N_LCSYM = 0x28 153N_BNSYM = 0x2e 154N_OPT = 0x3c 155N_RSYM = 0x40 156N_SLINE = 0x44 157N_ENSYM = 0x4e 158N_SSYM = 0x60 159N_SO = 0x64 160N_OSO = 0x66 161N_LSYM = 0x80 162N_BINCL = 0x82 163N_SOL = 0x84 164N_PARAMS = 0x86 165N_VERSION = 0x88 166N_OLEVEL = 0x8A 167N_PSYM = 0xa0 168N_EINCL = 0xa2 169N_ENTRY = 0xa4 170N_LBRAC = 0xc0 171N_EXCL = 0xc2 172N_RBRAC = 0xe0 173N_BCOMM = 0xe2 174N_ECOMM = 0xe4 175N_ECOML = 0xe8 176N_LENG = 0xfe 177 178vm_prot_names = ['---', 'r--', '-w-', 'rw-', '--x', 'r-x', '-wx', 'rwx'] 179 180 181def dump_memory(base_addr, data, hex_bytes_len, num_per_line): 182 hex_bytes = data.encode('hex') 183 if hex_bytes_len == -1: 184 hex_bytes_len = len(hex_bytes) 185 addr = base_addr 186 ascii_str = '' 187 i = 0 188 while i < hex_bytes_len: 189 if ((i / 2) % num_per_line) == 0: 190 if i > 0: 191 print(' %s' % (ascii_str)) 192 ascii_str = '' 193 print('0x%8.8x:' % (addr + i), end=' ') 194 hex_byte = hex_bytes[i:i + 2] 195 print(hex_byte, end=' ') 196 int_byte = int(hex_byte, 16) 197 ascii_char = '%c' % (int_byte) 198 if int_byte >= 32 and int_byte < 127: 199 ascii_str += ascii_char 200 else: 201 ascii_str += '.' 202 i = i + 2 203 if ascii_str: 204 if (i / 2) % num_per_line: 205 padding = num_per_line - ((i / 2) % num_per_line) 206 else: 207 padding = 0 208 print('%*s%s' % (padding * 3 + 1, '', ascii_str)) 209 print() 210 211 212class TerminalColors: 213 '''Simple terminal colors class''' 214 215 def __init__(self, enabled=True): 216 # TODO: discover terminal type from "file" and disable if 217 # it can't handle the color codes 218 self.enabled = enabled 219 220 def reset(self): 221 '''Reset all terminal colors and formatting.''' 222 if self.enabled: 223 return "\x1b[0m" 224 return '' 225 226 def bold(self, on=True): 227 '''Enable or disable bold depending on the "on" parameter.''' 228 if self.enabled: 229 if on: 230 return "\x1b[1m" 231 else: 232 return "\x1b[22m" 233 return '' 234 235 def italics(self, on=True): 236 '''Enable or disable italics depending on the "on" parameter.''' 237 if self.enabled: 238 if on: 239 return "\x1b[3m" 240 else: 241 return "\x1b[23m" 242 return '' 243 244 def underline(self, on=True): 245 '''Enable or disable underline depending on the "on" parameter.''' 246 if self.enabled: 247 if on: 248 return "\x1b[4m" 249 else: 250 return "\x1b[24m" 251 return '' 252 253 def inverse(self, on=True): 254 '''Enable or disable inverse depending on the "on" parameter.''' 255 if self.enabled: 256 if on: 257 return "\x1b[7m" 258 else: 259 return "\x1b[27m" 260 return '' 261 262 def strike(self, on=True): 263 '''Enable or disable strike through depending on the "on" parameter.''' 264 if self.enabled: 265 if on: 266 return "\x1b[9m" 267 else: 268 return "\x1b[29m" 269 return '' 270 271 def black(self, fg=True): 272 '''Set the foreground or background color to black. 273 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 274 if self.enabled: 275 if fg: 276 return "\x1b[30m" 277 else: 278 return "\x1b[40m" 279 return '' 280 281 def red(self, fg=True): 282 '''Set the foreground or background color to red. 283 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 284 if self.enabled: 285 if fg: 286 return "\x1b[31m" 287 else: 288 return "\x1b[41m" 289 return '' 290 291 def green(self, fg=True): 292 '''Set the foreground or background color to green. 293 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 294 if self.enabled: 295 if fg: 296 return "\x1b[32m" 297 else: 298 return "\x1b[42m" 299 return '' 300 301 def yellow(self, fg=True): 302 '''Set the foreground or background color to yellow. 303 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 304 if self.enabled: 305 if fg: 306 return "\x1b[43m" 307 else: 308 return "\x1b[33m" 309 return '' 310 311 def blue(self, fg=True): 312 '''Set the foreground or background color to blue. 313 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 314 if self.enabled: 315 if fg: 316 return "\x1b[34m" 317 else: 318 return "\x1b[44m" 319 return '' 320 321 def magenta(self, fg=True): 322 '''Set the foreground or background color to magenta. 323 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 324 if self.enabled: 325 if fg: 326 return "\x1b[35m" 327 else: 328 return "\x1b[45m" 329 return '' 330 331 def cyan(self, fg=True): 332 '''Set the foreground or background color to cyan. 333 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 334 if self.enabled: 335 if fg: 336 return "\x1b[36m" 337 else: 338 return "\x1b[46m" 339 return '' 340 341 def white(self, fg=True): 342 '''Set the foreground or background color to white. 343 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 344 if self.enabled: 345 if fg: 346 return "\x1b[37m" 347 else: 348 return "\x1b[47m" 349 return '' 350 351 def default(self, fg=True): 352 '''Set the foreground or background color to the default. 353 The foreground color will be set if "fg" tests True. The background color will be set if "fg" tests False.''' 354 if self.enabled: 355 if fg: 356 return "\x1b[39m" 357 else: 358 return "\x1b[49m" 359 return '' 360 361 362def swap_unpack_char(): 363 """Returns the unpack prefix that will for non-native endian-ness.""" 364 if struct.pack('H', 1).startswith("\x00"): 365 return '<' 366 return '>' 367 368 369def dump_hex_bytes(addr, s, bytes_per_line=16): 370 i = 0 371 line = '' 372 for ch in s: 373 if (i % bytes_per_line) == 0: 374 if line: 375 print(line) 376 line = '%#8.8x: ' % (addr + i) 377 line += "%02X " % ord(ch) 378 i += 1 379 print(line) 380 381 382def dump_hex_byte_string_diff(addr, a, b, bytes_per_line=16): 383 i = 0 384 line = '' 385 a_len = len(a) 386 b_len = len(b) 387 if a_len < b_len: 388 max_len = b_len 389 else: 390 max_len = a_len 391 tty_colors = TerminalColors(True) 392 for i in range(max_len): 393 ch = None 394 if i < a_len: 395 ch_a = a[i] 396 ch = ch_a 397 else: 398 ch_a = None 399 if i < b_len: 400 ch_b = b[i] 401 if not ch: 402 ch = ch_b 403 else: 404 ch_b = None 405 mismatch = ch_a != ch_b 406 if (i % bytes_per_line) == 0: 407 if line: 408 print(line) 409 line = '%#8.8x: ' % (addr + i) 410 if mismatch: 411 line += tty_colors.red() 412 line += "%02X " % ord(ch) 413 if mismatch: 414 line += tty_colors.default() 415 i += 1 416 417 print(line) 418 419 420class Mach: 421 """Class that does everything mach-o related""" 422 423 class Arch: 424 """Class that implements mach-o architectures""" 425 426 def __init__(self, c=0, s=0): 427 self.cpu = c 428 self.sub = s 429 430 def set_cpu_type(self, c): 431 self.cpu = c 432 433 def set_cpu_subtype(self, s): 434 self.sub = s 435 436 def set_arch(self, c, s): 437 self.cpu = c 438 self.sub = s 439 440 def is_64_bit(self): 441 return (self.cpu & CPU_ARCH_ABI64) != 0 442 443 cpu_infos = [ 444 ["arm", CPU_TYPE_ARM, CPU_TYPE_ANY], 445 ["arm", CPU_TYPE_ARM, 0], 446 ["armv4", CPU_TYPE_ARM, 5], 447 ["armv6", CPU_TYPE_ARM, 6], 448 ["armv5", CPU_TYPE_ARM, 7], 449 ["xscale", CPU_TYPE_ARM, 8], 450 ["armv7", CPU_TYPE_ARM, 9], 451 ["armv7f", CPU_TYPE_ARM, 10], 452 ["armv7s", CPU_TYPE_ARM, 11], 453 ["armv7k", CPU_TYPE_ARM, 12], 454 ["armv7m", CPU_TYPE_ARM, 15], 455 ["armv7em", CPU_TYPE_ARM, 16], 456 ["ppc", CPU_TYPE_POWERPC, CPU_TYPE_ANY], 457 ["ppc", CPU_TYPE_POWERPC, 0], 458 ["ppc601", CPU_TYPE_POWERPC, 1], 459 ["ppc602", CPU_TYPE_POWERPC, 2], 460 ["ppc603", CPU_TYPE_POWERPC, 3], 461 ["ppc603e", CPU_TYPE_POWERPC, 4], 462 ["ppc603ev", CPU_TYPE_POWERPC, 5], 463 ["ppc604", CPU_TYPE_POWERPC, 6], 464 ["ppc604e", CPU_TYPE_POWERPC, 7], 465 ["ppc620", CPU_TYPE_POWERPC, 8], 466 ["ppc750", CPU_TYPE_POWERPC, 9], 467 ["ppc7400", CPU_TYPE_POWERPC, 10], 468 ["ppc7450", CPU_TYPE_POWERPC, 11], 469 ["ppc970", CPU_TYPE_POWERPC, 100], 470 ["ppc64", CPU_TYPE_POWERPC64, 0], 471 ["ppc970-64", CPU_TYPE_POWERPC64, 100], 472 ["i386", CPU_TYPE_I386, 3], 473 ["i486", CPU_TYPE_I386, 4], 474 ["i486sx", CPU_TYPE_I386, 0x84], 475 ["i386", CPU_TYPE_I386, CPU_TYPE_ANY], 476 ["x86_64", CPU_TYPE_X86_64, 3], 477 ["x86_64", CPU_TYPE_X86_64, CPU_TYPE_ANY], 478 ] 479 480 def __str__(self): 481 for info in self.cpu_infos: 482 if self.cpu == info[1] and (self.sub & 0x00ffffff) == info[2]: 483 return info[0] 484 return "{0}.{1}".format(self.cpu, self.sub) 485 486 class Magic(dict_utils.Enum): 487 488 enum = { 489 'MH_MAGIC': MH_MAGIC, 490 'MH_CIGAM': MH_CIGAM, 491 'MH_MAGIC_64': MH_MAGIC_64, 492 'MH_CIGAM_64': MH_CIGAM_64, 493 'FAT_MAGIC': FAT_MAGIC, 494 'FAT_CIGAM': FAT_CIGAM 495 } 496 497 def __init__(self, initial_value=0): 498 dict_utils.Enum.__init__(self, initial_value, self.enum) 499 500 def is_skinny_mach_file(self): 501 return self.value == MH_MAGIC or self.value == MH_CIGAM or self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64 502 503 def is_universal_mach_file(self): 504 return self.value == FAT_MAGIC or self.value == FAT_CIGAM 505 506 def unpack(self, data): 507 data.set_byte_order('native') 508 self.value = data.get_uint32() 509 510 def get_byte_order(self): 511 if self.value == MH_CIGAM or self.value == MH_CIGAM_64 or self.value == FAT_CIGAM: 512 return swap_unpack_char() 513 else: 514 return '=' 515 516 def is_64_bit(self): 517 return self.value == MH_MAGIC_64 or self.value == MH_CIGAM_64 518 519 def __init__(self): 520 self.magic = Mach.Magic() 521 self.content = None 522 self.path = None 523 524 def extract(self, path, extractor): 525 self.path = path 526 self.unpack(extractor) 527 528 def parse(self, path): 529 self.path = path 530 try: 531 f = open(self.path) 532 file_extractor = file_extract.FileExtract(f, '=') 533 self.unpack(file_extractor) 534 # f.close() 535 except IOError as xxx_todo_changeme: 536 (errno, strerror) = xxx_todo_changeme.args 537 print("I/O error({0}): {1}".format(errno, strerror)) 538 except ValueError: 539 print("Could not convert data to an integer.") 540 except: 541 print("Unexpected error:", sys.exc_info()[0]) 542 raise 543 544 def compare(self, rhs): 545 self.content.compare(rhs.content) 546 547 def dump(self, options=None): 548 self.content.dump(options) 549 550 def dump_header(self, dump_description=True, options=None): 551 self.content.dump_header(dump_description, options) 552 553 def dump_load_commands(self, dump_description=True, options=None): 554 self.content.dump_load_commands(dump_description, options) 555 556 def dump_sections(self, dump_description=True, options=None): 557 self.content.dump_sections(dump_description, options) 558 559 def dump_section_contents(self, options): 560 self.content.dump_section_contents(options) 561 562 def dump_symtab(self, dump_description=True, options=None): 563 self.content.dump_symtab(dump_description, options) 564 565 def dump_symbol_names_matching_regex(self, regex, file=None): 566 self.content.dump_symbol_names_matching_regex(regex, file) 567 568 def description(self): 569 return self.content.description() 570 571 def unpack(self, data): 572 self.magic.unpack(data) 573 if self.magic.is_skinny_mach_file(): 574 self.content = Mach.Skinny(self.path) 575 elif self.magic.is_universal_mach_file(): 576 self.content = Mach.Universal(self.path) 577 else: 578 self.content = None 579 580 if self.content is not None: 581 self.content.unpack(data, self.magic) 582 583 def is_valid(self): 584 return self.content is not None 585 586 class Universal: 587 588 def __init__(self, path): 589 self.path = path 590 self.type = 'universal' 591 self.file_off = 0 592 self.magic = None 593 self.nfat_arch = 0 594 self.archs = list() 595 596 def description(self): 597 s = '%#8.8x: %s (' % (self.file_off, self.path) 598 archs_string = '' 599 for arch in self.archs: 600 if len(archs_string): 601 archs_string += ', ' 602 archs_string += '%s' % arch.arch 603 s += archs_string 604 s += ')' 605 return s 606 607 def unpack(self, data, magic=None): 608 self.file_off = data.tell() 609 if magic is None: 610 self.magic = Mach.Magic() 611 self.magic.unpack(data) 612 else: 613 self.magic = magic 614 self.file_off = self.file_off - 4 615 # Universal headers are always in big endian 616 data.set_byte_order('big') 617 self.nfat_arch = data.get_uint32() 618 for i in range(self.nfat_arch): 619 self.archs.append(Mach.Universal.ArchInfo()) 620 self.archs[i].unpack(data) 621 for i in range(self.nfat_arch): 622 self.archs[i].mach = Mach.Skinny(self.path) 623 data.seek(self.archs[i].offset, 0) 624 skinny_magic = Mach.Magic() 625 skinny_magic.unpack(data) 626 self.archs[i].mach.unpack(data, skinny_magic) 627 628 def compare(self, rhs): 629 print('error: comparing two universal files is not supported yet') 630 return False 631 632 def dump(self, options): 633 if options.dump_header: 634 print() 635 print("Universal Mach File: magic = %s, nfat_arch = %u" % (self.magic, self.nfat_arch)) 636 print() 637 if self.nfat_arch > 0: 638 if options.dump_header: 639 self.archs[0].dump_header(True, options) 640 for i in range(self.nfat_arch): 641 self.archs[i].dump_flat(options) 642 if options.dump_header: 643 print() 644 for i in range(self.nfat_arch): 645 self.archs[i].mach.dump(options) 646 647 def dump_header(self, dump_description=True, options=None): 648 if dump_description: 649 print(self.description()) 650 for i in range(self.nfat_arch): 651 self.archs[i].mach.dump_header(True, options) 652 print() 653 654 def dump_load_commands(self, dump_description=True, options=None): 655 if dump_description: 656 print(self.description()) 657 for i in range(self.nfat_arch): 658 self.archs[i].mach.dump_load_commands(True, options) 659 print() 660 661 def dump_sections(self, dump_description=True, options=None): 662 if dump_description: 663 print(self.description()) 664 for i in range(self.nfat_arch): 665 self.archs[i].mach.dump_sections(True, options) 666 print() 667 668 def dump_section_contents(self, options): 669 for i in range(self.nfat_arch): 670 self.archs[i].mach.dump_section_contents(options) 671 print() 672 673 def dump_symtab(self, dump_description=True, options=None): 674 if dump_description: 675 print(self.description()) 676 for i in range(self.nfat_arch): 677 self.archs[i].mach.dump_symtab(True, options) 678 print() 679 680 def dump_symbol_names_matching_regex(self, regex, file=None): 681 for i in range(self.nfat_arch): 682 self.archs[i].mach.dump_symbol_names_matching_regex( 683 regex, file) 684 685 class ArchInfo: 686 687 def __init__(self): 688 self.arch = Mach.Arch(0, 0) 689 self.offset = 0 690 self.size = 0 691 self.align = 0 692 self.mach = None 693 694 def unpack(self, data): 695 # Universal headers are always in big endian 696 data.set_byte_order('big') 697 self.arch.cpu, self.arch.sub, self.offset, self.size, self.align = data.get_n_uint32( 698 5) 699 700 def dump_header(self, dump_description=True, options=None): 701 if options.verbose: 702 print("CPU SUBTYPE OFFSET SIZE ALIGN") 703 print("---------- ---------- ---------- ---------- ----------") 704 else: 705 print("ARCH FILEOFFSET FILESIZE ALIGN") 706 print("---------- ---------- ---------- ----------") 707 708 def dump_flat(self, options): 709 if options.verbose: 710 print("%#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % (self.arch.cpu, self.arch.sub, self.offset, self.size, self.align)) 711 else: 712 print("%-10s %#8.8x %#8.8x %#8.8x" % (self.arch, self.offset, self.size, self.align)) 713 714 def dump(self): 715 print(" cputype: %#8.8x" % self.arch.cpu) 716 print("cpusubtype: %#8.8x" % self.arch.sub) 717 print(" offset: %#8.8x" % self.offset) 718 print(" size: %#8.8x" % self.size) 719 print(" align: %#8.8x" % self.align) 720 721 def __str__(self): 722 return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % ( 723 self.arch.cpu, self.arch.sub, self.offset, self.size, self.align) 724 725 def __repr__(self): 726 return "Mach.Universal.ArchInfo: %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x" % ( 727 self.arch.cpu, self.arch.sub, self.offset, self.size, self.align) 728 729 class Flags: 730 731 def __init__(self, b): 732 self.bits = b 733 734 def __str__(self): 735 s = '' 736 if self.bits & MH_NOUNDEFS: 737 s += 'MH_NOUNDEFS | ' 738 if self.bits & MH_INCRLINK: 739 s += 'MH_INCRLINK | ' 740 if self.bits & MH_DYLDLINK: 741 s += 'MH_DYLDLINK | ' 742 if self.bits & MH_BINDATLOAD: 743 s += 'MH_BINDATLOAD | ' 744 if self.bits & MH_PREBOUND: 745 s += 'MH_PREBOUND | ' 746 if self.bits & MH_SPLIT_SEGS: 747 s += 'MH_SPLIT_SEGS | ' 748 if self.bits & MH_LAZY_INIT: 749 s += 'MH_LAZY_INIT | ' 750 if self.bits & MH_TWOLEVEL: 751 s += 'MH_TWOLEVEL | ' 752 if self.bits & MH_FORCE_FLAT: 753 s += 'MH_FORCE_FLAT | ' 754 if self.bits & MH_NOMULTIDEFS: 755 s += 'MH_NOMULTIDEFS | ' 756 if self.bits & MH_NOFIXPREBINDING: 757 s += 'MH_NOFIXPREBINDING | ' 758 if self.bits & MH_PREBINDABLE: 759 s += 'MH_PREBINDABLE | ' 760 if self.bits & MH_ALLMODSBOUND: 761 s += 'MH_ALLMODSBOUND | ' 762 if self.bits & MH_SUBSECTIONS_VIA_SYMBOLS: 763 s += 'MH_SUBSECTIONS_VIA_SYMBOLS | ' 764 if self.bits & MH_CANONICAL: 765 s += 'MH_CANONICAL | ' 766 if self.bits & MH_WEAK_DEFINES: 767 s += 'MH_WEAK_DEFINES | ' 768 if self.bits & MH_BINDS_TO_WEAK: 769 s += 'MH_BINDS_TO_WEAK | ' 770 if self.bits & MH_ALLOW_STACK_EXECUTION: 771 s += 'MH_ALLOW_STACK_EXECUTION | ' 772 if self.bits & MH_ROOT_SAFE: 773 s += 'MH_ROOT_SAFE | ' 774 if self.bits & MH_SETUID_SAFE: 775 s += 'MH_SETUID_SAFE | ' 776 if self.bits & MH_NO_REEXPORTED_DYLIBS: 777 s += 'MH_NO_REEXPORTED_DYLIBS | ' 778 if self.bits & MH_PIE: 779 s += 'MH_PIE | ' 780 if self.bits & MH_DEAD_STRIPPABLE_DYLIB: 781 s += 'MH_DEAD_STRIPPABLE_DYLIB | ' 782 if self.bits & MH_HAS_TLV_DESCRIPTORS: 783 s += 'MH_HAS_TLV_DESCRIPTORS | ' 784 if self.bits & MH_NO_HEAP_EXECUTION: 785 s += 'MH_NO_HEAP_EXECUTION | ' 786 # Strip the trailing " |" if we have any flags 787 if len(s) > 0: 788 s = s[0:-2] 789 return s 790 791 class FileType(dict_utils.Enum): 792 793 enum = { 794 'MH_OBJECT': MH_OBJECT, 795 'MH_EXECUTE': MH_EXECUTE, 796 'MH_FVMLIB': MH_FVMLIB, 797 'MH_CORE': MH_CORE, 798 'MH_PRELOAD': MH_PRELOAD, 799 'MH_DYLIB': MH_DYLIB, 800 'MH_DYLINKER': MH_DYLINKER, 801 'MH_BUNDLE': MH_BUNDLE, 802 'MH_DYLIB_STUB': MH_DYLIB_STUB, 803 'MH_DSYM': MH_DSYM, 804 'MH_KEXT_BUNDLE': MH_KEXT_BUNDLE 805 } 806 807 def __init__(self, initial_value=0): 808 dict_utils.Enum.__init__(self, initial_value, self.enum) 809 810 class Skinny: 811 812 def __init__(self, path): 813 self.path = path 814 self.type = 'skinny' 815 self.data = None 816 self.file_off = 0 817 self.magic = 0 818 self.arch = Mach.Arch(0, 0) 819 self.filetype = Mach.FileType(0) 820 self.ncmds = 0 821 self.sizeofcmds = 0 822 self.flags = Mach.Flags(0) 823 self.uuid = None 824 self.commands = list() 825 self.segments = list() 826 self.sections = list() 827 self.symbols = list() 828 self.sections.append(Mach.Section()) 829 830 def description(self): 831 return '%#8.8x: %s (%s)' % (self.file_off, self.path, self.arch) 832 833 def unpack(self, data, magic=None): 834 self.data = data 835 self.file_off = data.tell() 836 if magic is None: 837 self.magic = Mach.Magic() 838 self.magic.unpack(data) 839 else: 840 self.magic = magic 841 self.file_off = self.file_off - 4 842 data.set_byte_order(self.magic.get_byte_order()) 843 self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, bits = data.get_n_uint32( 844 6) 845 self.flags.bits = bits 846 847 if self.is_64_bit(): 848 data.get_uint32() # Skip reserved word in mach_header_64 849 850 for i in range(0, self.ncmds): 851 lc = self.unpack_load_command(data) 852 self.commands.append(lc) 853 854 def get_data(self): 855 if self.data: 856 self.data.set_byte_order(self.magic.get_byte_order()) 857 return self.data 858 return None 859 860 def unpack_load_command(self, data): 861 lc = Mach.LoadCommand() 862 lc.unpack(self, data) 863 lc_command = lc.command.get_enum_value() 864 if (lc_command == LC_SEGMENT or 865 lc_command == LC_SEGMENT_64): 866 lc = Mach.SegmentLoadCommand(lc) 867 lc.unpack(self, data) 868 elif (lc_command == LC_LOAD_DYLIB or 869 lc_command == LC_ID_DYLIB or 870 lc_command == LC_LOAD_WEAK_DYLIB or 871 lc_command == LC_REEXPORT_DYLIB): 872 lc = Mach.DylibLoadCommand(lc) 873 lc.unpack(self, data) 874 elif (lc_command == LC_LOAD_DYLINKER or 875 lc_command == LC_SUB_FRAMEWORK or 876 lc_command == LC_SUB_CLIENT or 877 lc_command == LC_SUB_UMBRELLA or 878 lc_command == LC_SUB_LIBRARY or 879 lc_command == LC_ID_DYLINKER or 880 lc_command == LC_RPATH): 881 lc = Mach.LoadDYLDLoadCommand(lc) 882 lc.unpack(self, data) 883 elif (lc_command == LC_DYLD_INFO_ONLY): 884 lc = Mach.DYLDInfoOnlyLoadCommand(lc) 885 lc.unpack(self, data) 886 elif (lc_command == LC_SYMTAB): 887 lc = Mach.SymtabLoadCommand(lc) 888 lc.unpack(self, data) 889 elif (lc_command == LC_DYSYMTAB): 890 lc = Mach.DYLDSymtabLoadCommand(lc) 891 lc.unpack(self, data) 892 elif (lc_command == LC_UUID): 893 lc = Mach.UUIDLoadCommand(lc) 894 lc.unpack(self, data) 895 elif (lc_command == LC_CODE_SIGNATURE or 896 lc_command == LC_SEGMENT_SPLIT_INFO or 897 lc_command == LC_FUNCTION_STARTS): 898 lc = Mach.DataBlobLoadCommand(lc) 899 lc.unpack(self, data) 900 elif (lc_command == LC_UNIXTHREAD): 901 lc = Mach.UnixThreadLoadCommand(lc) 902 lc.unpack(self, data) 903 elif (lc_command == LC_ENCRYPTION_INFO): 904 lc = Mach.EncryptionInfoLoadCommand(lc) 905 lc.unpack(self, data) 906 lc.skip(data) 907 return lc 908 909 def compare(self, rhs): 910 print("\nComparing:") 911 print("a) %s %s" % (self.arch, self.path)) 912 print("b) %s %s" % (rhs.arch, rhs.path)) 913 result = True 914 if self.type == rhs.type: 915 for lhs_section in self.sections[1:]: 916 rhs_section = rhs.get_section_by_section(lhs_section) 917 if rhs_section: 918 print('comparing %s.%s...' % (lhs_section.segname, lhs_section.sectname), end=' ') 919 sys.stdout.flush() 920 lhs_data = lhs_section.get_contents(self) 921 rhs_data = rhs_section.get_contents(rhs) 922 if lhs_data and rhs_data: 923 if lhs_data == rhs_data: 924 print('ok') 925 else: 926 lhs_data_len = len(lhs_data) 927 rhs_data_len = len(rhs_data) 928 # if lhs_data_len < rhs_data_len: 929 # if lhs_data == rhs_data[0:lhs_data_len]: 930 # print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len) 931 # else: 932 # # TODO: check padding 933 # result = False 934 # elif lhs_data_len > rhs_data_len: 935 # if lhs_data[0:rhs_data_len] == rhs_data: 936 # print 'section data for %s matches the first %u bytes' % (lhs_section.sectname, lhs_data_len) 937 # else: 938 # # TODO: check padding 939 # result = False 940 # else: 941 result = False 942 print('error: sections differ') 943 # print 'a) %s' % (lhs_section) 944 # dump_hex_byte_string_diff(0, lhs_data, rhs_data) 945 # print 'b) %s' % (rhs_section) 946 # dump_hex_byte_string_diff(0, rhs_data, lhs_data) 947 elif lhs_data and not rhs_data: 948 print('error: section data missing from b:') 949 print('a) %s' % (lhs_section)) 950 print('b) %s' % (rhs_section)) 951 result = False 952 elif not lhs_data and rhs_data: 953 print('error: section data missing from a:') 954 print('a) %s' % (lhs_section)) 955 print('b) %s' % (rhs_section)) 956 result = False 957 elif lhs_section.offset or rhs_section.offset: 958 print('error: section data missing for both a and b:') 959 print('a) %s' % (lhs_section)) 960 print('b) %s' % (rhs_section)) 961 result = False 962 else: 963 print('ok') 964 else: 965 result = False 966 print('error: section %s is missing in %s' % (lhs_section.sectname, rhs.path)) 967 else: 968 print('error: comparing a %s mach-o file with a %s mach-o file is not supported' % (self.type, rhs.type)) 969 result = False 970 if not result: 971 print('error: mach files differ') 972 return result 973 974 def dump_header(self, dump_description=True, options=None): 975 if options.verbose: 976 print("MAGIC CPU SUBTYPE FILETYPE NUM CMDS SIZE CMDS FLAGS") 977 print("---------- ---------- ---------- ---------- -------- ---------- ----------") 978 else: 979 print("MAGIC ARCH FILETYPE NUM CMDS SIZE CMDS FLAGS") 980 print("------------ ---------- -------------- -------- ---------- ----------") 981 982 def dump_flat(self, options): 983 if options.verbose: 984 print("%#8.8x %#8.8x %#8.8x %#8.8x %#8u %#8.8x %#8.8x" % (self.magic, self.arch.cpu, self.arch.sub, self.filetype.value, self.ncmds, self.sizeofcmds, self.flags.bits)) 985 else: 986 print("%-12s %-10s %-14s %#8u %#8.8x %s" % (self.magic, self.arch, self.filetype, self.ncmds, self.sizeofcmds, self.flags)) 987 988 def dump(self, options): 989 if options.dump_header: 990 self.dump_header(True, options) 991 if options.dump_load_commands: 992 self.dump_load_commands(False, options) 993 if options.dump_sections: 994 self.dump_sections(False, options) 995 if options.section_names: 996 self.dump_section_contents(options) 997 if options.dump_symtab: 998 self.get_symtab() 999 if len(self.symbols): 1000 self.dump_sections(False, options) 1001 else: 1002 print("No symbols") 1003 if options.find_mangled: 1004 self.dump_symbol_names_matching_regex(re.compile('^_?_Z')) 1005 1006 def dump_header(self, dump_description=True, options=None): 1007 if dump_description: 1008 print(self.description()) 1009 print("Mach Header") 1010 print(" magic: %#8.8x %s" % (self.magic.value, self.magic)) 1011 print(" cputype: %#8.8x %s" % (self.arch.cpu, self.arch)) 1012 print(" cpusubtype: %#8.8x" % self.arch.sub) 1013 print(" filetype: %#8.8x %s" % (self.filetype.get_enum_value(), self.filetype.get_enum_name())) 1014 print(" ncmds: %#8.8x %u" % (self.ncmds, self.ncmds)) 1015 print(" sizeofcmds: %#8.8x" % self.sizeofcmds) 1016 print(" flags: %#8.8x %s" % (self.flags.bits, self.flags)) 1017 1018 def dump_load_commands(self, dump_description=True, options=None): 1019 if dump_description: 1020 print(self.description()) 1021 for lc in self.commands: 1022 print(lc) 1023 1024 def get_section_by_name(self, name): 1025 for section in self.sections: 1026 if section.sectname and section.sectname == name: 1027 return section 1028 return None 1029 1030 def get_section_by_section(self, other_section): 1031 for section in self.sections: 1032 if section.sectname == other_section.sectname and section.segname == other_section.segname: 1033 return section 1034 return None 1035 1036 def dump_sections(self, dump_description=True, options=None): 1037 if dump_description: 1038 print(self.description()) 1039 num_sections = len(self.sections) 1040 if num_sections > 1: 1041 self.sections[1].dump_header() 1042 for sect_idx in range(1, num_sections): 1043 print("%s" % self.sections[sect_idx]) 1044 1045 def dump_section_contents(self, options): 1046 saved_section_to_disk = False 1047 for sectname in options.section_names: 1048 section = self.get_section_by_name(sectname) 1049 if section: 1050 sect_bytes = section.get_contents(self) 1051 if options.outfile: 1052 if not saved_section_to_disk: 1053 outfile = open(options.outfile, 'w') 1054 if options.extract_modules: 1055 # print "Extracting modules from mach file..." 1056 data = file_extract.FileExtract( 1057 io.BytesIO(sect_bytes), self.data.byte_order) 1058 version = data.get_uint32() 1059 num_modules = data.get_uint32() 1060 # print "version = %u, num_modules = %u" % 1061 # (version, num_modules) 1062 for i in range(num_modules): 1063 data_offset = data.get_uint64() 1064 data_size = data.get_uint64() 1065 name_offset = data.get_uint32() 1066 language = data.get_uint32() 1067 flags = data.get_uint32() 1068 data.seek(name_offset) 1069 module_name = data.get_c_string() 1070 # print "module[%u] data_offset = %#16.16x, 1071 # data_size = %#16.16x, name_offset = 1072 # %#16.16x (%s), language = %u, flags = 1073 # %#x" % (i, data_offset, data_size, 1074 # name_offset, module_name, language, 1075 # flags) 1076 data.seek(data_offset) 1077 outfile.write(data.read_size(data_size)) 1078 else: 1079 print("Saving section %s to '%s'" % (sectname, options.outfile)) 1080 outfile.write(sect_bytes) 1081 outfile.close() 1082 saved_section_to_disk = True 1083 else: 1084 print("error: you can only save a single section to disk at a time, skipping section '%s'" % (sectname)) 1085 else: 1086 print('section %s:\n' % (sectname)) 1087 section.dump_header() 1088 print('%s\n' % (section)) 1089 dump_memory(0, sect_bytes, options.max_count, 16) 1090 else: 1091 print('error: no section named "%s" was found' % (sectname)) 1092 1093 def get_segment(self, segname): 1094 if len(self.segments) == 1 and self.segments[0].segname == '': 1095 return self.segments[0] 1096 for segment in self.segments: 1097 if segment.segname == segname: 1098 return segment 1099 return None 1100 1101 def get_first_load_command(self, lc_enum_value): 1102 for lc in self.commands: 1103 if lc.command.value == lc_enum_value: 1104 return lc 1105 return None 1106 1107 def get_symtab(self): 1108 if self.data and not self.symbols: 1109 lc_symtab = self.get_first_load_command(LC_SYMTAB) 1110 if lc_symtab: 1111 symtab_offset = self.file_off 1112 if self.data.is_in_memory(): 1113 linkedit_segment = self.get_segment('__LINKEDIT') 1114 if linkedit_segment: 1115 linkedit_vmaddr = linkedit_segment.vmaddr 1116 linkedit_fileoff = linkedit_segment.fileoff 1117 symtab_offset = linkedit_vmaddr + lc_symtab.symoff - linkedit_fileoff 1118 symtab_offset = linkedit_vmaddr + lc_symtab.stroff - linkedit_fileoff 1119 else: 1120 symtab_offset += lc_symtab.symoff 1121 1122 self.data.seek(symtab_offset) 1123 is_64 = self.is_64_bit() 1124 for i in range(lc_symtab.nsyms): 1125 nlist = Mach.NList() 1126 nlist.unpack(self, self.data, lc_symtab) 1127 self.symbols.append(nlist) 1128 else: 1129 print("no LC_SYMTAB") 1130 1131 def dump_symtab(self, dump_description=True, options=None): 1132 self.get_symtab() 1133 if dump_description: 1134 print(self.description()) 1135 for i, symbol in enumerate(self.symbols): 1136 print('[%5u] %s' % (i, symbol)) 1137 1138 def dump_symbol_names_matching_regex(self, regex, file=None): 1139 self.get_symtab() 1140 for symbol in self.symbols: 1141 if symbol.name and regex.search(symbol.name): 1142 print(symbol.name) 1143 if file: 1144 file.write('%s\n' % (symbol.name)) 1145 1146 def is_64_bit(self): 1147 return self.magic.is_64_bit() 1148 1149 class LoadCommand: 1150 1151 class Command(dict_utils.Enum): 1152 enum = { 1153 'LC_SEGMENT': LC_SEGMENT, 1154 'LC_SYMTAB': LC_SYMTAB, 1155 'LC_SYMSEG': LC_SYMSEG, 1156 'LC_THREAD': LC_THREAD, 1157 'LC_UNIXTHREAD': LC_UNIXTHREAD, 1158 'LC_LOADFVMLIB': LC_LOADFVMLIB, 1159 'LC_IDFVMLIB': LC_IDFVMLIB, 1160 'LC_IDENT': LC_IDENT, 1161 'LC_FVMFILE': LC_FVMFILE, 1162 'LC_PREPAGE': LC_PREPAGE, 1163 'LC_DYSYMTAB': LC_DYSYMTAB, 1164 'LC_LOAD_DYLIB': LC_LOAD_DYLIB, 1165 'LC_ID_DYLIB': LC_ID_DYLIB, 1166 'LC_LOAD_DYLINKER': LC_LOAD_DYLINKER, 1167 'LC_ID_DYLINKER': LC_ID_DYLINKER, 1168 'LC_PREBOUND_DYLIB': LC_PREBOUND_DYLIB, 1169 'LC_ROUTINES': LC_ROUTINES, 1170 'LC_SUB_FRAMEWORK': LC_SUB_FRAMEWORK, 1171 'LC_SUB_UMBRELLA': LC_SUB_UMBRELLA, 1172 'LC_SUB_CLIENT': LC_SUB_CLIENT, 1173 'LC_SUB_LIBRARY': LC_SUB_LIBRARY, 1174 'LC_TWOLEVEL_HINTS': LC_TWOLEVEL_HINTS, 1175 'LC_PREBIND_CKSUM': LC_PREBIND_CKSUM, 1176 'LC_LOAD_WEAK_DYLIB': LC_LOAD_WEAK_DYLIB, 1177 'LC_SEGMENT_64': LC_SEGMENT_64, 1178 'LC_ROUTINES_64': LC_ROUTINES_64, 1179 'LC_UUID': LC_UUID, 1180 'LC_RPATH': LC_RPATH, 1181 'LC_CODE_SIGNATURE': LC_CODE_SIGNATURE, 1182 'LC_SEGMENT_SPLIT_INFO': LC_SEGMENT_SPLIT_INFO, 1183 'LC_REEXPORT_DYLIB': LC_REEXPORT_DYLIB, 1184 'LC_LAZY_LOAD_DYLIB': LC_LAZY_LOAD_DYLIB, 1185 'LC_ENCRYPTION_INFO': LC_ENCRYPTION_INFO, 1186 'LC_DYLD_INFO': LC_DYLD_INFO, 1187 'LC_DYLD_INFO_ONLY': LC_DYLD_INFO_ONLY, 1188 'LC_LOAD_UPWARD_DYLIB': LC_LOAD_UPWARD_DYLIB, 1189 'LC_VERSION_MIN_MACOSX': LC_VERSION_MIN_MACOSX, 1190 'LC_VERSION_MIN_IPHONEOS': LC_VERSION_MIN_IPHONEOS, 1191 'LC_FUNCTION_STARTS': LC_FUNCTION_STARTS, 1192 'LC_DYLD_ENVIRONMENT': LC_DYLD_ENVIRONMENT 1193 } 1194 1195 def __init__(self, initial_value=0): 1196 dict_utils.Enum.__init__(self, initial_value, self.enum) 1197 1198 def __init__(self, c=None, l=0, o=0): 1199 if c is not None: 1200 self.command = c 1201 else: 1202 self.command = Mach.LoadCommand.Command(0) 1203 self.length = l 1204 self.file_off = o 1205 1206 def unpack(self, mach_file, data): 1207 self.file_off = data.tell() 1208 self.command.value, self.length = data.get_n_uint32(2) 1209 1210 def skip(self, data): 1211 data.seek(self.file_off + self.length, 0) 1212 1213 def __str__(self): 1214 lc_name = self.command.get_enum_name() 1215 return '%#8.8x: <%#4.4x> %-24s' % (self.file_off, 1216 self.length, lc_name) 1217 1218 class Section: 1219 1220 def __init__(self): 1221 self.index = 0 1222 self.is_64 = False 1223 self.sectname = None 1224 self.segname = None 1225 self.addr = 0 1226 self.size = 0 1227 self.offset = 0 1228 self.align = 0 1229 self.reloff = 0 1230 self.nreloc = 0 1231 self.flags = 0 1232 self.reserved1 = 0 1233 self.reserved2 = 0 1234 self.reserved3 = 0 1235 1236 def unpack(self, is_64, data): 1237 self.is_64 = is_64 1238 self.sectname = data.get_fixed_length_c_string(16, '', True) 1239 self.segname = data.get_fixed_length_c_string(16, '', True) 1240 if self.is_64: 1241 self.addr, self.size = data.get_n_uint64(2) 1242 self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3 = data.get_n_uint32( 1243 8) 1244 else: 1245 self.addr, self.size = data.get_n_uint32(2) 1246 self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2 = data.get_n_uint32( 1247 7) 1248 1249 def dump_header(self): 1250 if self.is_64: 1251 print("INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 RESERVED3 NAME") 1252 print("===== ------------------ ------------------ ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------") 1253 else: 1254 print("INDEX ADDRESS SIZE OFFSET ALIGN RELOFF NRELOC FLAGS RESERVED1 RESERVED2 NAME") 1255 print("===== ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------------------") 1256 1257 def __str__(self): 1258 if self.is_64: 1259 return "[%3u] %#16.16x %#16.16x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % ( 1260 self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.reserved3, self.segname, self.sectname) 1261 else: 1262 return "[%3u] %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %#8.8x %s.%s" % ( 1263 self.index, self.addr, self.size, self.offset, self.align, self.reloff, self.nreloc, self.flags, self.reserved1, self.reserved2, self.segname, self.sectname) 1264 1265 def get_contents(self, mach_file): 1266 '''Get the section contents as a python string''' 1267 if self.size > 0 and mach_file.get_segment( 1268 self.segname).filesize > 0: 1269 data = mach_file.get_data() 1270 if data: 1271 section_data_offset = mach_file.file_off + self.offset 1272 # print '%s.%s is at offset 0x%x with size 0x%x' % 1273 # (self.segname, self.sectname, section_data_offset, 1274 # self.size) 1275 data.push_offset_and_seek(section_data_offset) 1276 bytes = data.read_size(self.size) 1277 data.pop_offset_and_seek() 1278 return bytes 1279 return None 1280 1281 class DylibLoadCommand(LoadCommand): 1282 1283 def __init__(self, lc): 1284 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1285 self.name = None 1286 self.timestamp = 0 1287 self.current_version = 0 1288 self.compatibility_version = 0 1289 1290 def unpack(self, mach_file, data): 1291 byte_order_char = mach_file.magic.get_byte_order() 1292 name_offset, self.timestamp, self.current_version, self.compatibility_version = data.get_n_uint32( 1293 4) 1294 data.seek(self.file_off + name_offset, 0) 1295 self.name = data.get_fixed_length_c_string(self.length - 24) 1296 1297 def __str__(self): 1298 s = Mach.LoadCommand.__str__(self) 1299 s += "%#8.8x %#8.8x %#8.8x " % (self.timestamp, 1300 self.current_version, 1301 self.compatibility_version) 1302 s += self.name 1303 return s 1304 1305 class LoadDYLDLoadCommand(LoadCommand): 1306 1307 def __init__(self, lc): 1308 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1309 self.name = None 1310 1311 def unpack(self, mach_file, data): 1312 data.get_uint32() 1313 self.name = data.get_fixed_length_c_string(self.length - 12) 1314 1315 def __str__(self): 1316 s = Mach.LoadCommand.__str__(self) 1317 s += "%s" % self.name 1318 return s 1319 1320 class UnixThreadLoadCommand(LoadCommand): 1321 1322 class ThreadState: 1323 1324 def __init__(self): 1325 self.flavor = 0 1326 self.count = 0 1327 self.register_values = list() 1328 1329 def unpack(self, data): 1330 self.flavor, self.count = data.get_n_uint32(2) 1331 self.register_values = data.get_n_uint32(self.count) 1332 1333 def __str__(self): 1334 s = "flavor = %u, count = %u, regs =" % ( 1335 self.flavor, self.count) 1336 i = 0 1337 for register_value in self.register_values: 1338 if i % 8 == 0: 1339 s += "\n " 1340 s += " %#8.8x" % register_value 1341 i += 1 1342 return s 1343 1344 def __init__(self, lc): 1345 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1346 self.reg_sets = list() 1347 1348 def unpack(self, mach_file, data): 1349 reg_set = Mach.UnixThreadLoadCommand.ThreadState() 1350 reg_set.unpack(data) 1351 self.reg_sets.append(reg_set) 1352 1353 def __str__(self): 1354 s = Mach.LoadCommand.__str__(self) 1355 for reg_set in self.reg_sets: 1356 s += "%s" % reg_set 1357 return s 1358 1359 class DYLDInfoOnlyLoadCommand(LoadCommand): 1360 1361 def __init__(self, lc): 1362 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1363 self.rebase_off = 0 1364 self.rebase_size = 0 1365 self.bind_off = 0 1366 self.bind_size = 0 1367 self.weak_bind_off = 0 1368 self.weak_bind_size = 0 1369 self.lazy_bind_off = 0 1370 self.lazy_bind_size = 0 1371 self.export_off = 0 1372 self.export_size = 0 1373 1374 def unpack(self, mach_file, data): 1375 byte_order_char = mach_file.magic.get_byte_order() 1376 self.rebase_off, self.rebase_size, self.bind_off, self.bind_size, self.weak_bind_off, self.weak_bind_size, self.lazy_bind_off, self.lazy_bind_size, self.export_off, self.export_size = data.get_n_uint32( 1377 10) 1378 1379 def __str__(self): 1380 s = Mach.LoadCommand.__str__(self) 1381 s += "rebase_off = %#8.8x, rebase_size = %u, " % ( 1382 self.rebase_off, self.rebase_size) 1383 s += "bind_off = %#8.8x, bind_size = %u, " % ( 1384 self.bind_off, self.bind_size) 1385 s += "weak_bind_off = %#8.8x, weak_bind_size = %u, " % ( 1386 self.weak_bind_off, self.weak_bind_size) 1387 s += "lazy_bind_off = %#8.8x, lazy_bind_size = %u, " % ( 1388 self.lazy_bind_off, self.lazy_bind_size) 1389 s += "export_off = %#8.8x, export_size = %u, " % ( 1390 self.export_off, self.export_size) 1391 return s 1392 1393 class DYLDSymtabLoadCommand(LoadCommand): 1394 1395 def __init__(self, lc): 1396 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1397 self.ilocalsym = 0 1398 self.nlocalsym = 0 1399 self.iextdefsym = 0 1400 self.nextdefsym = 0 1401 self.iundefsym = 0 1402 self.nundefsym = 0 1403 self.tocoff = 0 1404 self.ntoc = 0 1405 self.modtaboff = 0 1406 self.nmodtab = 0 1407 self.extrefsymoff = 0 1408 self.nextrefsyms = 0 1409 self.indirectsymoff = 0 1410 self.nindirectsyms = 0 1411 self.extreloff = 0 1412 self.nextrel = 0 1413 self.locreloff = 0 1414 self.nlocrel = 0 1415 1416 def unpack(self, mach_file, data): 1417 byte_order_char = mach_file.magic.get_byte_order() 1418 self.ilocalsym, self.nlocalsym, self.iextdefsym, self.nextdefsym, self.iundefsym, self.nundefsym, self.tocoff, self.ntoc, self.modtaboff, self.nmodtab, self.extrefsymoff, self.nextrefsyms, self.indirectsymoff, self.nindirectsyms, self.extreloff, self.nextrel, self.locreloff, self.nlocrel = data.get_n_uint32( 1419 18) 1420 1421 def __str__(self): 1422 s = Mach.LoadCommand.__str__(self) 1423 # s += "ilocalsym = %u, nlocalsym = %u, " % (self.ilocalsym, self.nlocalsym) 1424 # s += "iextdefsym = %u, nextdefsym = %u, " % (self.iextdefsym, self.nextdefsym) 1425 # s += "iundefsym %u, nundefsym = %u, " % (self.iundefsym, self.nundefsym) 1426 # s += "tocoff = %#8.8x, ntoc = %u, " % (self.tocoff, self.ntoc) 1427 # s += "modtaboff = %#8.8x, nmodtab = %u, " % (self.modtaboff, self.nmodtab) 1428 # s += "extrefsymoff = %#8.8x, nextrefsyms = %u, " % (self.extrefsymoff, self.nextrefsyms) 1429 # s += "indirectsymoff = %#8.8x, nindirectsyms = %u, " % (self.indirectsymoff, self.nindirectsyms) 1430 # s += "extreloff = %#8.8x, nextrel = %u, " % (self.extreloff, self.nextrel) 1431 # s += "locreloff = %#8.8x, nlocrel = %u" % (self.locreloff, 1432 # self.nlocrel) 1433 s += "ilocalsym = %-10u, nlocalsym = %u\n" % ( 1434 self.ilocalsym, self.nlocalsym) 1435 s += " iextdefsym = %-10u, nextdefsym = %u\n" % ( 1436 self.iextdefsym, self.nextdefsym) 1437 s += " iundefsym = %-10u, nundefsym = %u\n" % ( 1438 self.iundefsym, self.nundefsym) 1439 s += " tocoff = %#8.8x, ntoc = %u\n" % ( 1440 self.tocoff, self.ntoc) 1441 s += " modtaboff = %#8.8x, nmodtab = %u\n" % ( 1442 self.modtaboff, self.nmodtab) 1443 s += " extrefsymoff = %#8.8x, nextrefsyms = %u\n" % ( 1444 self.extrefsymoff, self.nextrefsyms) 1445 s += " indirectsymoff = %#8.8x, nindirectsyms = %u\n" % ( 1446 self.indirectsymoff, self.nindirectsyms) 1447 s += " extreloff = %#8.8x, nextrel = %u\n" % ( 1448 self.extreloff, self.nextrel) 1449 s += " locreloff = %#8.8x, nlocrel = %u" % ( 1450 self.locreloff, self.nlocrel) 1451 return s 1452 1453 class SymtabLoadCommand(LoadCommand): 1454 1455 def __init__(self, lc): 1456 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1457 self.symoff = 0 1458 self.nsyms = 0 1459 self.stroff = 0 1460 self.strsize = 0 1461 1462 def unpack(self, mach_file, data): 1463 byte_order_char = mach_file.magic.get_byte_order() 1464 self.symoff, self.nsyms, self.stroff, self.strsize = data.get_n_uint32( 1465 4) 1466 1467 def __str__(self): 1468 s = Mach.LoadCommand.__str__(self) 1469 s += "symoff = %#8.8x, nsyms = %u, stroff = %#8.8x, strsize = %u" % ( 1470 self.symoff, self.nsyms, self.stroff, self.strsize) 1471 return s 1472 1473 class UUIDLoadCommand(LoadCommand): 1474 1475 def __init__(self, lc): 1476 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1477 self.uuid = None 1478 1479 def unpack(self, mach_file, data): 1480 uuid_data = data.get_n_uint8(16) 1481 uuid_str = '' 1482 for byte in uuid_data: 1483 uuid_str += '%2.2x' % byte 1484 self.uuid = uuid.UUID(uuid_str) 1485 mach_file.uuid = self.uuid 1486 1487 def __str__(self): 1488 s = Mach.LoadCommand.__str__(self) 1489 s += self.uuid.__str__() 1490 return s 1491 1492 class DataBlobLoadCommand(LoadCommand): 1493 1494 def __init__(self, lc): 1495 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1496 self.dataoff = 0 1497 self.datasize = 0 1498 1499 def unpack(self, mach_file, data): 1500 byte_order_char = mach_file.magic.get_byte_order() 1501 self.dataoff, self.datasize = data.get_n_uint32(2) 1502 1503 def __str__(self): 1504 s = Mach.LoadCommand.__str__(self) 1505 s += "dataoff = %#8.8x, datasize = %u" % ( 1506 self.dataoff, self.datasize) 1507 return s 1508 1509 class EncryptionInfoLoadCommand(LoadCommand): 1510 1511 def __init__(self, lc): 1512 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1513 self.cryptoff = 0 1514 self.cryptsize = 0 1515 self.cryptid = 0 1516 1517 def unpack(self, mach_file, data): 1518 byte_order_char = mach_file.magic.get_byte_order() 1519 self.cryptoff, self.cryptsize, self.cryptid = data.get_n_uint32(3) 1520 1521 def __str__(self): 1522 s = Mach.LoadCommand.__str__(self) 1523 s += "file-range = [%#8.8x - %#8.8x), cryptsize = %u, cryptid = %u" % ( 1524 self.cryptoff, self.cryptoff + self.cryptsize, self.cryptsize, self.cryptid) 1525 return s 1526 1527 class SegmentLoadCommand(LoadCommand): 1528 1529 def __init__(self, lc): 1530 Mach.LoadCommand.__init__(self, lc.command, lc.length, lc.file_off) 1531 self.segname = None 1532 self.vmaddr = 0 1533 self.vmsize = 0 1534 self.fileoff = 0 1535 self.filesize = 0 1536 self.maxprot = 0 1537 self.initprot = 0 1538 self.nsects = 0 1539 self.flags = 0 1540 1541 def unpack(self, mach_file, data): 1542 is_64 = self.command.get_enum_value() == LC_SEGMENT_64 1543 self.segname = data.get_fixed_length_c_string(16, '', True) 1544 if is_64: 1545 self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint64( 1546 4) 1547 else: 1548 self.vmaddr, self.vmsize, self.fileoff, self.filesize = data.get_n_uint32( 1549 4) 1550 self.maxprot, self.initprot, self.nsects, self.flags = data.get_n_uint32( 1551 4) 1552 mach_file.segments.append(self) 1553 for i in range(self.nsects): 1554 section = Mach.Section() 1555 section.unpack(is_64, data) 1556 section.index = len(mach_file.sections) 1557 mach_file.sections.append(section) 1558 1559 def __str__(self): 1560 s = Mach.LoadCommand.__str__(self) 1561 if self.command.get_enum_value() == LC_SEGMENT: 1562 s += "%#8.8x %#8.8x %#8.8x %#8.8x " % ( 1563 self.vmaddr, self.vmsize, self.fileoff, self.filesize) 1564 else: 1565 s += "%#16.16x %#16.16x %#16.16x %#16.16x " % ( 1566 self.vmaddr, self.vmsize, self.fileoff, self.filesize) 1567 s += "%s %s %3u %#8.8x" % (vm_prot_names[self.maxprot], vm_prot_names[ 1568 self.initprot], self.nsects, self.flags) 1569 s += ' ' + self.segname 1570 return s 1571 1572 class NList: 1573 1574 class Type: 1575 1576 class Stab(dict_utils.Enum): 1577 enum = { 1578 'N_GSYM': N_GSYM, 1579 'N_FNAME': N_FNAME, 1580 'N_FUN': N_FUN, 1581 'N_STSYM': N_STSYM, 1582 'N_LCSYM': N_LCSYM, 1583 'N_BNSYM': N_BNSYM, 1584 'N_OPT': N_OPT, 1585 'N_RSYM': N_RSYM, 1586 'N_SLINE': N_SLINE, 1587 'N_ENSYM': N_ENSYM, 1588 'N_SSYM': N_SSYM, 1589 'N_SO': N_SO, 1590 'N_OSO': N_OSO, 1591 'N_LSYM': N_LSYM, 1592 'N_BINCL': N_BINCL, 1593 'N_SOL': N_SOL, 1594 'N_PARAMS': N_PARAMS, 1595 'N_VERSION': N_VERSION, 1596 'N_OLEVEL': N_OLEVEL, 1597 'N_PSYM': N_PSYM, 1598 'N_EINCL': N_EINCL, 1599 'N_ENTRY': N_ENTRY, 1600 'N_LBRAC': N_LBRAC, 1601 'N_EXCL': N_EXCL, 1602 'N_RBRAC': N_RBRAC, 1603 'N_BCOMM': N_BCOMM, 1604 'N_ECOMM': N_ECOMM, 1605 'N_ECOML': N_ECOML, 1606 'N_LENG': N_LENG 1607 } 1608 1609 def __init__(self, magic=0): 1610 dict_utils.Enum.__init__(self, magic, self.enum) 1611 1612 def __init__(self, t=0): 1613 self.value = t 1614 1615 def __str__(self): 1616 n_type = self.value 1617 if n_type & N_STAB: 1618 stab = Mach.NList.Type.Stab(self.value) 1619 return '%s' % stab 1620 else: 1621 type = self.value & N_TYPE 1622 type_str = '' 1623 if type == N_UNDF: 1624 type_str = 'N_UNDF' 1625 elif type == N_ABS: 1626 type_str = 'N_ABS ' 1627 elif type == N_SECT: 1628 type_str = 'N_SECT' 1629 elif type == N_PBUD: 1630 type_str = 'N_PBUD' 1631 elif type == N_INDR: 1632 type_str = 'N_INDR' 1633 else: 1634 type_str = "??? (%#2.2x)" % type 1635 if n_type & N_PEXT: 1636 type_str += ' | PEXT' 1637 if n_type & N_EXT: 1638 type_str += ' | EXT ' 1639 return type_str 1640 1641 def __init__(self): 1642 self.index = 0 1643 self.name_offset = 0 1644 self.name = 0 1645 self.type = Mach.NList.Type() 1646 self.sect_idx = 0 1647 self.desc = 0 1648 self.value = 0 1649 1650 def unpack(self, mach_file, data, symtab_lc): 1651 self.index = len(mach_file.symbols) 1652 self.name_offset = data.get_uint32() 1653 self.type.value, self.sect_idx = data.get_n_uint8(2) 1654 self.desc = data.get_uint16() 1655 if mach_file.is_64_bit(): 1656 self.value = data.get_uint64() 1657 else: 1658 self.value = data.get_uint32() 1659 data.push_offset_and_seek( 1660 mach_file.file_off + 1661 symtab_lc.stroff + 1662 self.name_offset) 1663 # print "get string for symbol[%u]" % self.index 1664 self.name = data.get_c_string() 1665 data.pop_offset_and_seek() 1666 1667 def __str__(self): 1668 name_display = '' 1669 if len(self.name): 1670 name_display = ' "%s"' % self.name 1671 return '%#8.8x %#2.2x (%-20s) %#2.2x %#4.4x %16.16x%s' % (self.name_offset, 1672 self.type.value, self.type, self.sect_idx, self.desc, self.value, name_display) 1673 1674 class Interactive(cmd.Cmd): 1675 '''Interactive command interpreter to mach-o files.''' 1676 1677 def __init__(self, mach, options): 1678 cmd.Cmd.__init__(self) 1679 self.intro = 'Interactive mach-o command interpreter' 1680 self.prompt = 'mach-o: %s %% ' % mach.path 1681 self.mach = mach 1682 self.options = options 1683 1684 def default(self, line): 1685 '''Catch all for unknown command, which will exit the interpreter.''' 1686 print("uknown command: %s" % line) 1687 return True 1688 1689 def do_q(self, line): 1690 '''Quit command''' 1691 return True 1692 1693 def do_quit(self, line): 1694 '''Quit command''' 1695 return True 1696 1697 def do_header(self, line): 1698 '''Dump mach-o file headers''' 1699 self.mach.dump_header(True, self.options) 1700 return False 1701 1702 def do_load(self, line): 1703 '''Dump all mach-o load commands''' 1704 self.mach.dump_load_commands(True, self.options) 1705 return False 1706 1707 def do_sections(self, line): 1708 '''Dump all mach-o sections''' 1709 self.mach.dump_sections(True, self.options) 1710 return False 1711 1712 def do_symtab(self, line): 1713 '''Dump all mach-o symbols in the symbol table''' 1714 self.mach.dump_symtab(True, self.options) 1715 return False 1716 1717if __name__ == '__main__': 1718 parser = optparse.OptionParser( 1719 description='A script that parses skinny and universal mach-o files.') 1720 parser.add_option( 1721 '--arch', 1722 '-a', 1723 type='string', 1724 metavar='arch', 1725 dest='archs', 1726 action='append', 1727 help='specify one or more architectures by name') 1728 parser.add_option( 1729 '-v', 1730 '--verbose', 1731 action='store_true', 1732 dest='verbose', 1733 help='display verbose debug info', 1734 default=False) 1735 parser.add_option( 1736 '-H', 1737 '--header', 1738 action='store_true', 1739 dest='dump_header', 1740 help='dump the mach-o file header', 1741 default=False) 1742 parser.add_option( 1743 '-l', 1744 '--load-commands', 1745 action='store_true', 1746 dest='dump_load_commands', 1747 help='dump the mach-o load commands', 1748 default=False) 1749 parser.add_option( 1750 '-s', 1751 '--symtab', 1752 action='store_true', 1753 dest='dump_symtab', 1754 help='dump the mach-o symbol table', 1755 default=False) 1756 parser.add_option( 1757 '-S', 1758 '--sections', 1759 action='store_true', 1760 dest='dump_sections', 1761 help='dump the mach-o sections', 1762 default=False) 1763 parser.add_option( 1764 '--section', 1765 type='string', 1766 metavar='sectname', 1767 dest='section_names', 1768 action='append', 1769 help='Specify one or more section names to dump', 1770 default=[]) 1771 parser.add_option( 1772 '-o', 1773 '--out', 1774 type='string', 1775 dest='outfile', 1776 help='Used in conjunction with the --section=NAME option to save a single section\'s data to disk.', 1777 default=False) 1778 parser.add_option( 1779 '-i', 1780 '--interactive', 1781 action='store_true', 1782 dest='interactive', 1783 help='enable interactive mode', 1784 default=False) 1785 parser.add_option( 1786 '-m', 1787 '--mangled', 1788 action='store_true', 1789 dest='find_mangled', 1790 help='dump all mangled names in a mach file', 1791 default=False) 1792 parser.add_option( 1793 '-c', 1794 '--compare', 1795 action='store_true', 1796 dest='compare', 1797 help='compare two mach files', 1798 default=False) 1799 parser.add_option( 1800 '-M', 1801 '--extract-modules', 1802 action='store_true', 1803 dest='extract_modules', 1804 help='Extract modules from file', 1805 default=False) 1806 parser.add_option( 1807 '-C', 1808 '--count', 1809 type='int', 1810 dest='max_count', 1811 help='Sets the max byte count when dumping section data', 1812 default=-1) 1813 1814 (options, mach_files) = parser.parse_args() 1815 if options.extract_modules: 1816 if options.section_names: 1817 print("error: can't use --section option with the --extract-modules option") 1818 exit(1) 1819 if not options.outfile: 1820 print("error: the --output=FILE option must be specified with the --extract-modules option") 1821 exit(1) 1822 options.section_names.append("__apple_ast") 1823 if options.compare: 1824 if len(mach_files) == 2: 1825 mach_a = Mach() 1826 mach_b = Mach() 1827 mach_a.parse(mach_files[0]) 1828 mach_b.parse(mach_files[1]) 1829 mach_a.compare(mach_b) 1830 else: 1831 print('error: --compare takes two mach files as arguments') 1832 else: 1833 if not (options.dump_header or options.dump_load_commands or options.dump_symtab or options.dump_sections or options.find_mangled or options.section_names): 1834 options.dump_header = True 1835 options.dump_load_commands = True 1836 if options.verbose: 1837 print('options', options) 1838 print('mach_files', mach_files) 1839 for path in mach_files: 1840 mach = Mach() 1841 mach.parse(path) 1842 if options.interactive: 1843 interpreter = Mach.Interactive(mach, options) 1844 interpreter.cmdloop() 1845 else: 1846 mach.dump(options) 1847