#!/usr/bin/env python
#===- lib/asan/scripts/asan_symbolize.py -----------------------------------===#
#
#                     The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
#===------------------------------------------------------------------------===#
import argparse
import bisect
import getopt
import os
import re
import subprocess
import sys

# Cache of ChainSymbolizer objects, keyed by binary path.
symbolizers = {}
DEBUG = False
demangle = False
binutils_prefix = None
sysroot_path = None
binary_name_filter = None
fix_filename_patterns = None
logfile = sys.stdin
allow_system_symbolizer = True


# FIXME: merge the code that calls fix_filename().
def fix_filename(file_name):
    """Return file_name with configured prefixes and runtime noise removed.

    Every regex in the module-level fix_filename_patterns list is cut from
    the path together with everything that precedes it. Locations inside
    asan_*.cc files are collapsed to '_asan_rtl_' and 'crtstuff.c:0' is
    rewritten to '???:0'.
    """
    if fix_filename_patterns:
        for path_to_cut in fix_filename_patterns:
            file_name = re.sub('.*' + path_to_cut, '', file_name)
    file_name = re.sub('.*asan_[a-z_]*.cc:[0-9]*', '_asan_rtl_', file_name)
    file_name = re.sub('.*crtstuff.c:0', '???:0', file_name)
    return file_name


def sysroot_path_filter(binary_name):
    """Prefix binary_name with the module-level sysroot_path."""
    return sysroot_path + binary_name


def guess_arch(addr):
    """Guess the architecture from the textual width of an address."""
    # Guess which arch we're running. 10 = len('0x') + 8 hex digits.
    if len(addr) > 10:
        return 'x86_64'
    else:
        return 'i386'


class Symbolizer(object):
    """Base class for all symbolizers; symbolizes nothing by itself."""

    def __init__(self):
        pass

    def symbolize(self, addr, binary, offset):
        """Symbolize the given address (pair of binary and offset).

        Overridden in subclasses.
        Args:
            addr: virtual address of an instruction.
            binary: path to executable/shared object containing this
                instruction.
            offset: instruction offset in the @binary.
        Returns:
            list of strings (one string for each inlined frame) describing
            the code locations for this instruction (that is, function name,
            file name, line and column numbers).
        """
        return None


class LLVMSymbolizer(Symbolizer):
    """Symbolizer that talks to a long-running llvm-symbolizer child."""

    def __init__(self, symbolizer_path, default_arch, system, dsym_hints=None):
        super(LLVMSymbolizer, self).__init__()
        self.symbolizer_path = symbolizer_path
        self.default_arch = default_arch
        self.system = system
        # A None default avoids sharing one mutable list between instances.
        self.dsym_hints = [] if dsym_hints is None else dsym_hints
        self.pipe = self.open_llvm_symbolizer()

    def open_llvm_symbolizer(self):
        """Start the symbolizer process; return None if it cannot be run."""
        cmd = [self.symbolizer_path,
               '--use-symbol-table=true',
               '--demangle=%s' % demangle,
               '--functions=linkage',
               '--inlining=true',
               '--default-arch=%s' % self.default_arch]
        if self.system == 'Darwin':
            for hint in self.dsym_hints:
                cmd.append('--dsym-hint=%s' % hint)
        if DEBUG:
            print(' '.join(cmd))
        try:
            # Text-mode pipes so that str can be written and read on both
            # Python 2 and Python 3.
            result = subprocess.Popen(cmd, stdin=subprocess.PIPE,
                                      stdout=subprocess.PIPE,
                                      universal_newlines=True)
        except OSError:
            result = None
        return result

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if not self.pipe:
            return None
        result = []
        try:
            symbolizer_input = '"%s" %s' % (binary, offset)
            if DEBUG:
                print(symbolizer_input)
            self.pipe.stdin.write(symbolizer_input + '\n')
            # The child only answers once it has seen the complete line.
            self.pipe.stdin.flush()
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                if not function_name:
                    # An empty line ends the list of frames.
                    break
                file_name = self.pipe.stdout.readline().rstrip()
                file_name = fix_filename(file_name)
                if (not function_name.startswith('??') or
                        not file_name.startswith('??')):
                    # Append only non-trivial frames.
                    result.append('%s in %s %s' % (addr, function_name,
                                                   file_name))
        except Exception:
            # Best effort: any failure is reported as "nothing found" so the
            # caller can fall back to another symbolizer.
            result = []
        if not result:
            result = None
        return result


def LLVMSymbolizerFactory(system, default_arch, dsym_hints=None):
    """Create an LLVMSymbolizer, locating the tool via the environment.

    LLVM_SYMBOLIZER_PATH is consulted first, then ASAN_SYMBOLIZER_PATH;
    otherwise 'llvm-symbolizer' is looked up in PATH.
    """
    symbolizer_path = os.getenv('LLVM_SYMBOLIZER_PATH')
    if not symbolizer_path:
        symbolizer_path = os.getenv('ASAN_SYMBOLIZER_PATH')
        if not symbolizer_path:
            # Assume llvm-symbolizer is in PATH.
            symbolizer_path = 'llvm-symbolizer'
    return LLVMSymbolizer(symbolizer_path, default_arch, system, dsym_hints)


class Addr2LineSymbolizer(Symbolizer):
    """Symbolizer backed by an addr2line process bound to one binary."""

    def __init__(self, binary):
        super(Addr2LineSymbolizer, self).__init__()
        self.binary = binary
        self.pipe = self.open_addr2line()
        # Written after every query; the reply to it marks the end of the
        # (possibly multi-frame) answer.
        self.output_terminator = -1

    def open_addr2line(self):
        """Start addr2line (optionally binutils_prefix-ed) for self.binary."""
        addr2line_tool = 'addr2line'
        if binutils_prefix:
            addr2line_tool = binutils_prefix + addr2line_tool
        cmd = [addr2line_tool, '-fi']
        if demangle:
            cmd += ['--demangle']
        cmd += ['-e', self.binary]
        if DEBUG:
            print(' '.join(cmd))
        return subprocess.Popen(cmd,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                universal_newlines=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        lines = []
        try:
            self.pipe.stdin.write('%s\n' % offset)
            self.pipe.stdin.write('%s\n' % self.output_terminator)
            self.pipe.stdin.flush()
            is_first_frame = True
            while True:
                function_name = self.pipe.stdout.readline().rstrip()
                file_name = self.pipe.stdout.readline().rstrip()
                if is_first_frame:
                    # The first frame is always kept, even when unknown.
                    is_first_frame = False
                elif function_name in ('', '??'):
                    # Reply to the terminator (or EOF). The file name of that
                    # record is typically '??:0', so it must not be required
                    # to equal the function name.
                    break
                lines.append((function_name, file_name))
        except Exception:
            lines.append(('??', '??:0'))
        return ['%s in %s %s' % (addr, function, fix_filename(location))
                for (function, location) in lines]


class UnbufferedLineConverter(object):
    """
    Wrap a child process that responds to each line of input with one line of
    output.  Uses pty to trick the child into providing unbuffered output.
    """

    def __init__(self, args, close_stderr=False):
        # Local imports so that the script can start on Windows.
        import pty
        import termios
        pid, fd = pty.fork()
        if pid == 0:
            # We're the child. Transfer control to command.
            if close_stderr:
                dev_null = os.open('/dev/null', 0)
                os.dup2(dev_null, 2)
            os.execvp(args[0], args)
        else:
            # Disable echoing.
            attr = termios.tcgetattr(fd)
            attr[3] = attr[3] & ~termios.ECHO
            termios.tcsetattr(fd, termios.TCSANOW, attr)
            # Set up a file()-like interface to the child process
            self.r = os.fdopen(fd, "r", 1)
            self.w = os.fdopen(os.dup(fd), "w", 1)

    def convert(self, line):
        """Send one line to the child and return its one-line reply."""
        self.w.write(line + "\n")
        return self.readline()

    def readline(self):
        """Read one line from the child, stripped of trailing whitespace."""
        return self.r.readline().rstrip()


class DarwinSymbolizer(Symbolizer):
    """Symbolizer backed by an atos process bound to one binary."""

    def __init__(self, addr, binary):
        super(DarwinSymbolizer, self).__init__()
        self.binary = binary
        self.arch = guess_arch(addr)
        self.open_atos()

    def open_atos(self):
        """Start atos for self.binary / self.arch behind a pty."""
        if DEBUG:
            print('atos -o %s -arch %s' % (self.binary, self.arch))
        cmdline = ['atos', '-o', self.binary, '-arch', self.arch]
        self.atos = UnbufferedLineConverter(cmdline, close_stderr=True)

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        atos_line = self.atos.convert('0x%x' % int(offset, 16))
        # Skip informational lines that precede the actual answer.
        while "got symbolicator for" in atos_line:
            atos_line = self.atos.readline()
        # A well-formed atos response looks like this:
        #   foo(type1, type2) (in object.name) (filename.cc:80)
        match = re.match(r'^(.*) \(in (.*)\) \((.*:\d*)\)$', atos_line)
        if DEBUG:
            print('atos_line:  ' + atos_line)
        if match:
            function_name = match.group(1)
            # Drop the parameter list from the function name.
            function_name = re.sub(r'\(.*?\)', '', function_name)
            file_name = fix_filename(match.group(3))
            return ['%s in %s %s' % (addr, function_name, file_name)]
        else:
            return ['%s in %s' % (addr, atos_line)]


# Chain several symbolizers so that if one symbolizer fails, we fall back
# to the next symbolizer in chain.
class ChainSymbolizer(Symbolizer):
    """Try a list of symbolizers in order; the first non-empty result wins."""

    def __init__(self, symbolizer_list):
        super(ChainSymbolizer, self).__init__()
        self.symbolizer_list = symbolizer_list

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        for symbolizer in self.symbolizer_list:
            if not symbolizer:
                # Factories may return None; such entries are skipped.
                continue
            result = symbolizer.symbolize(addr, binary, offset)
            if result:
                return result
        return None

    def append_symbolizer(self, symbolizer):
        """Add one more fallback at the end of the chain."""
        self.symbolizer_list.append(symbolizer)


def BreakpadSymbolizerFactory(binary):
    """Return a BreakpadSymbolizer for binary + $BREAKPAD_SUFFIX, or None.

    None is returned when BREAKPAD_SUFFIX is unset/empty or when the
    resulting symbol file does not exist.
    """
    suffix = os.getenv('BREAKPAD_SUFFIX')
    if suffix:
        filename = binary + suffix
        if os.access(filename, os.F_OK):
            return BreakpadSymbolizer(filename)
    return None


def SystemSymbolizerFactory(system, addr, binary):
    """Return the platform's native symbolizer, or None for other systems."""
    if system == 'Darwin':
        return DarwinSymbolizer(addr, binary)
    elif system == 'Linux':
        return Addr2LineSymbolizer(binary)
    return None


class BreakpadSymbolizer(Symbolizer):
    """Symbolizer that answers from a Breakpad text symbol file."""

    def __init__(self, filename):
        super(BreakpadSymbolizer, self).__init__()
        self.filename = filename
        # open() + with: works on Python 2 and 3 and closes the handle.
        with open(filename) as symbol_file:
            lines = symbol_file.readlines()
        self.files = []          # FILE records, indexed by file number.
        self.symbols = {}        # function/public address -> symbol name.
        self.address_list = []   # sorted start addresses of line records.
        self.addresses = {}      # start address -> (func addr, size, line, file no).
        # MODULE mac x86_64 A7001116478B33F18FF9BEDE9F615F190 t
        fragments = lines[0].rstrip().split()
        self.arch = fragments[2]
        self.debug_id = fragments[3]
        self.binary = ' '.join(fragments[4:])
        self.parse_lines(lines[1:])

    def parse_lines(self, lines):
        """Fill the lookup tables from the records following MODULE."""
        cur_function_addr = ''
        for line in lines:
            fragments = line.split()
            if not fragments:
                # Blank line: nothing to parse.
                continue
            record = fragments[0]
            if record == 'FILE':
                assert int(fragments[1]) == len(self.files)
                self.files.append(' '.join(fragments[2:]))
            elif record == 'PUBLIC':
                self.symbols[int(fragments[1], 16)] = ' '.join(fragments[3:])
            elif record in ('CFI', 'STACK'):
                pass
            elif record == 'FUNC':
                cur_function_addr = int(fragments[1], 16)
                # A name already recorded for this address is kept.
                if cur_function_addr not in self.symbols:
                    self.symbols[cur_function_addr] = ' '.join(fragments[4:])
            else:
                # Line starting with an address.
                addr = int(fragments[0], 16)
                self.address_list.append(addr)
                # Tuple of symbol address, size, line, file number.
                self.addresses[addr] = (cur_function_addr,
                                        int(fragments[1], 16),
                                        int(fragments[2]),
                                        int(fragments[3]))
        self.address_list.sort()

    def get_sym_file_line(self, addr):
        """Return (symbol, filename, line) covering addr, or None."""
        # Index of the first line record that starts after addr; the record
        # just before it is the only candidate that can contain addr.
        index = bisect.bisect_right(self.address_list, addr)
        if index == 0:
            return None
        key = self.address_list[index - 1]
        sym_id, size, line_no, file_no = self.addresses[key]
        symbol = self.symbols[sym_id]
        filename = self.files[file_no]
        if addr < key + size:
            return symbol, filename, line_no
        else:
            return None

    def symbolize(self, addr, binary, offset):
        """Overrides Symbolizer.symbolize."""
        if self.binary != binary:
            return None
        res = self.get_sym_file_line(int(offset, 16))
        if res:
            function_name, file_name, line_no = res
            result = ['%s in %s %s:%d' % (
                addr, function_name, file_name, line_no)]
            if DEBUG:
                # Diagnostic only; must not leak into the symbolized output.
                print(result)
            return result
        else:
            return None


class SymbolizationLoop(object):
    """Reads a log line by line and replaces stack frames with symbols."""

    def __init__(self, binary_name_filter=None, dsym_hint_producer=None):
        if sys.platform == 'win32':
            # ASan on Windows uses dbghelp.dll to symbolize in-process, which
            # works even in sandboxed processes.  Nothing needs to be done
            # here.
            self.process_line = self.process_line_echo
        else:
            # Used by clients who may want to supply a different binary name.
            # E.g. in Chrome several binaries may share a single .dSYM.
            self.binary_name_filter = binary_name_filter
            self.dsym_hint_producer = dsym_hint_producer
            self.system = os.uname()[0]
            if self.system not in ['Linux', 'Darwin', 'FreeBSD']:
                raise Exception('Unknown system')
            self.llvm_symbolizers = {}
            self.last_llvm_symbolizer = None
            self.dsym_hints = set()
            self.frame_no = 0
            self.process_line = self.process_line_posix

    def symbolize_address(self, addr, binary, offset):
        """Return the symbolized frames for one address.

        Raises:
            Exception: the Breakpad/LLVM symbolizers produced nothing and
                the system symbolizer is disallowed.
            AssertionError: the system symbolizer produced nothing either.
        """
        # On non-Darwin (i.e. on platforms without .dSYM debug info) always
        # use a single symbolizer binary.
        # On Darwin, if the dsym hint producer is present:
        #  1. check whether we've seen this binary already; if so,
        #     use |llvm_symbolizers[binary]|, which has already loaded the
        #     debug info for this binary (might not be the case for
        #     |last_llvm_symbolizer|);
        #  2. otherwise check if we've seen all the hints for this binary
        #     already; if so, reuse |last_llvm_symbolizer| which has the full
        #     set of hints;
        #  3. otherwise create a new symbolizer and pass all currently known
        #     .dSYM hints to it.
        if binary not in self.llvm_symbolizers:
            use_new_symbolizer = True
            if self.system == 'Darwin' and self.dsym_hint_producer:
                dsym_hints_for_binary = set(self.dsym_hint_producer(binary))
                use_new_symbolizer = bool(
                    dsym_hints_for_binary - self.dsym_hints)
                self.dsym_hints |= dsym_hints_for_binary
            if self.last_llvm_symbolizer and not use_new_symbolizer:
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
            else:
                self.last_llvm_symbolizer = LLVMSymbolizerFactory(
                    self.system, guess_arch(addr), self.dsym_hints)
                self.llvm_symbolizers[binary] = self.last_llvm_symbolizer
        # Use the chain of symbolizers:
        # Breakpad symbolizer -> LLVM symbolizer -> addr2line/atos
        # (fall back to next symbolizer if the previous one fails).
        if binary not in symbolizers:
            symbolizers[binary] = ChainSymbolizer(
                [BreakpadSymbolizerFactory(binary),
                 self.llvm_symbolizers[binary]])
        result = symbolizers[binary].symbolize(addr, binary, offset)
        if result is None:
            if not allow_system_symbolizer:
                raise Exception('Failed to launch or use llvm-symbolizer.')
            # Initialize system symbolizer only if other symbolizers failed.
            symbolizers[binary].append_symbolizer(
                SystemSymbolizerFactory(self.system, addr, binary))
            result = symbolizers[binary].symbolize(addr, binary, offset)
        # The system symbolizer must produce some result.
        assert result
        return result

    def get_symbolized_lines(self, symbolized_lines):
        """Number the given frames, or echo the current line if there are none."""
        if not symbolized_lines:
            return [self.current_line]
        result = []
        for symbolized_frame in symbolized_lines:
            # NOTE(review): the leading indent of this format string may have
            # been collapsed when the listing was extracted; confirm it
            # against the indentation of ASan's own frame lines.
            result.append(' #%s %s' % (str(self.frame_no),
                                       symbolized_frame.rstrip()))
            self.frame_no += 1
        return result

    def process_logfile(self):
        """Symbolize every line of the module-level logfile to stdout."""
        self.frame_no = 0
        for line in logfile:
            processed = self.process_line(line)
            print('\n'.join(processed))

    def process_line_echo(self, line):
        """Return the line unchanged apart from trailing whitespace."""
        return [line.rstrip()]

    def process_line_posix(self, line):
        """Symbolize one log line; non-frame lines are passed through."""
        self.current_line = line.rstrip()
        #0 0x7f6e35cf2e45  (/blah/foo.so+0x11fe45)
        stack_trace_line_format = (
            r'^( *#([0-9]+) *)(0x[0-9a-f]+) *\((.*)\+(0x[0-9a-f]+)\)')
        match = re.match(stack_trace_line_format, line)
        if not match:
            return [self.current_line]
        if DEBUG:
            print(line)
        _, frameno_str, addr, binary, offset = match.groups()
        if frameno_str == '0':
            # Assume that frame #0 is the first frame of new stack trace.
            self.frame_no = 0
        original_binary = binary
        if self.binary_name_filter:
            binary = self.binary_name_filter(binary)
        symbolized_line = self.symbolize_address(addr, binary, offset)
        if not symbolized_line:
            if original_binary != binary:
                # The filtered name gave nothing: retry with the name that
                # actually appeared in the log.
                symbolized_line = self.symbolize_address(
                    addr, original_binary, offset)
        return self.get_symbolized_lines(symbolized_line)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description='ASan symbolization script',
        epilog='Example of use:\n'
               'asan_symbolize.py -c "$HOME/opt/cross/bin/arm-linux-gnueabi-" '
               '-s "$HOME/SymbolFiles" < asan.log')
    parser.add_argument('path_to_cut', nargs='*',
                        help='pattern to be cut from the result file path ')
    parser.add_argument('-d', '--demangle', action='store_true',
                        help='demangle function names')
    parser.add_argument('-s', metavar='SYSROOT',
                        help='set path to sysroot for sanitized binaries')
    parser.add_argument('-c', metavar='CROSS_COMPILE',
                        help='set prefix for binutils')
    parser.add_argument('-l', '--logfile', default=sys.stdin,
                        type=argparse.FileType('r'),
                        help='set log file name to parse, default is stdin')
    args = parser.parse_args()
    # These assignments rebind the module-level settings read by the
    # symbolizer classes above.
    if args.path_to_cut:
        fix_filename_patterns = args.path_to_cut
    if args.demangle:
        demangle = True
    if args.s:
        binary_name_filter = sysroot_path_filter
        sysroot_path = args.s
    if args.c:
        binutils_prefix = args.c
    if args.logfile:
        logfile = args.logfile
    else:
        logfile = sys.stdin
    loop = SymbolizationLoop(binary_name_filter)
    loop.process_logfile()