#!/usr/bin/env python
#===- lib/hwasan/scripts/hwasan_symbolize ----------------------------------===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
#===------------------------------------------------------------------------===#
#
# HWAddressSanitizer offline symbolization script.
#
# Reads a HWASan report on stdin, symbolizes stack frames and stack-history
# records through an llvm-symbolizer subprocess, and writes the result to
# stdout.
#
#===------------------------------------------------------------------------===#
import glob
import os
import re
import sys
import string
import subprocess
import argparse

# The script is written to run unchanged on Python 2 and Python 3.  The only
# version-dependent parts are text encoding/decoding at the I/O boundaries.
_PY2 = sys.version_info[0] < 3

# State carried between report lines: filled in by save_access_address() and
# consumed by process_stack_history().
last_access_address = None
last_access_tag = None

# The Symbolizer used by symbolize_line(); assigned by main().
symbolizer = None


def _to_output(text):
    """Return `text` in the form that can be passed to print().

    On Python 2 report lines are decoded to `unicode` when read, so they are
    encoded back to UTF-8 bytes before printing.  On Python 3 they are
    already native `str` and are returned unchanged.
    """
    if _PY2:
        return text.encode('utf-8')
    return text


class Symbolizer:
    """Wrapper around a lazily started llvm-symbolizer subprocess."""

    def __init__(self, path, binary_prefixes, paths_to_cut):
        """Store the configuration; the subprocess is started on first use.

        path: llvm-symbolizer executable to run.
        binary_prefixes: directories searched for binaries with symbols.
        paths_to_cut: source path prefixes (used as regex fragments) that
            are stripped from reported file names.
        """
        self.__pipe = None
        self.__path = path
        self.__binary_prefixes = binary_prefixes
        self.__paths_to_cut = paths_to_cut
        self.__log = False
        self.__warnings = set()

    def enable_logging(self, enable):
        """Turn echoing of the symbolizer conversation to stderr on or off."""
        self.__log = enable

    def __open_pipe(self):
        """Start the llvm-symbolizer subprocess unless it is already running."""
        if self.__pipe:
            return
        popen_kwargs = {}
        if not _PY2:
            # Python 3 pipes are binary by default; exchange text instead.
            popen_kwargs['encoding'] = 'utf-8'
        self.__pipe = subprocess.Popen(
            [self.__path, "-inlining", "-functions"],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, **popen_kwargs)

    class __EOF(Exception):
        """Raised by __read() when the symbolizer returns an empty line."""
        pass

    def __write(self, s):
        """Send one request line to the symbolizer."""
        self.__pipe.stdin.write(s + "\n")
        # Python 3 pipes are buffered; without a flush the request could stay
        # in the buffer while __read() blocks waiting for the answer.
        self.__pipe.stdin.flush()
        if self.__log:
            sys.stderr.write("#>> |%s|\n" % (s,))

    def __read(self):
        """Return the next response line, right-stripped.

        Raises Symbolizer.__EOF when the line is empty.
        """
        s = self.__pipe.stdout.readline().rstrip()
        if self.__log:
            sys.stderr.write("# << |%s|\n" % (s,))
        if s == '':
            raise Symbolizer.__EOF
        return s

    def __process_source_path(self, file_name):
        """Shorten a "file:line" string reported by the symbolizer.

        Strips everything up to and including each configured prefix, then
        collapses sanitizer runtime sources and crtstuff.c:0 to placeholders.
        """
        for path_to_cut in self.__paths_to_cut:
            file_name = re.sub(".*" + path_to_cut, "", file_name)
        file_name = re.sub(".*hwasan_[a-z_]*.(cc|h):[0-9]*", "[hwasan_rtl]", file_name)
        file_name = re.sub(".*asan_[a-z_]*.(cc|h):[0-9]*", "[asan_rtl]", file_name)
        file_name = re.sub(".*crtstuff.c:0", "???:0", file_name)
        return file_name

    def __process_binary_name(self, name):
        """Map a binary path from the report to a local file with symbols.

        Returns the first existing candidate, or None (after warning once per
        name on stderr) when nothing is found.
        """
        if name.startswith('/'):
            # os.path.join() would discard the prefix for an absolute path.
            name = name[1:]
        for prefix in self.__binary_prefixes:
            full_path = os.path.join(prefix, name)
            if os.path.exists(full_path):
                return full_path
        # Try stripping extra path components as the last resort.
        base_name = os.path.basename(name)
        for prefix in self.__binary_prefixes:
            full_path = os.path.join(prefix, base_name)
            if os.path.exists(full_path):
                return full_path
        if name not in self.__warnings:
            sys.stderr.write("Could not find symbols for %s\n" % (name,))
            self.__warnings.add(name)
        return None

    def iter_locals(self, binary, addr):
        """Yield the local variables of the frame containing `addr`.

        Sends a FRAME request and yields one tuple per local:
        (function_name, file_line, local_name, offset, size, tag_offset).
        The three numeric fields are None when the symbolizer reports '??'.
        Yields nothing if the binary cannot be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("FRAME %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                local_name = self.__read()
                file_line = self.__process_source_path(self.__read())
                extra = self.__read().split()

                offset = None if extra[0] == '??' else int(extra[0])
                size = None if extra[1] == '??' else int(extra[1])
                tag_offset = None if extra[2] == '??' else int(extra[2])
                yield (function_name, file_line, local_name, offset, size, tag_offset)
        except Symbolizer.__EOF:
            # An empty line terminates the response.
            pass

    def iter_call_stack(self, binary, addr):
        """Yield (function_name, file_line) for each frame at `addr`.

        Sends a CODE request; several tuples are produced when the response
        contains more than one function/location pair.  Yields nothing if the
        binary cannot be located.
        """
        self.__open_pipe()
        binary = self.__process_binary_name(binary)
        if not binary:
            return
        self.__write("CODE %s %s" % (binary, addr))
        try:
            while True:
                function_name = self.__read()
                file_line = self.__process_source_path(self.__read())
                yield (function_name, file_line)
        except Symbolizer.__EOF:
            # An empty line terminates the response.
            pass


def symbolize_line(line, symbolizer_path):
    """Print `line`, symbolized if it is a stack frame line.

    NOTE: `symbolizer_path` is accepted for interface compatibility but is
    not used; lookups go through the module-level `symbolizer`, which main()
    sets up.  Lines that are not frames, or whose frames cannot be
    symbolized, are printed unchanged (right-stripped).
    """
    #0 0x7f6e35cf2e45 (/blah/foo.so+0x11fe45)
    match = re.match(r'^(.*?)#([0-9]+)( *)(0x[0-9a-f]*) *\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    if not match:
        print(_to_output(line.rstrip()))
        return

    binary = match.group(5)
    addr = int(match.group(6), 16)
    frames = list(symbolizer.iter_call_stack(binary, addr))
    if not frames:
        print(_to_output(line.rstrip()))
        return

    print("%s#%s%s%s in %s" % (_to_output(match.group(1)), _to_output(match.group(2)),
                               _to_output(match.group(3)), frames[0][0], frames[0][1]))
    # Additional frames are printed as "->" continuation lines, padded using
    # the column positions of the original frame line.
    space1 = ' ' * match.end(1)
    space2 = ' ' * (match.start(4) - match.end(1) - 2)
    for frame in frames[1:]:
        print("%s->%s%s in %s" % (space1, space2, frame[0], frame[1]))


def save_access_address(line):
    """Record the faulting address and pointer tag if `line` reports them.

    Updates the module-level `last_access_address` / `last_access_tag`; lines
    that match neither pattern leave both untouched.
    """
    global last_access_address, last_access_tag
    match = re.match(r'^(.*?)HWAddressSanitizer: tag-mismatch on address (0x[0-9a-f]+) ', line, re.UNICODE)
    if match:
        last_access_address = int(match.group(2), 16)
    match = re.match(r'^(.*?) of size [0-9]+ at 0x[0-9a-f]* tags: ([0-9a-f]+)/[0-9a-f]+ \(ptr/mem\)', line, re.UNICODE)
    if match:
        last_access_tag = int(match.group(2), 16)


def process_stack_history(line, symbolizer, ignore_tags=False):
    """Handle one line of the "Previously allocated frames" section.

    For a stack-history record, prints every local variable of the recorded
    frame that contains the last faulting address (and, unless `ignore_tags`
    is set, whose tag matches the faulting pointer tag).

    Returns True when the line was consumed (section header or record) and a
    false value otherwise, including when no faulting address/tag has been
    seen yet.
    """
    if last_access_address is None or last_access_tag is None:
        return False
    if re.match(r'Previously allocated frames:', line, re.UNICODE):
        return True
    # record_addr:0x1234ABCD record:0x1234ABCD (/path/to/binary+0x1234ABCD)
    match = re.match(r'^(.*?)record_addr:(0x[0-9a-f]+) +record:(0x[0-9a-f]+) +\((.*)\+(0x[0-9a-f]+)\)', line, re.UNICODE)
    if not match:
        return False

    record_addr = int(match.group(2), 16)
    record = int(match.group(3), 16)
    binary = match.group(4)
    addr = int(match.group(5), 16)
    base_tag = (record_addr >> 3) & 0xFF
    # Bits 48 and up of the record, shifted left by 4, give the frame pointer
    # value used below; offsets are compared modulo 2**20.
    fp = (record >> 48) << 4
    fp_mask = (1 << 20) - 1

    for local in symbolizer.iter_locals(binary, addr):
        frame_offset = local[3]
        size = local[4]
        if frame_offset is None or size is None:
            continue
        obj_offset = (last_access_address - fp - frame_offset) & fp_mask
        if obj_offset >= size:
            continue
        tag_offset = local[5]
        if not ignore_tags and (tag_offset is None or base_tag ^ tag_offset != last_access_tag):
            continue
        print('')
        print('Potentially referenced stack object:')
        print(' %d bytes inside variable "%s" in stack frame of function "%s"' % (obj_offset, local[2], local[0]))
        print(' at %s' % (local[1],))
    return True


def extract_version(s):
    """Return the numeric suffix after the last '-' in `s` as a float.

    Returns 0 when there is no '-' or when the suffix is not a number (for
    example "-14.0.1" or "-dev"), so such names sort last instead of
    aborting the search.
    """
    idx = s.rfind('-')
    if idx == -1:
        return 0
    try:
        return float(s[idx + 1:])
    except ValueError:
        return 0


def _find_symbolizer(explicit_path):
    """Locate the llvm-symbolizer binary; return its path or None.

    Search order:
      1. --symbolizer flag (`explicit_path`)
      2. environment variable
      3. unsuffixed binary in the directory of this script
      4. if inside Android platform, prebuilt binary at a known path
      5. first "llvm-symbolizer", then "llvm-symbolizer-$VER" with the
         highest available version in $PATH
    """
    if explicit_path:
        return explicit_path

    # HWASAN_SYMBOLIZER_PATH is consulted only if LLVM_SYMBOLIZER_PATH is unset.
    from_env = os.environ.get('LLVM_SYMBOLIZER_PATH',
                              os.environ.get('HWASAN_SYMBOLIZER_PATH'))
    if from_env:
        return from_env

    candidate = os.path.join(os.path.dirname(sys.argv[0]), 'llvm-symbolizer')
    if os.path.exists(candidate):
        return candidate

    if 'ANDROID_BUILD_TOP' in os.environ:
        candidate = os.path.join(os.environ['ANDROID_BUILD_TOP'], 'prebuilts/clang/host/linux-x86/llvm-binutils-stable/llvm-symbolizer')
        if os.path.exists(candidate):
            return candidate

    search_dirs = os.environ.get("PATH", os.defpath).split(os.pathsep)
    for directory in search_dirs:
        candidate = os.path.join(directory, 'llvm-symbolizer')
        if os.path.exists(candidate):
            return candidate

    for directory in search_dirs:
        candidates = glob.glob(os.path.join(directory, 'llvm-symbolizer-*'))
        if candidates:
            return max(candidates, key=extract_version)

    return None


def main():
    """Parse the command line and symbolize the report read from stdin."""
    global symbolizer

    parser = argparse.ArgumentParser()
    parser.add_argument('-d', action='store_true')
    parser.add_argument('-v', action='store_true')
    parser.add_argument('--ignore-tags', action='store_true')
    parser.add_argument('--symbols', action='append')
    parser.add_argument('--source', action='append')
    parser.add_argument('--symbolizer')
    parser.add_argument('args', nargs=argparse.REMAINDER)
    args = parser.parse_args()

    # Unstripped binaries location.
    binary_prefixes = args.symbols or []
    if not binary_prefixes and 'ANDROID_PRODUCT_OUT' in os.environ:
        binary_prefixes.append(os.path.join(os.environ['ANDROID_PRODUCT_OUT'], 'symbols'))

    for prefix in binary_prefixes:
        if not os.path.isdir(prefix):
            sys.stderr.write("Symbols path does not exist or is not a directory: %s\n" % (prefix,))
            sys.exit(1)

    # Source location.
    paths_to_cut = args.source or []
    if not paths_to_cut:
        paths_to_cut.append(os.getcwd() + '/')
        if 'ANDROID_BUILD_TOP' in os.environ:
            paths_to_cut.append(os.environ['ANDROID_BUILD_TOP'] + '/')

    # llvm-symbolizer binary.
    symbolizer_path = _find_symbolizer(args.symbolizer)
    if not symbolizer_path:
        # Nothing was found anywhere; os.path.exists(None) would raise.
        sys.stderr.write("Could not find llvm-symbolizer; use --symbolizer or set LLVM_SYMBOLIZER_PATH\n")
        sys.exit(1)
    if not os.path.exists(symbolizer_path):
        sys.stderr.write("Symbolizer path does not exist: %s\n" % (symbolizer_path,))
        sys.exit(1)

    if args.v:
        print("Looking for symbols in:")
        for s in binary_prefixes:
            print(" %s" % (s,))
        print("Stripping source path prefixes:")
        for s in paths_to_cut:
            print(" %s" % (s,))
        print("Using llvm-symbolizer binary in:\n %s" % (symbolizer_path,))
        print('')

    symbolizer = Symbolizer(symbolizer_path, binary_prefixes, paths_to_cut)
    symbolizer.enable_logging(args.d)

    for line in sys.stdin:
        if _PY2:
            # Python 2 yields bytes; Python 3 has already decoded the line.
            line = line.decode('utf-8')
        save_access_address(line)
        if process_stack_history(line, symbolizer, ignore_tags=args.ignore_tags):
            continue
        symbolize_line(line, symbolizer_path)


if __name__ == '__main__':
    main()