#!/usr/bin/python
#
# Copyright (C) 2013 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Module for looking up symbolic debugging information.

The information can include symbol names, offsets, and source locations.
"""

import glob
import os
import platform
import re
import subprocess
import unittest

# Root of the Android source tree. Fall back to the current directory when the
# environment variable is unset or empty (a plain os.environ[...] lookup would
# raise KeyError before the fallback could ever apply).
ANDROID_BUILD_TOP = os.environ.get("ANDROID_BUILD_TOP") or "."


def FindSymbolsDir():
  """Ask the build system where the unstripped binaries live.

  Runs make against build/core/config.mk from ANDROID_BUILD_TOP to dump the
  TARGET_OUT_UNSTRIPPED variable.

  Returns:
    ANDROID_BUILD_TOP joined with whatever make printed on stdout (stripped).
    If make printed nothing this is just ANDROID_BUILD_TOP with a trailing
    separator.
  """
  cmd = ("CALLED_FROM_SETUP=true BUILD_SYSTEM=build/core "
         "SRC_TARGET_DIR=build/target make -f build/core/config.mk "
         "dumpvar-abs-TARGET_OUT_UNSTRIPPED")
  # Run the command in the build top via cwd= instead of chdir()-ing the whole
  # process, and read the pipe in text mode so the result is a str on both
  # Python 2 and Python 3.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE, shell=True,
                           cwd=ANDROID_BUILD_TOP, universal_newlines=True)
  # communicate() drains and closes the pipe and reaps the child.
  output = child.communicate()[0]
  return os.path.join(ANDROID_BUILD_TOP, output.strip())


SYMBOLS_DIR = FindSymbolsDir()

# Target architecture used to select the toolchain; set by SetAbi().
ARCH = None


# These are private. Do not access them from other modules.
_CACHED_TOOLCHAIN = None
_CACHED_TOOLCHAIN_ARCH = None


def ToolPath(tool, toolchain=None):
  """Return a fully-qualified path to the specified tool.

  Args:
    tool: tool name without the target prefix, e.g. "addr2line".
    toolchain: toolchain bin directory to search. When falsy, the directory
        returned by FindToolchain() is used.

  Returns:
    The first path inside the toolchain directory matching "*-<tool>".

  Raises:
    IndexError: if nothing in the toolchain directory matches.
  """
  if not toolchain:
    toolchain = FindToolchain()
  return glob.glob(os.path.join(toolchain, "*-" + tool))[0]


def FindToolchain():
  """Returns the toolchain matching ARCH.

  The result is cached per ARCH value, so repeated calls are cheap until ARCH
  changes.

  Returns:
    The toolchain bin directory (the lexicographically last match under
    prebuilts/gcc).

  Raises:
    Exception: if no toolchain directory exists for ARCH, or if the chosen
        toolchain contains no addr2line binary.
  """
  global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH
  if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH:
    return _CACHED_TOOLCHAIN

  # We use slightly different names from GCC, and there's only one toolchain
  # for x86/x86_64. Note that these are the names of the top-level directory
  # rather than the _different_ names used lower down the directory hierarchy!
  gcc_dir = {
      "arm64": "aarch64",
      "mips64": "mips",
      "x86_64": "x86",
  }.get(ARCH, ARCH)

  os_name = platform.system().lower()

  available_toolchains = glob.glob(
      "%s/prebuilts/gcc/%s-x86/%s/*-linux-*/bin/" %
      (ANDROID_BUILD_TOP, os_name, gcc_dir))
  if not available_toolchains:
    raise Exception("Could not find tool chain for %s" % (ARCH))

  toolchain = sorted(available_toolchains)[-1]

  # Check with glob directly: ToolPath() would raise IndexError on a missing
  # tool, which would hide the more helpful message below.
  if not glob.glob(os.path.join(toolchain, "*-addr2line")):
    raise Exception("No addr2line for %s" % (toolchain))

  _CACHED_TOOLCHAIN = toolchain
  _CACHED_TOOLCHAIN_ARCH = ARCH
  # Single-argument print(...) behaves identically on Python 2 and Python 3.
  print("Using %s toolchain from: %s" %
        (_CACHED_TOOLCHAIN_ARCH, _CACHED_TOOLCHAIN))
  return _CACHED_TOOLCHAIN


def SymbolInformation(lib, addr):
  """Look up symbol information about an address.

  Args:
    lib: library (or executable) pathname containing symbols
    addr: string hexadecimal address

  Returns:
    A list of the form [(source_symbol, source_location,
    object_symbol_with_offset)].

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty, even if no information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  info = SymbolInformationForSet(lib, set([addr]))
  return (info and info.get(addr)) or [(None, None, None)]


def SymbolInformationForSet(lib, unique_addrs):
  """Look up symbol information for a set of addresses from the given library.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of hexadecimal addresses

  Returns:
    None if lib is empty or if either addr2line or objdump produced no
    information at all. Otherwise a dictionary of the form
    {addr: [(source_symbol, source_location, object_symbol_with_offset)]}
    where each address has a list of associated symbols and locations.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.  The list is
    always non-empty, even if no information is available.

    Usually you want to display the source_location and
    object_symbol_with_offset from the last element in the list.
  """
  if not lib:
    return None

  addr_to_line = CallAddr2LineForSet(lib, unique_addrs)
  if not addr_to_line:
    return None

  addr_to_objdump = CallObjdumpForSet(lib, unique_addrs)
  if not addr_to_objdump:
    return None

  result = {}
  for addr in unique_addrs:
    # Guarantee at least one entry per address, even with no source info.
    source_info = addr_to_line.get(addr) or [(None, None)]
    if addr in addr_to_objdump:
      object_symbol, object_offset = addr_to_objdump[addr]
      object_symbol_with_offset = FormatSymbolWithOffset(object_symbol,
                                                         object_offset)
    else:
      object_symbol_with_offset = None
    result[addr] = [(source_symbol, source_location, object_symbol_with_offset)
                    for (source_symbol, source_location) in source_info]

  return result


def CallAddr2LineForSet(lib, unique_addrs):
  """Look up line and symbol information for a set of addresses.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to look up.

  Returns:
    None if lib is empty or cannot be found. Otherwise a dictionary of the
    form {addr: [(symbol, file:line)]} where each address has a list of
    associated symbols and locations or an empty list if no symbol
    information was found. If addr2line exits early, addresses that were
    not yet processed are absent from the dictionary.

    If the function has been inlined then the list may contain
    more than one element with the symbols for the most deeply
    nested inlined location appearing first.
  """
  if not lib:
    return None

  # Prefer the unstripped copy under SYMBOLS_DIR; fall back to lib itself.
  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  cmd = [ToolPath("addr2line"), "--functions", "--inlines",
         "--demangle", "--exe=" + symbols]
  # Text-mode pipes so str reads/writes work on both Python 2 and Python 3.
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)

  result = {}
  try:
    for addr in sorted(unique_addrs):
      child.stdin.write("0x%s\n" % addr)
      child.stdin.flush()
      records = []
      first = True
      hit_eof = False
      while True:
        symbol_line = child.stdout.readline()
        location_line = child.stdout.readline()
        if not location_line:
          # readline() returns "" only at EOF: addr2line has gone away, so
          # stop instead of appending empty records forever.
          hit_eof = True
          break
        symbol = symbol_line.strip()
        if symbol == "??":
          symbol = None
        location = location_line.strip()
        if location == "??:0" or location == "??:?":
          location = None
        if symbol is None and location is None:
          break
        records.append((symbol, location))
        if first:
          # Write a blank line as a sentinel so we know when to stop
          # reading inlines from the output.
          # The blank line will cause addr2line to emit "??\n??:0\n".
          child.stdin.write("\n")
          # Flush so the sentinel reaches addr2line even when the pipe is
          # buffered; otherwise the next readline() could block forever.
          child.stdin.flush()
          first = False
      result[addr] = records
      if hit_eof:
        break
  finally:
    child.stdin.close()
    child.stdout.close()
    child.wait()  # Reap the child so it does not linger as a zombie.
  return result


def StripPC(addr):
  """Strips the Thumb bit from a program counter address when appropriate.

  Args:
    addr: the program counter address (integer)

  Returns:
    The address with bit 0 cleared when ARCH is "arm", otherwise the address
    unchanged.
  """
  if ARCH == "arm":
    return addr & ~1
  return addr


def CallObjdumpForSet(lib, unique_addrs):
  """Use objdump to find out the names of the containing functions.

  Args:
    lib: library (or executable) pathname containing symbols
    unique_addrs: set of string hexadecimal addresses to find the functions for.

  Returns:
    None if lib is empty or cannot be found. Otherwise a dictionary of the
    form {addr: (string symbol, offset)} holding an entry for every address
    that coincides with the start of a disassembled instruction.
  """
  if not lib:
    return None

  # Prefer the unstripped copy under SYMBOLS_DIR; fall back to lib itself.
  symbols = SYMBOLS_DIR + lib
  if not os.path.exists(symbols):
    symbols = lib
    if not os.path.exists(symbols):
      return None

  # Sort numerically: a plain string sort misorders hex strings of different
  # widths (e.g. "10000" < "9abc"), which would corrupt the address range and
  # the in-order walk below.
  addrs = sorted(unique_addrs, key=lambda a: int(a, 16))
  if not addrs:
    return {}

  start_addr_dec = str(StripPC(int(addrs[0], 16)))
  stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8)
  cmd = [ToolPath("objdump"),
         "--section=.text",
         "--demangle",
         "--disassemble",
         "--start-address=" + start_addr_dec,
         "--stop-address=" + stop_addr_dec,
         symbols]

  # Function lines look like:
  #   000177b0 <android::IBinder::~IBinder()+0x2c>:
  # We pull out the address and function first. Then we check for an optional
  # offset. This is tricky due to functions that look like "operator+(..)+0x2c"
  func_regexp = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
  offset_regexp = re.compile(r"(.*)\+0x([a-f0-9]*)")

  # A disassembly line looks like:
  #   177b2:  b510        push  {r4, lr}
  asm_regexp = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")

  current_symbol = None    # The current function symbol in the disassembly.
  current_symbol_addr = 0  # The address of the current function.
  addr_index = 0  # The address that we are currently looking for.

  # Text-mode pipe so the str regexes above work on Python 2 and Python 3.
  child = subprocess.Popen(cmd, stdout=subprocess.PIPE,
                           universal_newlines=True)
  stream = child.stdout
  result = {}
  try:
    for line in stream:
      # Is it a function line like:
      #   000177b0 <android::IBinder::~IBinder()>:
      components = func_regexp.match(line)
      if components:
        # This is a new function, so record the current function and its
        # address.
        current_symbol_addr = int(components.group(1), 16)
        current_symbol = components.group(2)

        # Does it have an optional offset like: "foo(..)+0x2c"?
        components = offset_regexp.match(current_symbol)
        if components:
          current_symbol = components.group(1)
          offset = components.group(2)
          if offset:
            current_symbol_addr -= int(offset, 16)

      # Is it a disassembly line like:
      #   177b2:  b510        push  {r4, lr}
      components = asm_regexp.match(line)
      if components:
        i_addr = int(components.group(1), 16)
        # Consume every pending target at or below this instruction address.
        # Exact hits are recorded. Targets the disassembly has already passed
        # (mid-instruction addresses) are skipped so that they cannot block
        # the lookup of all later addresses. Several targets may also map to
        # the same instruction once the Thumb bit is stripped.
        while addr_index < len(addrs):
          target_addr = addrs[addr_index]
          i_target = StripPC(int(target_addr, 16))
          if i_target > i_addr:
            break
          if i_target == i_addr:
            result[target_addr] = (current_symbol,
                                   i_target - current_symbol_addr)
          addr_index += 1
        if addr_index >= len(addrs):
          break
  finally:
    stream.close()
    child.wait()  # Reap the child so it does not linger as a zombie.

  return result


def CallCppFilt(mangled_symbol):
  """Demangle a C++ symbol with the toolchain's c++filt.

  Args:
    mangled_symbol: the mangled symbol name as a string.

  Returns:
    The first line c++filt printed, stripped of surrounding whitespace
    (an empty string if it printed nothing).
  """
  cmd = [ToolPath("c++filt")]
  child = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                           universal_newlines=True)
  # communicate() writes the input, closes stdin, drains stdout and reaps the
  # child in one step.
  output = child.communicate(mangled_symbol + "\n")[0]
  lines = output.splitlines()
  return lines[0].strip() if lines else ""


def FormatSymbolWithOffset(symbol, offset):
  """Format a symbol and a byte offset as "symbol+offset".

  Args:
    symbol: the symbol name.
    offset: integer offset from the start of the symbol.

  Returns:
    The symbol unchanged when the offset is zero, otherwise the symbol
    followed by "+" and the decimal offset.
  """
  if offset == 0:
    return symbol
  return "%s+%d" % (symbol, offset)


def GetAbiFromToolchain(toolchain_var, bits):
  """Derive an ABI name from a toolchain path held in an environment variable.

  Args:
    toolchain_var: name of the environment variable holding a toolchain path.
    bits: 64 to select the 64-bit ABI variant for x86 and mips paths; any
        other value keeps the name found in the path.

  Returns:
    The ABI name ("arm", "arm64", "mips", "mips64", "x86" or "x86_64"), or
    None if the variable is unset/empty or its value contains no recognised
    architecture directory.
  """
  toolchain = os.environ.get(toolchain_var)
  if not toolchain:
    return None

  toolchain_match = re.search(r"/(aarch64|arm|mips|x86)/", toolchain)
  if toolchain_match:
    abi = toolchain_match.group(1)
    if abi == "aarch64":
      return "arm64"
    elif bits == 64:
      if abi == "x86":
        return "x86_64"
      elif abi == "mips":
        return "mips64"
    return abi
  return None


def SetAbi(lines):
  """Set the global ARCH from the given input lines.

  The first line that either carries an explicit "ABI: '<name>'" marker or
  looks like a backtrace frame decides the result. For a backtrace frame only
  the address width is known, so the architecture is guessed from the
  ANDROID_TOOLCHAIN* environment variables, defaulting to arm/arm64.

  Args:
    lines: iterable of strings to scan.

  Raises:
    Exception: if no line allows the architecture to be determined.
  """
  global ARCH

  abi_line = re.compile(r"ABI: '(.*)'")
  trace_line = re.compile(
      r"#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)")

  ARCH = None
  for line in lines:
    abi_match = abi_line.search(line)
    if abi_match:
      ARCH = abi_match.group(1)
      break
    trace_match = trace_line.search(line)
    if trace_match:
      # Try to guess the arch, we know the bitness.
      if len(trace_match.group(1)) == 16:
        # 64 bit
        # Check for ANDROID_TOOLCHAIN, if it is set, we can figure out the
        # arch this way. If this is not set, then default to arm64.
        ARCH = GetAbiFromToolchain("ANDROID_TOOLCHAIN", 64)
        if not ARCH:
          ARCH = "arm64"
      else:
        # 32 bit
        # Check for ANDROID_TOOLCHAIN_2ND_ARCH first, if set, use that.
        # If not try ANDROID_TOOLCHAIN to find the arch.
        # If this is not set, then default to arm.
        ARCH = GetAbiFromToolchain("ANDROID_TOOLCHAIN_2ND_ARCH", 32)
        if not ARCH:
          ARCH = GetAbiFromToolchain("ANDROID_TOOLCHAIN", 32)
          if not ARCH:
            ARCH = "arm"
      break
  if not ARCH:
    raise Exception("Could not determine arch from input")


class FindToolchainTests(unittest.TestCase):
  """Checks that a prebuilt toolchain can be located for each architecture."""

  def assert_toolchain_found(self, abi):
    global ARCH
    ARCH = abi
    FindToolchain()  # Will throw on failure.

  def test_toolchains_found(self):
    self.assert_toolchain_found("arm")
    self.assert_toolchain_found("arm64")
    self.assert_toolchain_found("mips")
    self.assert_toolchain_found("x86")
    self.assert_toolchain_found("x86_64")


class SetArchTests(unittest.TestCase):
  """Checks how SetAbi() derives ARCH from input lines and the environment."""

  def setUp(self):
    # The tests below wipe os.environ; keep a copy so it can be restored.
    self._saved_environ = dict(os.environ)

  def tearDown(self):
    os.environ.clear()
    os.environ.update(self._saved_environ)

  def test_abi_check(self):
    SetAbi(["ABI: 'arm'"])
    self.assertEqual(ARCH, "arm")
    SetAbi(["ABI: 'arm64'"])
    self.assertEqual(ARCH, "arm64")

    SetAbi(["ABI: 'mips'"])
    self.assertEqual(ARCH, "mips")
    SetAbi(["ABI: 'mips64'"])
    self.assertEqual(ARCH, "mips64")

    SetAbi(["ABI: 'x86'"])
    self.assertEqual(ARCH, "x86")
    SetAbi(["ABI: 'x86_64'"])
    self.assertEqual(ARCH, "x86_64")

  def test_32bit_trace_line_toolchain(self):
    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin"
    SetAbi(["#00 pc 000374e0"])
    self.assertEqual(ARCH, "arm")

    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/mips/arm-linux-androideabi-4.9/bin"
    SetAbi(["#00 pc 000374e0"])
    self.assertEqual(ARCH, "mips")

    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin"
    SetAbi(["#00 pc 000374e0"])
    self.assertEqual(ARCH, "x86")

  def test_32bit_trace_line_toolchain_2nd(self):
    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin"
    # ANDROID_TOOLCHAIN (not ANDROID_TOOLCHAIN_ARCH) is the variable SetAbi
    # consults, so this checks that the 2ND_ARCH variable takes precedence.
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/aarch64/aarch64-linux-android-4.9/bin"
    SetAbi(["#00 pc 000374e0"])
    self.assertEqual(ARCH, "arm")

    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/mips/mips-linux-androideabi-4.9/bin"
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"
    SetAbi(["#00 pc 000374e0"])
    self.assertEqual(ARCH, "mips")

    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/x86/x86-linux-androideabi-4.9/bin"
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/unknown/unknown-linux-androideabi-4.9/bin"
    SetAbi(["#00 pc 000374e0"])
    self.assertEqual(ARCH, "x86")

  def test_64bit_trace_line_toolchain(self):
    os.environ.clear()
    # Use a path that really contains "/aarch64/" so the toolchain lookup is
    # exercised rather than the arm64 default.
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/aarch64/aarch64-linux-android-4.9/bin"
    SetAbi(["#00 pc 00000000000374e0"])
    self.assertEqual(ARCH, "arm64")

    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/mips/arm-linux-androideabi-4.9/bin"
    SetAbi(["#00 pc 00000000000374e0"])
    self.assertEqual(ARCH, "mips64")

    os.environ.clear()
    os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin"
    SetAbi(["#00 pc 00000000000374e0"])
    self.assertEqual(ARCH, "x86_64")

  def test_default_abis(self):
    os.environ.clear()
    SetAbi(["#00 pc 000374e0"])
    self.assertEqual(ARCH, "arm")
    SetAbi(["#00 pc 00000000000374e0"])
    self.assertEqual(ARCH, "arm64")

  def test_no_abi(self):
    # assertRaises as a context manager works on Python 2.7 and Python 3,
    # unlike the assertRaisesRegexp/assertRaisesRegex aliases.
    with self.assertRaises(Exception) as caught:
      SetAbi([])
    self.assertIn("Could not determine arch from input", str(caught.exception))


if __name__ == '__main__':
  unittest.main()