#!/usr/bin/env python
#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""utils.py: export utility functions.
"""

from __future__ import print_function
import argparse
import logging
import os
import os.path
import re
import shutil
import subprocess
import sys
import time


def get_script_dir():
    """ Return the directory containing this file (symlinks resolved). """
    return os.path.dirname(os.path.realpath(__file__))


def is_windows():
    """ Return True when running on win32 or cygwin. """
    return sys.platform == 'win32' or sys.platform == 'cygwin'


def is_darwin():
    """ Return True when running on macOS. """
    return sys.platform == 'darwin'


def get_platform():
    """ Return the host platform name: 'windows', 'darwin' or 'linux'. """
    if is_windows():
        return 'windows'
    if is_darwin():
        return 'darwin'
    return 'linux'


def is_python3():
    """ Return True when the interpreter is Python 3 or newer. """
    return sys.version_info >= (3, 0)


def log_debug(msg):
    """ Log msg at DEBUG level on the root logger. """
    logging.debug(msg)


def log_info(msg):
    """ Log msg at INFO level on the root logger. """
    logging.info(msg)


def log_warning(msg):
    """ Log msg at WARNING level on the root logger. """
    logging.warning(msg)


def log_fatal(msg):
    """ Report an unrecoverable error by raising Exception(msg). """
    raise Exception(msg)


def log_exit(msg):
    """ Terminate the script through sys.exit(msg). """
    sys.exit(msg)


def disable_debug_log():
    """ Raise the root logger level to WARNING. """
    logging.getLogger().setLevel(logging.WARNING)


def set_log_level(level_name):
    """ Set the root logger level from a name.
        level_name: one of 'debug', 'info', 'warning'. Any other value raises
                    through log_fatal().
    """
    if level_name == 'debug':
        level = logging.DEBUG
    elif level_name == 'info':
        level = logging.INFO
    elif level_name == 'warning':
        level = logging.WARNING
    else:
        log_fatal('unknown log level: %s' % level_name)
    logging.getLogger().setLevel(level)


def str_to_bytes(str_value):
    """ Convert a str to the byte string passed to subprocesses / C apis. """
    if not is_python3():
        return str_value
    # In python 3, str are wide strings whereas the C api expects 8 bit strings,
    # hence we have to convert. For now using utf-8 as the encoding.
    return str_value.encode('utf-8')


def bytes_to_str(bytes_value):
    """ Convert a byte string to str. Empty or None input gives ''. """
    if not bytes_value:
        return ''
    if not is_python3():
        return bytes_value
    return bytes_value.decode('utf-8')


def get_target_binary_path(arch, binary_name):
    """ Return the path of a prebuilt device binary: <script_dir>/bin/android/<arch>/<name>.
        'aarch64' is accepted as an alias of 'arm64'. Raises through log_fatal()
        when the arch directory or the binary doesn't exist.
    """
    if arch == 'aarch64':
        arch = 'arm64'
    arch_dir = os.path.join(get_script_dir(), "bin", "android", arch)
    if not os.path.isdir(arch_dir):
        log_fatal("can't find arch directory: %s" % arch_dir)
    binary_path = os.path.join(arch_dir, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path


def get_host_binary_path(binary_name):
    """ Return the path of a prebuilt host binary:
        <script_dir>/bin/<platform>/<x86_64|x86>/<name>.
        binary_name is given in its linux form; a '.so' suffix is mapped to '.dll'
        on windows and '.dylib' on darwin, and '.exe' is appended on windows to
        names without any '.'. Raises through log_fatal() when the file is missing.
    """
    dirname = os.path.join(get_script_dir(), 'bin')
    if is_windows():
        if binary_name.endswith('.so'):
            binary_name = binary_name[0:-3] + '.dll'
        elif '.' not in binary_name:
            binary_name += '.exe'
        dirname = os.path.join(dirname, 'windows')
    elif is_darwin():
        if binary_name.endswith('.so'):
            binary_name = binary_name[0:-3] + '.dylib'
        dirname = os.path.join(dirname, 'darwin')
    else:
        dirname = os.path.join(dirname, 'linux')
    # Pick the directory matching the bitness of the running interpreter.
    dirname = os.path.join(dirname, 'x86_64' if sys.maxsize > 2 ** 32 else 'x86')
    binary_path = os.path.join(dirname, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path


def is_executable_available(executable, option='--help'):
    """ Run an executable to see if it exists.
        Returns True only if `executable option` can be started and exits with 0.
    """
    try:
        subproc = subprocess.Popen([executable, option], stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
        subproc.communicate()
        return subproc.returncode == 0
    except OSError:
        return False


# Default ndk installation path on each platform, relative to the home directory.
DEFAULT_NDK_PATH = {
    'darwin': 'Library/Android/sdk/ndk-bundle',
    'linux': 'Android/Sdk/ndk-bundle',
    'windows': 'AppData/Local/Android/sdk/ndk-bundle',
}

# Tools find_tool_path() knows how to locate.
#   is_binutils: the tool is looked up with an arch specific prefix.
#   test_option: option used to probe the tool (default '--help').
#   path_in_ndk: function mapping platform name to the tool path inside the ndk.
#   accept_tool_without_arch: also accept the unprefixed tool found in $PATH.
EXPECTED_TOOLS = {
    'adb': {
        'is_binutils': False,
        'test_option': 'version',
        'path_in_ndk': lambda _: '../platform-tools/adb',
    },
    'readelf': {
        'is_binutils': True,
        'accept_tool_without_arch': True,
    },
    'llvm-symbolizer': {
        'is_binutils': False,
        'path_in_ndk':
            lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
    },
    'objdump': {
        'is_binutils': True,
    },
    'strip': {
        'is_binutils': True,
    },
}

# Tool name prefix used by the ndk for each supported arch.
_BINUTILS_PREFIX = {
    'arm64': 'aarch64-linux-android-',
    'arm': 'arm-linux-androideabi-',
    'x86_64': 'x86_64-linux-android-',
    'x86': 'i686-linux-android-',
}


def _get_binutils_path_in_ndk(toolname, arch, platform):
    """ Return (toolname_with_arch, path_in_ndk) for a binutils tool.
        arch defaults to 'arm64' when empty. An unsupported arch raises through
        log_fatal().
    """
    if not arch:
        arch = 'arm64'
    prefix = _BINUTILS_PREFIX.get(arch)
    if prefix is None:
        log_fatal('unexpected arch %s' % arch)
    name = prefix + toolname
    path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name)
    return (name, path)


def find_tool_path(toolname, ndk_path=None, arch=None):
    """ Find a runnable copy of one of EXPECTED_TOOLS.
        toolname: a key of EXPECTED_TOOLS.
        ndk_path: optional ndk directory searched first.
        arch: target arch for binutils tools ('arm64' if not given).
        Returns the path (or bare command name) of the first candidate that can
        be executed, or None if toolname is unknown or no candidate works.
    """
    if toolname not in EXPECTED_TOOLS:
        return None
    tool_info = EXPECTED_TOOLS[toolname]
    is_binutils = tool_info['is_binutils']
    test_option = tool_info.get('test_option', '--help')
    platform = get_platform()
    if is_binutils:
        toolname_with_arch, path_in_ndk = _get_binutils_path_in_ndk(toolname, arch, platform)
    else:
        toolname_with_arch = toolname
        path_in_ndk = tool_info['path_in_ndk'](platform)
    path_in_ndk = path_in_ndk.replace('/', os.sep)

    # 1. Find tool in the given ndk path.
    if ndk_path:
        path = os.path.join(ndk_path, path_in_ndk)
        if is_executable_available(path, test_option):
            return path

    # 2. Find tool in the ndk directory containing simpleperf scripts.
    # NOTE: '..' is resolved against the current working directory, not the
    # script directory.
    path = os.path.join('..', path_in_ndk)
    if is_executable_available(path, test_option):
        return path

    # 3. Find tool in the default ndk installation path.
    home = os.environ.get('HOMEPATH') if is_windows() else os.environ.get('HOME')
    if home:
        default_ndk_path = os.path.join(home, DEFAULT_NDK_PATH[platform].replace('/', os.sep))
        path = os.path.join(default_ndk_path, path_in_ndk)
        if is_executable_available(path, test_option):
            return path

    # 4. Find tool in $PATH.
    if is_executable_available(toolname_with_arch, test_option):
        return toolname_with_arch

    # 5. Find tool without arch in $PATH.
    if is_binutils and tool_info.get('accept_tool_without_arch'):
        if is_executable_available(toolname, test_option):
            return toolname
    return None


class AdbHelper(object):
    """ A wrapper running adb commands on the connected device. """

    def __init__(self, enable_switch_to_root=True):
        """ Locate adb (exits the script if it can't be found).
            enable_switch_to_root: whether switch_to_root() may restart adbd as root.
        """
        adb_path = find_tool_path('adb')
        if not adb_path:
            log_exit("Can't find adb in PATH environment.")
        self.adb_path = adb_path
        self.enable_switch_to_root = enable_switch_to_root

    def run(self, adb_args):
        """ Run `adb <adb_args>` and return True if it exits with 0. """
        return self.run_and_return_output(adb_args)[0]

    def run_and_return_output(self, adb_args, log_output=True, log_stderr=True):
        """ Run `adb <adb_args>` and capture its output.
            adb_args: list of arguments following `adb`.
            log_output: log stdout at debug level (never done for push/pull).
            log_stderr: log stderr at warning level.
            Returns (result, stdout_data): result is True if adb exits with 0,
            stdout_data is the decoded stdout.
        """
        adb_args = [self.adb_path] + adb_args
        log_debug('run adb cmd: %s' % adb_args)
        subproc = subprocess.Popen(adb_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout_data, stderr_data = subproc.communicate()
        stdout_data = bytes_to_str(stdout_data)
        stderr_data = bytes_to_str(stderr_data)
        returncode = subproc.returncode
        result = (returncode == 0)
        if log_output and stdout_data and adb_args[1] != 'push' and adb_args[1] != 'pull':
            log_debug(stdout_data)
        if log_stderr and stderr_data:
            log_warning(stderr_data)
        log_debug('run adb cmd: %s [result %s]' % (adb_args, result))
        return (result, stdout_data)

    def check_run(self, adb_args):
        """ Run `adb <adb_args>` and exit the script if it fails. """
        self.check_run_and_return_output(adb_args)

    def check_run_and_return_output(self, adb_args, stdout_file=None, log_output=True):
        """ Run `adb <adb_args>`, exit the script if it fails, else return its stdout.
            adb_args: list of arguments following `adb`.
            stdout_file: kept for backward compatibility with existing callers;
                         it is currently unused, because run_and_return_output()
                         has no parameter to redirect stdout to a file.
            log_output: log stdout at debug level.
        """
        # Pass log_output by keyword. Passing (stdout_file, log_output) positionally
        # would bind them to run_and_return_output's (log_output, log_stderr).
        result, stdoutdata = self.run_and_return_output(adb_args, log_output=log_output)
        if not result:
            log_exit('run "adb %s" failed' % adb_args)
        return stdoutdata

    def _unroot(self):
        """ Restart adbd as non-root if the adb shell currently runs as root. """
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return
        if 'root' not in stdoutdata:
            return
        log_info('unroot adb')
        self.run(['unroot'])
        self.run(['wait-for-device'])
        time.sleep(1)

    def switch_to_root(self):
        """ Try to make the adb shell run as root.
            When switching is disabled, the device is unrooted instead and False
            is returned. Returns True if the shell runs as root afterwards.
        """
        if not self.enable_switch_to_root:
            self._unroot()
            return False
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return False
        if 'root' in stdoutdata:
            return True
        # `adb root` isn't attempted on user builds.
        build_type = self.get_property('ro.build.type')
        if build_type == 'user':
            return False
        self.run(['root'])
        time.sleep(1)
        self.run(['wait-for-device'])
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        return result and 'root' in stdoutdata

    def get_property(self, name):
        """ Return the output of `getprop <name>`, or None if the command fails. """
        result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name])
        return stdoutdata if result else None

    def set_property(self, name, value):
        """ Run `setprop <name> <value>` and return True on success. """
        return self.run(['shell', 'setprop', name, value])

    def get_device_arch(self):
        """ Return the device arch ('arm64', 'arm', 'x86_64' or 'x86') from `uname -m`.
            Raises through log_fatal() for any other architecture.
        """
        output = self.check_run_and_return_output(['shell', 'uname', '-m'])
        # These are substring tests, so the more specific names are checked first.
        if 'aarch64' in output:
            return 'arm64'
        if 'arm' in output:
            return 'arm'
        if 'x86_64' in output:
            return 'x86_64'
        if '86' in output:
            return 'x86'
        log_fatal('unsupported architecture: %s' % output.strip())
        return ''

    def get_android_version(self):
        """ Get Android version on device, like 7 is for Android N, 8 is for Android O.
            Returns 0 when the version can't be determined.
        """
        build_version = self.get_property('ro.build.version.release')
        android_version = 0
        if build_version:
            if not build_version[0].isdigit():
                # The release is a code name letter: 'L' maps to 5, 'M' to 6, etc.
                c = build_version[0].upper()
                if c.isupper() and c >= 'L':
                    android_version = ord(c) - ord('L') + 5
            else:
                strs = build_version.split('.')
                if strs:
                    android_version = int(strs[0])
        return android_version


def flatten_arg_list(arg_list):
    """ Concatenate a list of lists into one list. None or empty input gives []. """
    res = []
    if arg_list:
        for items in arg_list:
            res += items
    return res


def remove(dir_or_file):
    """ Delete a file, or a directory tree (ignoring errors). Other paths are left alone. """
    if os.path.isfile(dir_or_file):
        os.remove(dir_or_file)
    elif os.path.isdir(dir_or_file):
        shutil.rmtree(dir_or_file, ignore_errors=True)


def open_report_in_browser(report_path):
    """ Open a report file in a web browser. """
    if is_darwin():
        # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first.
        try:
            subprocess.check_call(['open', report_path])
            return
        except (OSError, subprocess.CalledProcessError):
            # `open` is missing or failed, fall back to webbrowser below.
            pass
    import webbrowser
    try:
        # Try to open the report with Chrome
        browser = webbrowser.get('google-chrome')
        browser.open(report_path, new=0, autoraise=True)
    except webbrowser.Error:
        # webbrowser.get() doesn't work well on darwin/windows.
        webbrowser.open_new_tab(report_path)


def is_elf_file(path):
    """ Return True if path is a regular file starting with the ELF magic number. """
    if os.path.isfile(path):
        with open(path, 'rb') as fh:
            return fh.read(4) == b'\x7fELF'
    return False


def find_real_dso_path(dso_path_in_record_file, binary_cache_path):
    """ Given the path of a shared library in perf.data, find its real path in the file system.
        The copy under binary_cache_path is preferred over the path itself.
        Returns None if neither is an elf file.
    """
    if binary_cache_path:
        # Drop the first character (the leading '/' of an absolute path) so the
        # path can be joined under binary_cache_path.
        tmp_path = os.path.join(binary_cache_path, dso_path_in_record_file[1:])
        if is_elf_file(tmp_path):
            return tmp_path
    if is_elf_file(dso_path_in_record_file):
        return dso_path_in_record_file
    return None


class Addr2Nearestline(object):
    """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).
        For instructions generated by C++ compilers without a matching statement in source code
        (like stack corruption check, switch optimization, etc.), the symbolizer can't generate
        line information. However, we want to assign the instruction to the nearest line before
        the instruction (just like objdump -dl). So we use below strategy:
        Instead of finding the exact line of the instruction in an address, we find the nearest
        line to the instruction in an address. If an address doesn't have a line info, we find
        the line info of address - 1. If still no line info, then use address - 2, address - 3,
        etc.

        The implementation steps are as below:
        1. Collect all (dso_path, func_addr, addr) requests before converting. This reduces the
           number of times llvm-symbolizer is called.
        2. Convert addrs to (source_file, line) pairs for each dso_path as below:
          2.1 Check if the dso_path has .debug_line. If not, omit its conversion.
          2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we
              change addr each time. For example, since instructions of arm64 are all 4 bytes long,
              addr_step for arm64 can be 4.
          2.3 Use llvm-symbolizer to find line info for each addr in the dso_path.
          2.4 For each addr without line info, use llvm-symbolizer to find line info for
              range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step).
          2.5 For each addr without line info, use llvm-symbolizer to find line info for
              range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step).
              (128 is a guess number. A nested switch statement in
               system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.)
    """
    class Dso(object):
        """ Info of a dynamic shared library.
            addrs: a map from address to Addr object in this dso.
        """
        def __init__(self):
            self.addrs = {}

    class Addr(object):
        """ Info of an addr request.
            func_addr: start_addr of the function containing addr.
            source_lines: a list of (file_id, line_number) for addr, or
                          (file_id, line_number, func_id) when function names are requested.
                          source_lines[:-1] are all for inlined functions.
        """
        def __init__(self, func_addr):
            self.func_addr = func_addr
            self.source_lines = None

    def __init__(self, ndk_path, binary_cache_path, with_function_name):
        """ ndk_path: optional ndk directory used to find llvm-symbolizer and readelf.
            binary_cache_path: directory holding copies of the profiled binaries.
            with_function_name: also report the function name of each source line.
            Exits the script if llvm-symbolizer can't be found.
        """
        self.symbolizer_path = find_tool_path('llvm-symbolizer', ndk_path)
        if not self.symbolizer_path:
            log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
        self.readelf = ReadElf(ndk_path)
        self.dso_map = {}  # map from dso_path to Dso.
        self.binary_cache_path = binary_cache_path
        self.with_function_name = with_function_name
        # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
        # and provide data structures connecting file id and file name here.
        self.file_name_to_id = {}
        self.file_id_to_name = []
        self.func_name_to_id = {}
        self.func_id_to_name = []

    def add_addr(self, dso_path, func_addr, addr):
        """ Register a request for the source line of addr in dso_path. """
        dso = self.dso_map.get(dso_path)
        if dso is None:
            dso = self.dso_map[dso_path] = self.Dso()
        if addr not in dso.addrs:
            dso.addrs[addr] = self.Addr(func_addr)

    def convert_addrs_to_lines(self):
        """ Resolve all requests registered through add_addr(). """
        for dso_path in self.dso_map:
            self._convert_addrs_in_one_dso(dso_path, self.dso_map[dso_path])

    def _convert_addrs_in_one_dso(self, dso_path, dso):
        """ Resolve the requests of one dso, widening the search window in three rounds. """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if not real_path:
            if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
                log_debug("Can't find dso %s" % dso_path)
            return

        if not self._check_debug_line_section(real_path):
            log_debug("file %s doesn't contain .debug_line section." % real_path)
            return

        addr_step = self._get_addr_step(real_path)
        self._collect_line_info(dso, real_path, [0])
        self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step))
        self._collect_line_info(dso, real_path,
                                range(-addr_step * 5, -addr_step * 128 - 1, -addr_step))

    def _check_debug_line_section(self, real_path):
        """ Return True if the elf file has a .debug_line section. """
        return '.debug_line' in self.readelf.get_sections(real_path)

    def _get_addr_step(self, real_path):
        """ Return the step used when moving an addr backwards: 4 for arm64, 2 for arm, else 1. """
        arch = self.readelf.get_arch(real_path)
        if arch == 'arm64':
            return 4
        if arch == 'arm':
            return 2
        return 1

    def _collect_line_info(self, dso, real_path, addr_shifts):
        """ Use llvm-symbolizer to get line info in a dso, with given addr shifts. """
        # 1. Collect addrs to send to llvm-symbolizer.
        addr_set = set()
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:  # already has source line, no need to search.
                continue
            for shift in addr_shifts:
                # The addr after shift shouldn't change to another function.
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                addr_set.add(shifted_addr)
                if shifted_addr == addr_obj.func_addr:
                    break
        if not addr_set:
            return
        addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)])

        # 2. Use llvm-symbolizer to collect line info.
        try:
            subproc = subprocess.Popen(self._build_symbolizer_args(real_path),
                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request))
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return
        # The output is parsed as, for each address: a line starting with '0x',
        # then one entry per (inlined) frame, where an entry is an optional
        # function name line followed by a file:line:column line.
        addr_map = {}
        cur_line_list = None
        need_function_name = self.with_function_name
        cur_function_name = None
        for line in stdoutdata.strip().split('\n'):
            line = line.strip()
            if not line:
                continue
            if line[:2] == '0x':
                # a new address
                cur_line_list = addr_map[int(line, 16)] = []
            elif need_function_name:
                cur_function_name = line.strip()
                need_function_name = False
            else:
                need_function_name = self.with_function_name
                if cur_line_list is None:
                    continue
                file_path, line_number = self._parse_source_location(line)
                if not file_path or not line_number:
                    # An addr can have a list of (file, line), when the addr belongs to an inlined
                    # function. Sometimes only part of the list has ? mark. In this case, we think
                    # the line info is valid if the first line doesn't have ? mark.
                    if not cur_line_list:
                        cur_line_list = None
                    continue
                file_id = self._get_file_id(file_path)
                if self.with_function_name:
                    func_id = self._get_func_id(cur_function_name)
                    cur_line_list.append((file_id, line_number, func_id))
                else:
                    cur_line_list.append((file_id, line_number))

        # 3. Fill line info in dso.addrs.
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:
                continue
            for shift in addr_shifts:
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                lines = addr_map.get(shifted_addr)
                if lines:
                    addr_obj.source_lines = lines
                    break
                if shifted_addr == addr_obj.func_addr:
                    break

    def _build_symbolizer_args(self, binary_path):
        """ Return the llvm-symbolizer command line for binary_path. """
        args = [self.symbolizer_path, '-print-address', '-inlining', '-obj=%s' % binary_path]
        if self.with_function_name:
            args += ['-functions=linkage', '-demangle']
        else:
            args.append('-functions=none')
        return args

    def _parse_source_location(self, line):
        """ Parse a "file:line:column" line into (file_path, line_number).
            Returns (None, None) if the line isn't in that format, contains a
            '?' placeholder, or has a non-numeric line number.
        """
        file_path, line_number = None, None
        # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
        # Filename may contain ':' like "C:\Users\...\file".
        items = line.rsplit(':', 2)
        if len(items) == 3:
            file_path, line_number = items[:2]
        if not file_path or ('?' in file_path) or not line_number or ('?' in line_number):
            return None, None
        try:
            line_number = int(line_number)
        except ValueError:
            return None, None
        return file_path, line_number

    def _get_file_id(self, file_path):
        """ Return the id of file_path, allocating a new one on first use. """
        file_id = self.file_name_to_id.get(file_path)
        if file_id is None:
            file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
            self.file_id_to_name.append(file_path)
        return file_id

    def _get_func_id(self, func_name):
        """ Return the id of func_name, allocating a new one on first use. """
        func_id = self.func_name_to_id.get(func_name)
        if func_id is None:
            func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
            self.func_id_to_name.append(func_name)
        return func_id

    def get_dso(self, dso_path):
        """ Return the Dso registered for dso_path, or None. """
        return self.dso_map.get(dso_path)

    def get_addr_source(self, dso, addr):
        """ Return the source lines found for addr in dso, or None if there are none.
            Each item is (file_name, line), or (file_name, line, function_name) when
            function names are requested. addr must have been added with add_addr().
        """
        source = dso.addrs[addr].source_lines
        if source is None:
            return None
        if self.with_function_name:
            return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id])
                    for (file_id, line, func_id) in source]
        return [(self.file_id_to_name[file_id], line) for (file_id, line) in source]


class SourceFileSearcher(object):
    """ Find source file paths in the file system.
        The file paths reported by the symbolizer are the paths stored in debug sections
        of shared libraries. And we need to convert them to file paths in the file
        system. It is done in below steps:
        1. Collect all file paths under the provided source_dirs. The suffix of a
           source file should contain one of below:
            h: for C/C++ header files.
            c: for C/C++ source files.
            java: for Java source files.
            kt: for Kotlin source files.
        2. Given an abstract_path reported by the symbolizer, select the best real path
           as below:
           2.1 Find all real paths with the same file name as the abstract path.
           2.2 Select the real path having the longest common suffix with the abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename):
        """ Return True if the extension of filename is in SOURCE_FILE_EXTS. """
        ext = os.path.splitext(filename)[1]
        return ext in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs):
        """ source_dirs: list of directories to scan for source files. """
        # Map from filename to a list of reversed directory path containing filename.
        self.filename_to_rparents = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs):
        """ Walk source_dirs and record the directory of every source file. """
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                # Reverse the directory path lazily, at most once per directory.
                rparent = None
                for file_name in file_names:
                    if self.is_source_filename(file_name):
                        rparents = self.filename_to_rparents.get(file_name)
                        if rparents is None:
                            rparents = self.filename_to_rparents[file_name] = []
                        if rparent is None:
                            rparent = parent[::-1]
                        rparents.append(rparent)

    def get_real_path(self, abstract_path):
        """ Return the collected file path best matching abstract_path, or None if
            no collected file has the same file name.
        """
        abstract_path = abstract_path.replace('/', os.sep)
        abstract_parent, file_name = os.path.split(abstract_path)
        abstract_rparent = abstract_parent[::-1]
        real_rparents = self.filename_to_rparents.get(file_name)
        if real_rparents is None:
            return None
        best_matched_rparent = None
        best_common_length = -1
        for real_rparent in real_rparents:
            # Paths are stored reversed, so their common prefix is the common
            # suffix of the original paths.
            length = len(os.path.commonprefix((real_rparent, abstract_rparent)))
            if length > best_common_length:
                best_common_length = length
                best_matched_rparent = real_rparent
        if best_matched_rparent is None:
            return None
        return os.path.join(best_matched_rparent[::-1], file_name)


class Objdump(object):
    """ A wrapper of objdump to disassemble code. """
    def __init__(self, ndk_path, binary_cache_path):
        """ ndk_path: optional ndk directory used to find objdump and readelf.
            binary_cache_path: directory holding copies of the profiled binaries.
        """
        self.ndk_path = ndk_path
        self.binary_cache_path = binary_cache_path
        self.readelf = ReadElf(ndk_path)
        self.objdump_paths = {}  # map from arch to objdump path.

    def get_dso_info(self, dso_path):
        """ Return (real_path, arch) for dso_path, or None if the file can't be
            found or its arch is unknown.
        """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if not real_path:
            return None
        arch = self.readelf.get_arch(real_path)
        if arch == 'unknown':
            return None
        return (real_path, arch)

    def disassemble_code(self, dso_info, start_addr, addr_len):
        """ Disassemble [start_addr, start_addr + addr_len] of dso_path.
            dso_info: a (real_path, arch) pair as returned by get_dso_info().
            Return a list of pair (disassemble_code_line, addr), or None if objdump
            can't be run or prints nothing. addr is 0 for lines not starting with
            a hex address. Exits the script if objdump can't be found.
        """
        # 1. Find objdump for the arch, caching the result.
        real_path, arch = dso_info
        objdump_path = self.objdump_paths.get(arch)
        if not objdump_path:
            objdump_path = find_tool_path('objdump', self.ndk_path, arch)
            if not objdump_path:
                log_exit("Can't find objdump. Please set ndk path with --ndk_path option.")
            self.objdump_paths[arch] = objdump_path

        # 2. Run objdump.
        args = [objdump_path, '-dlC', '--no-show-raw-insn',
                '--start-address=0x%x' % start_addr,
                '--stop-address=0x%x' % (start_addr + addr_len),
                real_path]
        try:
            subproc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate()
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return None

        if not stdoutdata:
            return None

        # 3. Pair each output line with the address it starts with.
        result = []
        for line in stdoutdata.split('\n'):
            line = line.rstrip()  # Remove '\r' on Windows.
            items = line.split(':', 1)
            try:
                addr = int(items[0], 16)
            except ValueError:
                addr = 0
            result.append((line, addr))
        return result


class ReadElf(object):
    """ A wrapper of readelf. """
    def __init__(self, ndk_path):
        """ Locate readelf (exits the script if it can't be found). """
        self.readelf_path = find_tool_path('readelf', ndk_path)
        if not self.readelf_path:
            log_exit("Can't find readelf. Please set ndk path with --ndk_path option.")

    def get_arch(self, elf_file_path):
        """ Get arch of an elf file.
            Returns 'arm64', 'arm', 'x86_64', 'x86', or 'unknown'.
        """
        if is_elf_file(elf_file_path):
            try:
                output = subprocess.check_output([self.readelf_path, '-h', elf_file_path])
                output = bytes_to_str(output)
                if output.find('AArch64') != -1:
                    return 'arm64'
                if output.find('ARM') != -1:
                    return 'arm'
                if output.find('X86-64') != -1:
                    return 'x86_64'
                if output.find('80386') != -1:
                    return 'x86'
            except subprocess.CalledProcessError:
                pass
        return 'unknown'

    def get_build_id(self, elf_file_path, with_padding=True):
        """ Get build id of an elf file.
            with_padding: return the build id in the form given by pad_build_id().
            Returns "" if the build id can't be read.
        """
        if is_elf_file(elf_file_path):
            try:
                output = subprocess.check_output([self.readelf_path, '-n', elf_file_path])
                output = bytes_to_str(output)
                result = re.search(r'Build ID:\s*(\S+)', output)
                if result:
                    build_id = result.group(1)
                    if with_padding:
                        build_id = self.pad_build_id(build_id)
                    return build_id
            except subprocess.CalledProcessError:
                pass
        return ""

    @staticmethod
    def pad_build_id(build_id):
        """ Pad build id to 40 hex numbers (20 bytes).
            Longer ids are truncated to 40 characters. The result is prefixed with '0x'.
        """
        if len(build_id) < 40:
            build_id += '0' * (40 - len(build_id))
        else:
            build_id = build_id[:40]
        return '0x' + build_id

    def get_sections(self, elf_file_path):
        """ Get sections of an elf file.
            Returns a list of section names, empty if they can't be read.
        """
        section_names = []
        if is_elf_file(elf_file_path):
            try:
                output = subprocess.check_output([self.readelf_path, '-SW', elf_file_path])
                output = bytes_to_str(output)
                for line in output.split('\n'):
                    # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
                    result = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line)
                    if result:
                        section_name = result.group(1).strip()
                        if section_name:
                            section_names.append(section_name)
            except subprocess.CalledProcessError:
                pass
        return section_names


def extant_dir(arg):
    """ArgumentParser type that only accepts extant directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a directory.
    """
    path = os.path.realpath(arg)
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError('{} is not a directory.'.format(path))
    return path


def extant_file(arg):
    """ArgumentParser type that only accepts extant files.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a file.
    """
    path = os.path.realpath(arg)
    if not os.path.isfile(path):
        raise argparse.ArgumentTypeError('{} is not a file.'.format(path))
    return path


# Debug logging is on by default; use disable_debug_log() or set_log_level() to change it.
logging.getLogger().setLevel(logging.DEBUG)