1#!/usr/bin/env python3
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""utils.py: export utility functions.
19"""
20
from __future__ import annotations
import argparse
import logging
import os
import os.path
from pathlib import Path
import re
import shutil
import subprocess
import sys
import time
from typing import Dict, Iterator, List, Optional, Set, Tuple, Union
33
34
def get_script_dir() -> str:
    """ Return the directory that contains this script, with symlinks resolved. """
    script_file = os.path.realpath(__file__)
    return os.path.dirname(script_file)
37
38
def is_windows() -> bool:
    """ Return True when sys.platform reports native Windows or Cygwin. """
    return sys.platform in ('win32', 'cygwin')
41
42
def is_darwin() -> bool:
    """ Return True when sys.platform reports macOS ('darwin'). """
    platform_name = sys.platform
    return platform_name == 'darwin'
45
46
def get_platform() -> str:
    """ Return the host platform name: 'windows', 'darwin' or 'linux'.

        Anything that is neither Windows nor macOS is reported as 'linux'.
    """
    if is_windows():
        platform_name = 'windows'
    elif is_darwin():
        platform_name = 'darwin'
    else:
        platform_name = 'linux'
    return platform_name
53
54
def is_python3() -> bool:
    """ Return True when the running interpreter is Python 3 or newer. """
    return sys.version_info >= (3, 0)
57
58
def log_debug(msg: str):
    """ Log msg on the root logger at DEBUG level. """
    logging.log(logging.DEBUG, msg)
61
62
def log_info(msg: str):
    """ Log msg on the root logger at INFO level. """
    logging.log(logging.INFO, msg)
65
66
def log_warning(msg: str):
    """ Log msg on the root logger at WARNING level. """
    logging.log(logging.WARNING, msg)
69
70
def log_fatal(msg: str):
    """ Report an unrecoverable error by raising a plain Exception carrying msg. """
    error = Exception(msg)
    raise error
73
74
def log_exit(msg: str):
    """ Terminate the program by raising SystemExit with msg as the exit message. """
    raise SystemExit(msg)
77
78
def disable_debug_log():
    """ Raise the root logger threshold to WARNING, hiding debug and info messages. """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.WARNING)
81
82
def set_log_level(level_name: str):
    """ Set the root logger level from its name.

        level_name must be 'debug', 'info' or 'warning'; any other value is
        reported through log_fatal().
    """
    levels = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
    }
    if level_name not in levels:
        log_fatal('unknown log level: %s' % level_name)
    logging.getLogger().setLevel(levels[level_name])
93
94
def str_to_bytes(str_value: str) -> bytes:
    """ Convert a str to the 8 bit string form expected by the C api.

        In python 3, str are wide strings, so the value is encoded (utf-8 for now).
        On older interpreters the value is returned unchanged.
    """
    if is_python3():
        return str_value.encode('utf-8')
    return str_value
101
102
def bytes_to_str(bytes_value: Optional[bytes]) -> str:
    """ Convert bytes to str, decoding as utf-8 in python 3.

        None and empty input both yield an empty string.
    """
    if bytes_value:
        return bytes_value.decode('utf-8') if is_python3() else bytes_value
    return ''
109
110
def get_target_binary_path(arch: str, binary_name: str) -> str:
    """ Return the path of a prebuilt Android binary under <script_dir>/bin/android/<arch>.

        'aarch64' is accepted as an alias of 'arm64'. A missing arch directory or
        binary is reported through log_fatal().
    """
    arch_name = 'arm64' if arch == 'aarch64' else arch
    arch_dir = os.path.join(get_script_dir(), "bin", "android", arch_name)
    if not os.path.isdir(arch_dir):
        log_fatal("can't find arch directory: %s" % arch_dir)
    target_path = os.path.join(arch_dir, binary_name)
    if not os.path.isfile(target_path):
        log_fatal("can't find binary: %s" % target_path)
    return target_path
121
122
def get_host_binary_path(binary_name: str) -> str:
    """ Return the path of a prebuilt host binary under <script_dir>/bin/<platform>/<bits>.

        The name is adapted to the host: on Windows '.so' becomes '.dll' and names
        without a dot get '.exe'; on macOS '.so' becomes '.dylib'. A missing binary
        is reported through log_fatal().
    """
    if is_windows():
        platform_dir = 'windows'
        if binary_name.endswith('.so'):
            binary_name = binary_name[:-3] + '.dll'
        elif '.' not in binary_name:
            binary_name += '.exe'
    elif sys.platform == 'darwin':  # OSX
        platform_dir = 'darwin'
        if binary_name.endswith('.so'):
            binary_name = binary_name[:-3] + '.dylib'
    else:
        platform_dir = 'linux'
    # A 64-bit interpreter selects the x86_64 prebuilts, otherwise x86.
    bits_dir = 'x86_64' if sys.maxsize > 2 ** 32 else 'x86'
    host_path = os.path.join(get_script_dir(), 'bin', platform_dir, bits_dir, binary_name)
    if not os.path.isfile(host_path):
        log_fatal("can't find binary: %s" % host_path)
    return host_path
142
143
def is_executable_available(executable: str, option='--help') -> bool:
    """ Run an executable to see if it exists.

        The executable is started with the single argument `option`, with its output
        captured and discarded. Returns True only when it could be started and
        exited with status 0.
    """
    try:
        completed = subprocess.run([executable, option], stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE, check=False)
    except OSError:
        return False
    return completed.returncode == 0
153
154
class ToolFinder:
    """ Find tools in ndk or sdk.

        Tools are looked up, in order, in the Android platform clang prebuilts (llvm tools
        on linux only), in the candidate ndk/sdk directories, and finally in $PATH.
    """
    # Default sdk installation path relative to the home directory, keyed by platform.
    DEFAULT_SDK_PATH = {
        'darwin': 'Library/Android/sdk',
        'linux': 'Android/Sdk',
        'windows': 'AppData/Local/Android/sdk',
    }

    # Known tools. 'is_binutils' tools get an arch specific name, 'test_option' is the
    # argument used to probe the tool (default '--help'), and 'path_in_ndk' /
    # 'path_in_sdk' give the location of the tool inside the ndk / sdk.
    EXPECTED_TOOLS = {
        'adb': {
            'is_binutils': False,
            'test_option': 'version',
            'path_in_sdk': 'platform-tools/adb',
        },
        'llvm-objdump': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-objdump' % platform,
        },
        'llvm-readelf': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-readelf' % platform,
        },
        'llvm-symbolizer': {
            'is_binutils': False,
            'path_in_ndk':
                lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
        },
        'objdump': {
            'is_binutils': True,
        },
        'strip': {
            'is_binutils': True,
        },
    }

    @staticmethod
    def _get_home_dir() -> Optional[str]:
        """ Return the user's home directory from the environment, or None if unknown. """
        if is_windows():
            home_drive = os.environ.get('HOMEDRIVE')
            home_path = os.environ.get('HOMEPATH')
            # Either variable may be unset; concatenating None would raise TypeError.
            if home_drive is None or home_path is None:
                return None
            return home_drive + home_path
        return os.environ.get('HOME')

    @staticmethod
    def _ndk_version_key(name: str) -> Tuple[List[int], str]:
        """ Build a sort key comparing the leading dotted numeric fields of name as integers. """
        numbers: List[int] = []
        for field in name.split('.'):
            if not field.isdigit():
                break
            numbers.append(int(field))
        # The raw name is a tie breaker, and orders names without any numeric prefix.
        return (numbers, name)

    @classmethod
    def _find_newest_ndk_version(cls, names: List[str]) -> Optional[str]:
        """ Return the entry of names with the highest version, or None if names is empty.

            Versions are compared numerically, so '21.4.1' is newer than '9.0.1'. Names
            that don't start with a number are ranked below all versioned names.
        """
        return max(names, key=cls._ndk_version_key, default=None)

    @classmethod
    def find_ndk_and_sdk_paths(cls, ndk_path: Optional[str] = None
                               ) -> Iterator[Tuple[Optional[str], Optional[str]]]:
        """ Yield candidate (ndk_path, sdk_path) pairs, most preferred first.

            sdk_path is None when no sdk can be found next to the ndk.
        """
        # Use the given ndk path.
        if ndk_path and os.path.isdir(ndk_path):
            ndk_path = os.path.abspath(ndk_path)
            yield ndk_path, cls.find_sdk_path(ndk_path)
        # Find ndk in the parent directory containing simpleperf scripts.
        ndk_path = os.path.dirname(os.path.abspath(get_script_dir()))
        yield ndk_path, cls.find_sdk_path(ndk_path)
        # Find ndk in the default sdk installation path.
        home = cls._get_home_dir()
        if home:
            platform = get_platform()
            sdk_path = os.path.join(home, cls.DEFAULT_SDK_PATH[platform].replace('/', os.sep))
            if os.path.isdir(sdk_path):
                path = os.path.join(sdk_path, 'ndk')
                if os.path.isdir(path):
                    # Android Studio can install multiple ndk versions in 'ndk'.
                    # Find the newest one.
                    ndk_version = cls._find_newest_ndk_version(os.listdir(path))
                    if ndk_version:
                        yield os.path.join(path, ndk_version), sdk_path
            ndk_path = os.path.join(sdk_path, 'ndk-bundle')
            if os.path.isdir(ndk_path):
                yield ndk_path, sdk_path

    @classmethod
    def find_sdk_path(cls, ndk_path: str) -> Optional[str]:
        """ Return the sdk directory containing ndk_path, or None.

            The parent and grandparent of ndk_path are checked for a 'platform-tools'
            subdirectory.
        """
        path = ndk_path
        for _ in range(2):
            path = os.path.dirname(path)
            if os.path.isdir(os.path.join(path, 'platform-tools')):
                return path
        return None

    @classmethod
    def _get_binutils_path_in_ndk(cls, toolname: str, arch: Optional[str], platform: str
                                  ) -> Tuple[str, str]:
        """ Return (arch specific tool name, path of the tool relative to the ndk).

            arch defaults to 'arm64'. An unsupported arch is reported through log_fatal().
        """
        if not arch:
            arch = 'arm64'
        if arch == 'arm64':
            name = 'aarch64-linux-android-' + toolname
        elif arch == 'arm':
            name = 'arm-linux-androideabi-' + toolname
        elif arch == 'x86_64':
            name = 'x86_64-linux-android-' + toolname
        elif arch == 'x86':
            name = 'i686-linux-android-' + toolname
        else:
            log_fatal('unexpected arch %s' % arch)
        path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name)
        return (name, path)

    @classmethod
    def find_tool_path(cls, toolname: str, ndk_path: Optional[str] = None,
                       arch: Optional[str] = None) -> Optional[str]:
        """ Return a usable path (or command name) for toolname, or None if not found.

            toolname must be a key of EXPECTED_TOOLS. A candidate is accepted only when
            running it with the tool's test option succeeds.
        """
        tool_info = cls.EXPECTED_TOOLS.get(toolname)
        if not tool_info:
            return None

        is_binutils = tool_info['is_binutils']
        test_option = tool_info.get('test_option', '--help')
        platform = get_platform()

        # Find tool in clang prebuilts in Android platform.
        if toolname.startswith('llvm-') and platform == 'linux' and get_script_dir().endswith(
                'system/extras/simpleperf/scripts'):
            path = str(
                Path(get_script_dir()).parents[3] / 'prebuilts' / 'clang' / 'host' / 'linux-x86' /
                'llvm-binutils-stable' / toolname)
            if is_executable_available(path, test_option):
                return path

        # Find tool in NDK or SDK.
        path_in_ndk = None
        path_in_sdk = None
        if is_binutils:
            toolname_with_arch, path_in_ndk = cls._get_binutils_path_in_ndk(
                toolname, arch, platform)
        else:
            toolname_with_arch = toolname
            if 'path_in_ndk' in tool_info:
                path_in_ndk = tool_info['path_in_ndk'](platform)
            elif 'path_in_sdk' in tool_info:
                path_in_sdk = tool_info['path_in_sdk']
        if path_in_ndk:
            path_in_ndk = path_in_ndk.replace('/', os.sep)
        elif path_in_sdk:
            path_in_sdk = path_in_sdk.replace('/', os.sep)

        for ndk_dir, sdk_dir in cls.find_ndk_and_sdk_paths(ndk_path):
            if path_in_ndk and ndk_dir:
                path = os.path.join(ndk_dir, path_in_ndk)
                if is_executable_available(path, test_option):
                    return path
            elif path_in_sdk and sdk_dir:
                path = os.path.join(sdk_dir, path_in_sdk)
                if is_executable_available(path, test_option):
                    return path

        # Find tool in $PATH.
        if is_executable_available(toolname_with_arch, test_option):
            return toolname_with_arch

        # Find tool without arch in $PATH.
        if is_binutils and tool_info.get('accept_tool_without_arch'):
            if is_executable_available(toolname, test_option):
                return toolname
        return None
308
309
class AdbHelper(object):
    """ Run adb commands against a device.

        When serial_number is set, it is passed to adb through the ANDROID_SERIAL
        environment variable.
    """

    def __init__(self, enable_switch_to_root: bool = True):
        adb_path = ToolFinder.find_tool_path('adb')
        if not adb_path:
            log_exit("Can't find adb in PATH environment.")
        self.adb_path: str = adb_path
        self.enable_switch_to_root = enable_switch_to_root
        self.serial_number: Optional[str] = None

    def is_device_available(self) -> bool:
        """ Return True if a shell command can be run on the device. """
        return self.run_and_return_output(['shell', 'whoami'])[0]

    def run(self, adb_args: List[str], log_output: bool = False, log_stderr: bool = False) -> bool:
        """ Run an adb command and return whether it exited with status 0. """
        return self.run_and_return_output(adb_args, log_output, log_stderr)[0]

    def run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                              log_stderr: bool = False) -> Tuple[bool, str]:
        """ Run an adb command and return (succeeded, stdout text).

            adb_args are the arguments following 'adb'. stdout is logged at debug level
            when log_output is set, stderr at warning level when log_stderr is set.
        """
        adb_args = [self.adb_path] + adb_args
        log_debug('run adb cmd: %s' % adb_args)
        env = None
        if self.serial_number:
            env = os.environ.copy()
            env['ANDROID_SERIAL'] = self.serial_number
        subproc = subprocess.Popen(
            adb_args, env=env, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout_data, stderr_data = subproc.communicate()
        stdout_data = bytes_to_str(stdout_data)
        stderr_data = bytes_to_str(stderr_data)
        returncode = subproc.returncode
        result = (returncode == 0)
        if log_output and stdout_data:
            log_debug(stdout_data)
        if log_stderr and stderr_data:
            log_warning(stderr_data)
        log_debug('run adb cmd: %s  [result %s]' % (adb_args, result))
        return (result, stdout_data)

    def check_run(self, adb_args: List[str], log_output: bool = False):
        """ Run an adb command, exiting the program if it fails. """
        self.check_run_and_return_output(adb_args, log_output)

    def check_run_and_return_output(self, adb_args: List[str], log_output: bool = False,
                                    log_stderr: bool = False) -> str:
        """ Run an adb command and return its stdout, exiting the program if it fails.

            stderr is always logged as a warning here, whatever the value of log_stderr.
        """
        result, stdoutdata = self.run_and_return_output(adb_args, log_output, True)
        if not result:
            log_exit('run "adb %s" failed: %s' % (adb_args, stdoutdata))
        return stdoutdata

    def _unroot(self):
        """ Restart adbd as non-root if the shell currently runs as root. """
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return
        if 'root' not in stdoutdata:
            return
        log_info('unroot adb')
        self.run(['unroot'])
        self.run(['wait-for-device'])
        time.sleep(1)

    def switch_to_root(self) -> bool:
        """ Try to make adb shell run as root. Return True if the shell is root afterwards.

            When switching to root is disabled, the device is unrooted instead and False
            is returned.
        """
        if not self.enable_switch_to_root:
            self._unroot()
            return False
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return False
        if 'root' in stdoutdata:
            return True
        build_type = self.get_property('ro.build.type')
        # get_property() returns raw getprop output, which ends with a newline, so the
        # value has to be stripped before comparing. 'adb root' can't work on user builds.
        if build_type is not None and build_type.strip() == 'user':
            return False
        self.run(['root'])
        time.sleep(1)
        self.run(['wait-for-device'])
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        return result and 'root' in stdoutdata

    def get_property(self, name: str) -> Optional[str]:
        """ Return the raw output of 'getprop name' (not stripped), or None on failure. """
        result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name])
        return stdoutdata if result else None

    def set_property(self, name: str, value: str) -> bool:
        """ Set a system property with setprop. Return True on success. """
        return self.run(['shell', 'setprop', name, value])

    def get_device_arch(self) -> str:
        """ Return the device arch ('arm64', 'arm', 'x86_64' or 'x86') based on 'uname -m'. """
        output = self.check_run_and_return_output(['shell', 'uname', '-m'])
        # Order matters: 'x86_64' also contains '86'.
        if 'aarch64' in output:
            return 'arm64'
        if 'arm' in output:
            return 'arm'
        if 'x86_64' in output:
            return 'x86_64'
        if '86' in output:
            return 'x86'
        log_fatal('unsupported architecture: %s' % output.strip())
        return ''

    def get_android_version(self) -> int:
        """ Get Android version on device, like 7 is for Android N, 8 is for Android O.

            Returns 0 when the version can't be determined.
        """
        build_version = self.get_property('ro.build.version.release')
        android_version = 0
        if build_version:
            if not build_version[0].isdigit():
                # A release letter instead of a number: 'L' maps to 5, 'M' to 6, etc.
                c = build_version[0].upper()
                if c.isupper() and c >= 'L':
                    android_version = ord(c) - ord('L') + 5
            else:
                strs = build_version.split('.')
                if strs:
                    android_version = int(strs[0])
        return android_version
420
421
def flatten_arg_list(arg_list: List[List[str]]) -> List[str]:
    """ Concatenate a list of argument lists into one flat list.

        An empty or None arg_list yields an empty list.
    """
    if not arg_list:
        return []
    return [arg for items in arg_list for arg in items]
428
429
def remove(dir_or_file: Union[Path, str]):
    """ Delete a file, or a directory together with its content.

        A path that is neither a file nor a directory is left alone. Errors while
        deleting a directory tree are ignored.
    """
    if os.path.isfile(dir_or_file):
        os.remove(dir_or_file)
        return
    if os.path.isdir(dir_or_file):
        shutil.rmtree(dir_or_file, ignore_errors=True)
435
436
def open_report_in_browser(report_path: str):
    """ Open report_path in a web browser.

        On macOS the `open` command is tried first. Otherwise, or if that fails,
        Chrome is tried through the webbrowser module, falling back to the default
        browser.
    """
    if is_darwin():
        # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first.
        try:
            subprocess.check_call(['open', report_path])
            return
        except (subprocess.CalledProcessError, OSError):
            # Either `open` failed or it couldn't be started at all. In both cases
            # fall back to the webbrowser module below.
            pass
    import webbrowser
    try:
        # Try to open the report with Chrome
        browser = webbrowser.get('google-chrome')
        browser.open(report_path, new=0, autoraise=True)
    except webbrowser.Error:
        # webbrowser.get() doesn't work well on darwin/windows.
        webbrowser.open_new_tab(report_path)
453
454
class BinaryFinder:
    """ Find binaries on the host, using a binary cache directory and build ids.

        readelf is used to check that a candidate is an ELF file and to read its
        build id.
    """

    def __init__(self, binary_cache_dir: Optional[Union[Path, str]], readelf: ReadElf):
        if isinstance(binary_cache_dir, str):
            binary_cache_dir = Path(binary_cache_dir)
        self.binary_cache_dir = binary_cache_dir
        self.readelf = readelf
        self.build_id_map = self._load_build_id_map()

    def _load_build_id_map(self) -> Dict[str, Path]:
        """ Load the map from build id to binary path from <binary_cache_dir>/build_id_list.

            Returns an empty map if there is no binary cache or no list file.
        """
        build_id_map: Dict[str, Path] = {}
        if not self.binary_cache_dir:
            return build_id_map
        build_id_list_file = self.binary_cache_dir / 'build_id_list'
        if not build_id_list_file.is_file():
            return build_id_map
        with open(build_id_list_file, 'rb') as fh:
            for line in fh:
                # lines are in format "<build_id>=<path_in_binary_cache>".
                # Only split on the first '=': paths may contain '=' themselves, like
                # app directories named "~~xxx==" on recent Android versions.
                build_id, _, path_in_cache = line.decode('utf-8').strip().partition('=')
                if build_id and path_in_cache:
                    build_id_map[build_id] = self.binary_cache_dir / path_in_cache
        return build_id_map

    def find_binary(self, dso_path_in_record_file: str,
                    expected_build_id: Optional[str]) -> Optional[Path]:
        """ If expected_build_id is None, don't check build id.
            Otherwise, the build id of the found binary should match the expected one.

            Returns the path of the binary, or None if no matching binary is found."""
        # Find binary from build id map.
        if expected_build_id:
            path = self.build_id_map.get(expected_build_id)
            if path and self._check_path(path, expected_build_id):
                return path
        # Find binary by path in binary cache.
        if self.binary_cache_dir:
            # Drop the first character (the leading '/' of an absolute path), so the
            # path is resolved inside the binary cache.
            path = self.binary_cache_dir / dso_path_in_record_file[1:]
            if self._check_path(path, expected_build_id):
                return path
        # Find binary by its absolute path.
        path = Path(dso_path_in_record_file)
        if self._check_path(path, expected_build_id):
            return path
        return None

    def _check_path(self, path: Path, expected_build_id: Optional[str]) -> bool:
        """ Return True if path is an ELF file with the expected build id (if given). """
        if not self.readelf.is_elf_file(path):
            return False
        if expected_build_id is not None:
            return self.readelf.get_build_id(path) == expected_build_id
        return True
502
503
class Addr2Nearestline(object):
    """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).
        For instructions generated by C++ compilers without a matching statement in source code
        (like stack corruption check, switch optimization, etc.), the symbolizer can't generate
        line information. However, we want to assign the instruction to the nearest line before
        the instruction (just like objdump -dl). So we use below strategy:
        Instead of finding the exact line of the instruction in an address, we find the nearest
        line to the instruction in an address. If an address doesn't have a line info, we find
        the line info of address - 1. If still no line info, then use address - 2, address - 3,
        etc.

        The implementation steps are as below:
        1. Collect all (dso_path, func_addr, addr) requests before converting. This reduces the
        number of times llvm-symbolizer is run.
        2. Convert addrs to (source_file, line) pairs for each dso_path as below:
          2.1 Check if the dso_path has .debug_line. If not, omit its conversion.
          2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we
          change addr each time. For example, since instructions of arm64 are all 4 bytes long,
          addr_step for arm64 can be 4.
          2.3 Use llvm-symbolizer to find line info for each addr in the dso_path.
          2.4 For each addr without line info, use llvm-symbolizer to find line info for
              range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step).
          2.5 For each addr without line info, use llvm-symbolizer to find line info for
              range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step).
              (128 is a guess number. A nested switch statement in
               system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.)
    """
    class Dso(object):
        """ Info of a dynamic shared library.
            build_id: expected build id of the library, or None if unknown.
            addrs: a map from address to Addr object in this dso.
        """

        def __init__(self, build_id: Optional[str]):
            self.build_id = build_id
            self.addrs: Dict[int, Addr2Nearestline.Addr] = {}

    class Addr(object):
        """ Info of an addr request.
            func_addr: start_addr of the function containing addr.
            source_lines: a list of (file_id, line_number) tuples for addr, or
                          (file_id, line_number, func_id) tuples when function names are
                          requested. None until line info is found.
                          source_lines[:-1] are all for inlined functions.
        """

        def __init__(self, func_addr: int):
            self.func_addr = func_addr
            self.source_lines: Optional[List[Tuple[int, ...]]] = None

    def __init__(
            self, ndk_path: Optional[str],
            binary_finder: BinaryFinder, with_function_name: bool):
        """ ndk_path: optional ndk path used to find llvm-symbolizer and readelf.
            binary_finder: used to map dso paths in the record file to files on the host.
            with_function_name: whether to also collect a function name for each source line.
            Exits the program if llvm-symbolizer can't be found.
        """
        self.symbolizer_path = ToolFinder.find_tool_path('llvm-symbolizer', ndk_path)
        if not self.symbolizer_path:
            log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
        self.readelf = ReadElf(ndk_path)
        self.dso_map: Dict[str, Addr2Nearestline.Dso] = {}  # map from dso_path to Dso.
        self.binary_finder = binary_finder
        self.with_function_name = with_function_name
        # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
        # and provide data structures connecting file id and file name here.
        self.file_name_to_id: Dict[str, int] = {}
        self.file_id_to_name: List[str] = []
        # Function names are stored by id in the same way.
        self.func_name_to_id: Dict[str, int] = {}
        self.func_id_to_name: List[str] = []

    def add_addr(self, dso_path: str, build_id: Optional[str], func_addr: int, addr: int):
        """ Register a request to convert addr, which belongs to the function starting at
            func_addr in dso_path. A repeated request for the same addr is ignored.
        """
        dso = self.dso_map.get(dso_path)
        if dso is None:
            dso = self.dso_map[dso_path] = self.Dso(build_id)
        if addr not in dso.addrs:
            dso.addrs[addr] = self.Addr(func_addr)

    def convert_addrs_to_lines(self):
        """ Find line info for all requests added so far. """
        for dso_path, dso in self.dso_map.items():
            self._convert_addrs_in_one_dso(dso_path, dso)

    def _convert_addrs_in_one_dso(self, dso_path: str, dso: Addr2Nearestline.Dso):
        """ Find line info for the addrs of one dso, searching backwards in three passes.
            Nothing is done if the binary can't be found or has no .debug_line section.
        """
        real_path = self.binary_finder.find_binary(dso_path, dso.build_id)
        if not real_path:
            if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
                log_debug("Can't find dso %s" % dso_path)
            return

        if not self._check_debug_line_section(real_path):
            log_debug("file %s doesn't contain .debug_line section." % real_path)
            return

        addr_step = self._get_addr_step(real_path)
        # Pass 1: the exact addrs. Pass 2: 1 to 4 steps back. Pass 3: 5 to 128 steps back.
        # Each pass only queries addrs that still have no line info.
        self._collect_line_info(dso, real_path, [0])
        self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step))
        self._collect_line_info(dso, real_path,
                                range(-addr_step * 5, -addr_step * 128 - 1, -addr_step))

    def _check_debug_line_section(self, real_path: Path) -> bool:
        """ Return True if the binary has a .debug_line section. """
        return '.debug_line' in self.readelf.get_sections(real_path)

    def _get_addr_step(self, real_path: Path) -> int:
        """ Return the step used to move addrs backwards: 4 for arm64, 2 for arm, else 1. """
        arch = self.readelf.get_arch(real_path)
        if arch == 'arm64':
            return 4
        if arch == 'arm':
            return 2
        return 1

    def _collect_line_info(
            self, dso: Addr2Nearestline.Dso, real_path: Path,
            addr_shifts: Union[List[int], range]):
        """ Use llvm-symbolizer to get line info in a dso, with given addr shifts.
            addr_shifts is iterated more than once, so it must not be a one-shot iterator.
        """
        # 1. Collect addrs to send to llvm-symbolizer.
        addr_set: Set[int] = set()
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:  # already has source line, no need to search.
                continue
            for shift in addr_shifts:
                # The addr after shift shouldn't change to another function.
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                addr_set.add(shifted_addr)
                if shifted_addr == addr_obj.func_addr:
                    break
        if not addr_set:
            return
        addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)])

        # 2. Use llvm-symbolizer to collect line info.
        try:
            subproc = subprocess.Popen(self._build_symbolizer_args(real_path),
                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request))
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return
        # Parse the output. A line starting with '0x' begins the result of a new address.
        # It is followed by source locations, each preceded by a function name line when
        # function names are requested.
        addr_map: Dict[int, List[Tuple[int, ...]]] = {}
        cur_line_list: Optional[List[Tuple[int, ...]]] = None
        need_function_name = self.with_function_name
        cur_function_name: Optional[str] = None
        for line in stdoutdata.strip().split('\n'):
            line = line.strip()
            if not line:
                continue
            if line[:2] == '0x':
                # a new address
                cur_line_list = addr_map[int(line, 16)] = []
            elif need_function_name:
                cur_function_name = line.strip()
                need_function_name = False
            else:
                need_function_name = self.with_function_name
                if cur_line_list is None:
                    continue
                file_path, line_number = self._parse_source_location(line)
                if not file_path or not line_number:
                    # An addr can have a list of (file, line), when the addr belongs to an inlined
                    # function. Sometimes only part of the list has ? mark. In this case, we think
                    # the line info is valid if the first line doesn't have ? mark.
                    if not cur_line_list:
                        cur_line_list = None
                    continue
                file_id = self._get_file_id(file_path)
                if self.with_function_name:
                    func_id = self._get_func_id(cur_function_name)
                    cur_line_list.append((file_id, line_number, func_id))
                else:
                    cur_line_list.append((file_id, line_number))

        # 3. Fill line info in dso.addrs.
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:
                continue
            for shift in addr_shifts:
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                lines = addr_map.get(shifted_addr)
                if lines:
                    addr_obj.source_lines = lines
                    break
                if shifted_addr == addr_obj.func_addr:
                    break

    def _build_symbolizer_args(self, binary_path: Path) -> List[str]:
        """ Build the llvm-symbolizer command line used to symbolize binary_path. """
        args = [self.symbolizer_path, '--print-address', '--inlining', '--obj=%s' % binary_path]
        if self.with_function_name:
            args += ['--functions=linkage', '--demangle']
        else:
            args.append('--functions=none')
        return args

    def _parse_source_location(self, line: str) -> Tuple[Optional[str], Optional[int]]:
        """ Parse a "filename:line:column" string into (file_path, line_number).
            Returns (None, None) if the format doesn't match, if the file or line contains
            a ? mark, or if the line number isn't an integer.
        """
        file_path, line_number = None, None
        # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
        # Filename may contain ':' like "C:\Users\...\file".
        items = line.rsplit(':', 2)
        if len(items) == 3:
            file_path, line_number = items[:2]
        if not file_path or ('?' in file_path) or not line_number or ('?' in line_number):
            return None, None
        try:
            line_number = int(line_number)
        except ValueError:
            return None, None
        return file_path, line_number

    def _get_file_id(self, file_path: str) -> int:
        """ Return the id of file_path, assigning the next free id on first use. """
        file_id = self.file_name_to_id.get(file_path)
        if file_id is None:
            file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
            self.file_id_to_name.append(file_path)
        return file_id

    def _get_func_id(self, func_name: str) -> int:
        """ Return the id of func_name, assigning the next free id on first use. """
        func_id = self.func_name_to_id.get(func_name)
        if func_id is None:
            func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
            self.func_id_to_name.append(func_name)
        return func_id

    def get_dso(self, dso_path: str) -> Optional[Addr2Nearestline.Dso]:
        """ Return the Dso registered for dso_path, or None if no addr was added for it. """
        return self.dso_map.get(dso_path)

    def get_addr_source(
            self, dso: Addr2Nearestline.Dso, addr: int
    ) -> Optional[List[Union[Tuple[str, int], Tuple[str, int, str]]]]:
        """ Return the source lines found for addr, with ids replaced by names.
            The result is a list of (file_name, line) tuples, or of
            (file_name, line, function_name) tuples when function names are requested.
            Returns None if addr has no line info. addr must have been added to dso.
        """
        source = dso.addrs[addr].source_lines
        if source is None:
            return None
        if self.with_function_name:
            return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id])
                    for (file_id, line, func_id) in source]
        return [(self.file_id_to_name[file_id], line) for (file_id, line) in source]
729
730
class SourceFileSearcher(object):
    """ Map source file paths stored in debug info to files in the file system.
        The paths reported for a binary are the ones recorded in its debug sections,
        which usually differ from where the sources live on this machine. They are
        resolved as below:
        1. Index every source file under the provided source_dirs. A file counts as a
           source file when its extension is in SOURCE_FILE_EXTS (C/C++ headers and
           sources, Java and Kotlin files).
        2. Given an abstract_path, pick the best real path as below:
           2.1 Take all indexed files with the same file name as the abstract path.
           2.2 Select the one whose directory has the longest common suffix with the
               directory of the abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename: str) -> bool:
        """ Return True if filename has a known source file extension. """
        _, ext = os.path.splitext(filename)
        return ext in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs: List[str]):
        # Map from filename to a list of reversed directory path containing filename.
        # Directories are kept reversed so that a common suffix becomes a common prefix.
        self.filename_to_rparents: Dict[str, List[str]] = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs: List[str]):
        """ Walk source_dirs and index the directory of every source file by file name. """
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                source_names = [name for name in file_names if self.is_source_filename(name)]
                if not source_names:
                    continue
                reversed_parent = parent[::-1]
                for name in source_names:
                    self.filename_to_rparents.setdefault(name, []).append(reversed_parent)

    def get_real_path(self, abstract_path: str) -> Optional[str]:
        """ Return the indexed file best matching abstract_path, or None if no indexed
            file has the same file name.
        """
        abstract_parent, file_name = os.path.split(abstract_path.replace('/', os.sep))
        candidates = self.filename_to_rparents.get(file_name)
        if candidates is None:
            return None
        abstract_rparent = abstract_parent[::-1]

        def common_length(real_rparent: str) -> int:
            return len(os.path.commonprefix((real_rparent, abstract_rparent)))

        # max() keeps the first candidate among equally good ones.
        best_rparent = max(candidates, key=common_length, default=None)
        if best_rparent is None:
            return None
        return os.path.join(best_rparent[::-1], file_name)
792
793
class Objdump(object):
    """ A wrapper of objdump to disassemble code. """

    def __init__(self, ndk_path: Optional[str], binary_finder: BinaryFinder):
        self.ndk_path = ndk_path
        self.binary_finder = binary_finder
        self.readelf = ReadElf(ndk_path)
        # Map from arch to the path of the objdump tool selected for that arch.
        self.objdump_paths: Dict[str, str] = {}

    def get_dso_info(self, dso_path: str, expected_build_id: Optional[str]
                     ) -> Optional[Tuple[str, str]]:
        """ Find the binary of dso_path and detect its arch.
            Return a pair (real_path, arch) usable as dso_info in disassemble_code(),
            or None if binary_finder finds no binary or readelf reports the arch
            as 'unknown'.
        """
        real_path = self.binary_finder.find_binary(dso_path, expected_build_id)
        if not real_path:
            return None
        arch = self.readelf.get_arch(real_path)
        if arch == 'unknown':
            return None
        return (str(real_path), arch)

    def _get_objdump_path(self, arch: str) -> str:
        """ Return the objdump tool to use for arch, remembering the choice per arch. """
        objdump_path = self.objdump_paths.get(arch)
        if not objdump_path:
            if arch == 'arm':
                # llvm-objdump for arm is not good at showing branch targets.
                # So still prefer objdump.
                objdump_path = ToolFinder.find_tool_path('objdump', self.ndk_path, arch)
            if not objdump_path:
                objdump_path = ToolFinder.find_tool_path('llvm-objdump', self.ndk_path, arch)
            if not objdump_path:
                log_exit("Can't find llvm-objdump. Please set ndk path with --ndk_path option.")
            self.objdump_paths[arch] = objdump_path
        return objdump_path

    def disassemble_code(self, dso_info: Tuple[str, str], start_addr: int, addr_len: int
                         ) -> Optional[List[Tuple[str, int]]]:
        """ Disassemble part of the binary described by dso_info.

            dso_info is a pair (real_path, arch) as returned by get_dso_info().
            objdump is run with --start-address=start_addr and
            --stop-address=start_addr + addr_len.
            Return a list of pair (disassemble_code_line, addr), one per output
            line. addr is the hex number before the first ':' of the line, or 0
            if the line doesn't start with one (for example source file lines).
            Return None if objdump can't be started or prints nothing.
        """
        real_path, arch = dso_info
        objdump_path = self._get_objdump_path(arch)

        # Run objdump on the requested address range.
        args = [objdump_path, '-dlC', '--no-show-raw-insn',
                '--start-address=0x%x' % start_addr,
                '--stop-address=0x%x' % (start_addr + addr_len),
                real_path]
        if arch == 'arm' and 'llvm-objdump' in objdump_path:
            args += ['--print-imm-hex']
        try:
            subproc = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate()
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return None

        if not stdoutdata:
            return None

        # Pair each output line with the address it starts with.
        result = []
        for line in stdoutdata.split('\n'):
            line = line.rstrip()  # Remove '\r' on Windows.
            items = line.split(':', 1)
            try:
                addr = int(items[0], 16)
            except ValueError:
                addr = 0
            result.append((line, addr))
        return result
856
857
class ReadElf(object):
    """ A wrapper of llvm-readelf, used to inspect elf files. """

    def __init__(self, ndk_path: Optional[str]):
        readelf_path = ToolFinder.find_tool_path('llvm-readelf', ndk_path)
        self.readelf_path = readelf_path
        if not readelf_path:
            log_exit("Can't find llvm-readelf. Please set ndk path with --ndk_path option.")

    @staticmethod
    def is_elf_file(path: Union[Path, str]) -> bool:
        """ Return True if path is a regular file starting with the elf magic bytes. """
        if not os.path.isfile(path):
            return False
        with open(path, 'rb') as fh:
            magic = fh.read(4)
        return magic == b'\x7fELF'

    def _run_readelf(self, option: str, elf_file_path: Union[Path, str]) -> Optional[str]:
        """ Run readelf with option on elf_file_path and return its output as str.
            Return None if the path isn't an elf file or readelf exits with an error.
        """
        if not self.is_elf_file(elf_file_path):
            return None
        try:
            raw_output = subprocess.check_output([self.readelf_path, option, str(elf_file_path)])
            return bytes_to_str(raw_output)
        except subprocess.CalledProcessError:
            return None

    def get_arch(self, elf_file_path: Union[Path, str]) -> str:
        """ Get arch of an elf file: 'arm64', 'arm', 'x86_64', 'x86' or 'unknown'. """
        output = self._run_readelf('-h', elf_file_path)
        if output is not None:
            # Markers are searched in the readelf header output in this order;
            # the first one found decides the arch.
            for marker, arch in (('AArch64', 'arm64'), ('ARM', 'arm'),
                                 ('X86-64', 'x86_64'), ('80386', 'x86')):
                if marker in output:
                    return arch
        return 'unknown'

    def get_build_id(self, elf_file_path: Union[Path, str], with_padding=True) -> str:
        """ Get build id of an elf file.
            Return "" if no build id can be read. With with_padding, the build id
            is normalized by pad_build_id().
        """
        output = self._run_readelf('-n', elf_file_path)
        match = re.search(r'Build ID:\s*(\S+)', output) if output is not None else None
        if not match:
            return ""
        build_id = match.group(1)
        return self.pad_build_id(build_id) if with_padding else build_id

    @staticmethod
    def pad_build_id(build_id: str) -> str:
        """ Pad or truncate build id to 40 hex numbers (20 bytes), prefixed with '0x'. """
        return '0x' + build_id[:40].ljust(40, '0')

    def get_sections(self, elf_file_path: Union[Path, str]) -> List[str]:
        """ Get section names of an elf file. Return [] if they can't be read. """
        section_names: List[str] = []
        output = self._run_readelf('-SW', elf_file_path)
        if output is None:
            return section_names
        # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
        section_line = re.compile(r'^\s+\[\s*\d+\]\s(.+?)\s')
        for line in output.split('\n'):
            match = section_line.search(line)
            if not match:
                continue
            section_name = match.group(1).strip()
            if section_name:
                section_names.append(section_name)
        return section_names
933
934
def extant_dir(arg: str) -> str:
    """ArgumentParser type that only accepts existing directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The realpath of the argument.
    Raises:
        argparse.ArgumentTypeError: The resolved path isn't a directory.
    """
    resolved = os.path.realpath(arg)
    if os.path.isdir(resolved):
        return resolved
    raise argparse.ArgumentTypeError('{} is not a directory.'.format(resolved))
948
949
def extant_file(arg: str) -> str:
    """ArgumentParser type that only accepts existing files.

    Args:
        arg: The string argument given on the command line.
    Returns: The realpath of the argument.
    Raises:
        argparse.ArgumentTypeError: The resolved path isn't a file.
    """
    resolved = os.path.realpath(arg)
    if os.path.isfile(resolved):
        return resolved
    raise argparse.ArgumentTypeError('{} is not a file.'.format(resolved))
963
964
class ArgParseFormatter(
        argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter):
    """ Help formatter combining argparse's ArgumentDefaultsHelpFormatter and
        RawDescriptionHelpFormatter. It adds nothing of its own.
    """
968
969
# Module-level side effect: set the root logger's level to DEBUG as soon as this
# module is executed. disable_debug_log() changes it to WARN afterwards.
logging.getLogger().setLevel(logging.DEBUG)
971