1#!/usr/bin/env python
2#
3# Copyright (C) 2016 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18"""utils.py: export utility functions.
19"""
20
21from __future__ import print_function
22import argparse
23import logging
24import os
25import os.path
26import re
27import shutil
28import subprocess
29import sys
30import time
31
def get_script_dir():
    """ Return the directory containing this script, with symlinks resolved. """
    script_path = os.path.realpath(__file__)
    return os.path.dirname(script_path)
34
def is_windows():
    """ Return True when running on native Windows or under cygwin. """
    return sys.platform in ('win32', 'cygwin')
37
def is_darwin():
    """ Return True when the host platform is macOS (sys.platform is 'darwin'). """
    host = sys.platform
    return host == 'darwin'
40
def get_platform():
    """ Return the host platform name: 'windows', 'darwin', or 'linux' as the fallback. """
    for platform_name, matches in (('windows', is_windows), ('darwin', is_darwin)):
        if matches():
            return platform_name
    return 'linux'
47
def is_python3():
    """ Return True when the running interpreter is Python 3 or newer. """
    major_version = sys.version_info[0]
    return major_version >= 3
50
51
def log_debug(msg):
    """ Log msg through the root logger at DEBUG level. """
    logging.log(logging.DEBUG, msg)
54
55
def log_info(msg):
    """ Log msg through the root logger at INFO level. """
    logging.log(logging.INFO, msg)
58
59
def log_warning(msg):
    """ Log msg through the root logger at WARNING level. """
    logging.log(logging.WARNING, msg)
62
63
def log_fatal(msg):
    """ Abort the current operation by raising Exception carrying msg. """
    failure = Exception(msg)
    raise failure
66
def log_exit(msg):
    """ Terminate the interpreter by raising SystemExit with msg as the exit status. """
    raise SystemExit(msg)
69
def disable_debug_log():
    """ Raise the root logger threshold to WARNING, hiding debug and info messages. """
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.WARNING)
72
def set_log_level(level_name):
    """ Set the root logger level from its name.

        level_name: one of 'debug', 'info' or 'warning'. Any other value is
                    reported through log_fatal().
    """
    levels_by_name = {
        'debug': logging.DEBUG,
        'info': logging.INFO,
        'warning': logging.WARNING,
    }
    if level_name not in levels_by_name:
        log_fatal('unknown log level: %s' % level_name)
    logging.getLogger().setLevel(levels_by_name[level_name])
83
def str_to_bytes(str_value):
    """ Convert a str to the 8 bit string form expected by C apis and subprocess pipes.

        In python 3, str are wide strings, so the value is encoded as utf-8.
        In python 2 the value is returned unchanged.
    """
    if is_python3():
        return str_value.encode('utf-8')
    return str_value
90
def bytes_to_str(bytes_value):
    """ Convert subprocess output to str.

        Empty or None input gives ''. In python 3 the bytes are decoded as utf-8;
        in python 2 the value is returned unchanged.
    """
    if not bytes_value:
        return ''
    return bytes_value.decode('utf-8') if is_python3() else bytes_value
97
def get_target_binary_path(arch, binary_name):
    """ Return the path of an Android binary shipped under <script_dir>/bin/android/<arch>.

        arch: device architecture; 'aarch64' is treated as 'arm64'.
        binary_name: file name of the binary inside the arch directory.
        A missing arch directory or binary is reported through log_fatal().
    """
    arch_name = 'arm64' if arch == 'aarch64' else arch
    arch_dir = os.path.join(get_script_dir(), "bin", "android", arch_name)
    if not os.path.isdir(arch_dir):
        log_fatal("can't find arch directory: %s" % arch_dir)
    binary_path = os.path.join(arch_dir, binary_name)
    if os.path.isfile(binary_path):
        return binary_path
    log_fatal("can't find binary: %s" % binary_path)
    return binary_path
108
109
def get_host_binary_path(binary_name):
    """ Return the path of a host binary shipped under <script_dir>/bin/<platform>/<bits>.

        binary_name is given in its linux form and adapted to the host:
          windows: '.so' becomes '.dll'; names without any '.' get '.exe' appended.
          darwin:  '.so' becomes '.dylib'.
        The <bits> directory is 'x86_64' for a 64 bit interpreter, otherwise 'x86'.
        A missing binary is reported through log_fatal().
    """
    so_suffix = '.so'
    if is_windows():
        platform_dir = 'windows'
        if binary_name.endswith(so_suffix):
            binary_name = binary_name[:-len(so_suffix)] + '.dll'
        elif '.' not in binary_name:
            binary_name = binary_name + '.exe'
    elif is_darwin():
        platform_dir = 'darwin'
        if binary_name.endswith(so_suffix):
            binary_name = binary_name[:-len(so_suffix)] + '.dylib'
    else:
        platform_dir = 'linux'
    bits_dir = 'x86_64' if sys.maxsize > 2 ** 32 else 'x86'
    binary_path = os.path.join(get_script_dir(), 'bin', platform_dir, bits_dir, binary_name)
    if not os.path.isfile(binary_path):
        log_fatal("can't find binary: %s" % binary_path)
    return binary_path
129
130
def is_executable_available(executable, option='--help'):
    """ Run an executable to see if it exists.

        The executable is started with the single argument `option` and its output
        is discarded. Returns True only when it starts and exits with status 0;
        returns False when it exits non-zero or can't be started (OSError).
    """
    command = [executable, option]
    try:
        probe = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        probe.communicate()
        exit_code = probe.returncode
    except OSError:
        return False
    return exit_code == 0
140
# Default ndk installation directory for each host platform name returned by
# get_platform(). find_tool_path() joins these onto the user's home directory.
DEFAULT_NDK_PATH = {
    'darwin': 'Library/Android/sdk/ndk-bundle',
    'linux': 'Android/Sdk/ndk-bundle',
    'windows': 'AppData/Local/Android/sdk/ndk-bundle',
}

# Tools that find_tool_path() knows how to locate. Keys of each entry:
#   is_binutils: True when the tool is located through _get_binutils_path_in_ndk(),
#                which prefixes the tool name with an arch specific triple.
#   test_option: the option passed when probing the executable; find_tool_path()
#                falls back to '--help' when the key is absent.
#   path_in_ndk: callable taking the host platform name and returning the tool's path
#                relative to the ndk directory (only read for non-binutils tools).
#   accept_tool_without_arch: when true, find_tool_path() also tries the plain tool
#                name in $PATH as a last resort.
EXPECTED_TOOLS = {
    'adb': {
        'is_binutils': False,
        'test_option': 'version',
        'path_in_ndk': lambda _: '../platform-tools/adb',
    },
    'readelf': {
        'is_binutils': True,
        'accept_tool_without_arch': True,
    },
    'llvm-symbolizer': {
        'is_binutils': False,
        'path_in_ndk':
            lambda platform: 'toolchains/llvm/prebuilt/%s-x86_64/bin/llvm-symbolizer' % platform,
    },
    'objdump': {
        'is_binutils': True,
    },
    'strip': {
        'is_binutils': True,
    },
}
169
def _get_binutils_path_in_ndk(toolname, arch, platform):
    """ Return (arch prefixed tool name, tool path relative to the ndk directory).

        toolname: plain binutils name, like 'readelf'.
        arch: 'arm64', 'arm', 'x86_64' or 'x86'; an empty value means 'arm64'.
              Any other value is reported through log_fatal().
        platform: host platform name used in the prebuilt toolchain directory.
    """
    prefix_by_arch = {
        'arm64': 'aarch64-linux-android-',
        'arm': 'arm-linux-androideabi-',
        'x86_64': 'x86_64-linux-android-',
        'x86': 'i686-linux-android-',
    }
    arch = arch or 'arm64'
    if arch not in prefix_by_arch:
        log_fatal('unexpected arch %s' % arch)
    name = prefix_by_arch[arch] + toolname
    path = 'toolchains/llvm/prebuilt/%s-x86_64/bin/%s' % (platform, name)
    return (name, path)
188
def find_tool_path(toolname, ndk_path=None, arch=None):
    """ Find a usable executable for one of the tools listed in EXPECTED_TOOLS.

        toolname: a key of EXPECTED_TOOLS, like 'adb' or 'readelf'.
        ndk_path: optional ndk directory to search first.
        arch: target arch for binutils tools (see _get_binutils_path_in_ndk()).

        Each candidate is probed by running it with the tool's test option, and the
        first one exiting successfully is returned. Candidates are tried in this order:
          1. the given ndk_path.
          2. the ndk directory containing simpleperf scripts: first '..' relative to the
             current working directory, then '..' relative to this script's directory,
             so the lookup also works when the script is run from another directory.
          3. the default ndk installation path under the user's home directory.
          4. the (arch prefixed) tool name in $PATH.
          5. the tool name without arch in $PATH, if the tool accepts that.

        Returns the path (or bare name for $PATH hits), or None if toolname is unknown
        or no candidate works.
    """
    if toolname not in EXPECTED_TOOLS:
        return None
    tool_info = EXPECTED_TOOLS[toolname]
    is_binutils = tool_info['is_binutils']
    test_option = tool_info.get('test_option', '--help')
    platform = get_platform()
    if is_binutils:
        toolname_with_arch, path_in_ndk = _get_binutils_path_in_ndk(toolname, arch, platform)
    else:
        toolname_with_arch = toolname
        path_in_ndk = tool_info['path_in_ndk'](platform)
    path_in_ndk = path_in_ndk.replace('/', os.sep)

    candidates = []

    # 1. Find tool in the given ndk path.
    if ndk_path:
        candidates.append(os.path.join(ndk_path, path_in_ndk))

    # 2. Find tool in the ndk directory containing simpleperf scripts.
    # The cwd relative form is kept first so existing lookups resolve exactly as before.
    candidates.append(os.path.join('..', path_in_ndk))
    candidates.append(os.path.normpath(os.path.join(get_script_dir(), '..', path_in_ndk)))

    # 3. Find tool in the default ndk installation path.
    home = os.environ.get('HOMEPATH') if is_windows() else os.environ.get('HOME')
    if home:
        default_ndk_path = os.path.join(home, DEFAULT_NDK_PATH[platform].replace('/', os.sep))
        candidates.append(os.path.join(default_ndk_path, path_in_ndk))

    # 4. Find tool in $PATH.
    candidates.append(toolname_with_arch)

    # 5. Find tool without arch in $PATH.
    if is_binutils and tool_info.get('accept_tool_without_arch'):
        candidates.append(toolname)

    for candidate in candidates:
        if is_executable_available(candidate, test_option):
            return candidate
    return None
231
232
class AdbHelper(object):
    """ A wrapper to run adb commands.

        enable_switch_to_root: when False, switch_to_root() drops adb back to
        non-root instead of trying to become root.
    """
    def __init__(self, enable_switch_to_root=True):
        adb_path = find_tool_path('adb')
        if not adb_path:
            log_exit("Can't find adb in PATH environment.")
        self.adb_path = adb_path
        self.enable_switch_to_root = enable_switch_to_root

    def run(self, adb_args):
        """ Run an adb command. Return True if it exits with status 0. """
        return self.run_and_return_output(adb_args)[0]

    def run_and_return_output(self, adb_args, log_output=True, log_stderr=True):
        """ Run an adb command and capture its output.

            adb_args: list of arguments following 'adb'.
            log_output: log stdout at debug level (never done for push/pull).
            log_stderr: log stderr at warning level.
            Returns (result, stdout_data), where result is True on exit status 0.
        """
        adb_args = [self.adb_path] + adb_args
        log_debug('run adb cmd: %s' % adb_args)
        subproc = subprocess.Popen(adb_args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        stdout_data, stderr_data = subproc.communicate()
        stdout_data = bytes_to_str(stdout_data)
        stderr_data = bytes_to_str(stderr_data)
        returncode = subproc.returncode
        result = (returncode == 0)
        # Guard the lookup so an empty argument list doesn't raise IndexError.
        subcommand = adb_args[1] if len(adb_args) > 1 else None
        if log_output and stdout_data and subcommand != 'push' and subcommand != 'pull':
            log_debug(stdout_data)
        if log_stderr and stderr_data:
            log_warning(stderr_data)
        log_debug('run adb cmd: %s  [result %s]' % (adb_args, result))
        return (result, stdout_data)

    def check_run(self, adb_args):
        """ Run an adb command, exiting the script if it fails. """
        self.check_run_and_return_output(adb_args)

    def check_run_and_return_output(self, adb_args, stdout_file=None, log_output=True):
        """ Run an adb command, exiting the script if it fails. Return its stdout.

            stdout_file: kept so existing callers keep working; it is not used, because
                         run_and_return_output() has no file redirection parameter.
            log_output: log stdout at debug level.
        """
        # Pass by keyword: run_and_return_output() takes (log_output, log_stderr), so
        # forwarding stdout_file positionally would be misread as log_output.
        result, stdoutdata = self.run_and_return_output(adb_args, log_output=log_output)
        if not result:
            log_exit('run "adb %s" failed' % adb_args)
        return stdoutdata

    def _unroot(self):
        """ Switch adb back to non-root if the shell currently runs as root. """
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return
        if 'root' not in stdoutdata:
            return
        log_info('unroot adb')
        self.run(['unroot'])
        self.run(['wait-for-device'])
        time.sleep(1)

    def switch_to_root(self):
        """ Try to run adb as root. Return True if the shell runs as root afterwards.

            When enable_switch_to_root is False, adb is unrooted and False is returned.
            No attempt is made on 'user' builds.
        """
        if not self.enable_switch_to_root:
            self._unroot()
            return False
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        if not result:
            return False
        if 'root' in stdoutdata:
            return True
        build_type = self.get_property('ro.build.type')
        if build_type == 'user':
            return False
        self.run(['root'])
        time.sleep(1)
        self.run(['wait-for-device'])
        result, stdoutdata = self.run_and_return_output(['shell', 'whoami'])
        return result and 'root' in stdoutdata

    def get_property(self, name):
        """ Return the value of a device property, or None if getprop fails. """
        result, stdoutdata = self.run_and_return_output(['shell', 'getprop', name])
        # getprop output ends with a newline; strip it so values compare equal to
        # plain strings like 'user'.
        return stdoutdata.strip() if result else None

    def set_property(self, name, value):
        """ Set a device property. Return True if setprop succeeds. """
        return self.run(['shell', 'setprop', name, value])

    def get_device_arch(self):
        """ Return the device arch ('arm64', 'arm', 'x86_64' or 'x86') based on `uname -m`. """
        output = self.check_run_and_return_output(['shell', 'uname', '-m'])
        # Check the longer names first: 'aarch64' before 'arm', 'x86_64' before '86'.
        if 'aarch64' in output:
            return 'arm64'
        if 'arm' in output:
            return 'arm'
        if 'x86_64' in output:
            return 'x86_64'
        if '86' in output:
            return 'x86'
        log_fatal('unsupported architecture: %s' % output.strip())
        return ''

    def get_android_version(self):
        """ Get Android version on device, like 7 is for Android N, 8 is for Android O.

            Returns 0 when the version can't be determined.
        """
        build_version = self.get_property('ro.build.version.release')
        android_version = 0
        if build_version:
            if not build_version[0].isdigit():
                # A release named by letter: 'L' maps to 5, 'M' to 6, and so on.
                c = build_version[0].upper()
                if c.isupper() and c >= 'L':
                    android_version = ord(c) - ord('L') + 5
            else:
                strs = build_version.split('.')
                if strs:
                    android_version = int(strs[0])
        return android_version
339
340
def flatten_arg_list(arg_list):
    """ Concatenate a list of lists into one flat list.

        An empty or None arg_list gives an empty list.
    """
    if not arg_list:
        return []
    return [item for items in arg_list for item in items]
347
348
def remove(dir_or_file):
    """ Delete a file, or a directory tree (ignoring errors). Do nothing for other paths. """
    if os.path.isfile(dir_or_file):
        os.remove(dir_or_file)
        return
    if os.path.isdir(dir_or_file):
        shutil.rmtree(dir_or_file, ignore_errors=True)
354
355
def open_report_in_browser(report_path):
    """ Open a report file in a web browser.

        On darwin the `open` command is tried first. Otherwise, or if that fails,
        the webbrowser module is used: Chrome when available, else the default browser.
    """
    if is_darwin():
        # On darwin 10.12.6, webbrowser can't open browser, so try `open` cmd first.
        try:
            subprocess.check_call(['open', report_path])
            return
        except (subprocess.CalledProcessError, OSError):
            # `open` failed or couldn't be started; fall back to webbrowser below.
            pass
    import webbrowser
    try:
        # Try to open the report with Chrome
        browser = webbrowser.get('google-chrome')
        browser.open(report_path, new=0, autoraise=True)
    except webbrowser.Error:
        # webbrowser.get() doesn't work well on darwin/windows.
        webbrowser.open_new_tab(report_path)
372
def is_elf_file(path):
    """ Return True if path is a regular file starting with the ELF magic number. """
    if not os.path.isfile(path):
        return False
    with open(path, 'rb') as elf_fh:
        magic = elf_fh.read(4)
    return magic == b'\x7fELF'
378
def find_real_dso_path(dso_path_in_record_file, binary_cache_path):
    """ Given the path of a shared library in perf.data, find its real path in the file system.

        The copy under binary_cache_path (when given) is preferred over the recorded
        path itself. Returns None if neither is an elf file.
    """
    candidates = []
    if binary_cache_path:
        # Drop the first character (the leading '/') so the path joins under the cache dir.
        candidates.append(os.path.join(binary_cache_path, dso_path_in_record_file[1:]))
    candidates.append(dso_path_in_record_file)
    for candidate in candidates:
        if is_elf_file(candidate):
            return candidate
    return None
388
389
class Addr2Nearestline(object):
    """ Use llvm-symbolizer to convert (dso_path, func_addr, addr) to (source_file, line).
        For instructions generated by C++ compilers without a matching statement in source code
        (like stack corruption check, switch optimization, etc.), the symbolizer can't generate
        line information. However, we want to assign the instruction to the nearest line before
        the instruction (just like objdump -dl). So we use below strategy:
        Instead of finding the exact line of the instruction in an address, we find the nearest
        line to the instruction in an address. If an address doesn't have a line info, we find
        the line info of address - 1. If still no line info, then use address - 2, address - 3,
        etc.

        The implementation steps are as below:
        1. Collect all (dso_path, func_addr, addr) requests before converting. This saves the
        times to call llvm-symbolizer.
        2. Convert addrs to (source_file, line) pairs for each dso_path as below:
          2.1 Check if the dso_path has .debug_line. If not, omit its conversion.
          2.2 Get arch of the dso_path, and decide the addr_step for it. addr_step is the step we
          change addr each time. For example, since instructions of arm64 are all 4 bytes long,
          addr_step for arm64 can be 4.
          2.3 Use llvm-symbolizer to find line info for each addr in the dso_path.
          2.4 For each addr without line info, use llvm-symbolizer to find line info for
              range(addr - addr_step, addr - addr_step * 4 - 1, -addr_step).
          2.5 For each addr without line info, use llvm-symbolizer to find line info for
              range(addr - addr_step * 5, addr - addr_step * 128 - 1, -addr_step).
              (128 is a guess number. A nested switch statement in
               system/core/demangle/Demangler.cpp has >300 bytes without line info in arm64.)
    """
    class Dso(object):
        """ Info of a dynamic shared library.
            addrs: a map from address to Addr object in this dso.
        """
        def __init__(self):
            self.addrs = {}

    class Addr(object):
        """ Info of an addr request.
            func_addr: start_addr of the function containing addr.
            source_lines: None until resolved, then a list of (file_id, line_number) tuples
                          for addr, or (file_id, line_number, func_id) tuples when function
                          names are requested.
                          source_lines[:-1] are all for inlined functions.
        """
        def __init__(self, func_addr):
            self.func_addr = func_addr
            self.source_lines = None

    def __init__(self, ndk_path, binary_cache_path, with_function_name):
        """ ndk_path: ndk directory used to find llvm-symbolizer and readelf (may be None).
            binary_cache_path: directory searched first for binaries (see find_real_dso_path()).
            with_function_name: if true, also record a function name for each source line.
            Exits the script if llvm-symbolizer can't be found.
        """
        self.symbolizer_path = find_tool_path('llvm-symbolizer', ndk_path)
        if not self.symbolizer_path:
            log_exit("Can't find llvm-symbolizer. Please set ndk path with --ndk_path option.")
        self.readelf = ReadElf(ndk_path)
        self.dso_map = {}  # map from dso_path to Dso.
        self.binary_cache_path = binary_cache_path
        self.with_function_name = with_function_name
        # Saving file names for each addr takes a lot of memory. So we store file ids in Addr,
        # and provide data structures connecting file id and file name here.
        self.file_name_to_id = {}
        self.file_id_to_name = []
        self.func_name_to_id = {}
        self.func_id_to_name = []

    def add_addr(self, dso_path, func_addr, addr):
        """ Register a request for addr in dso_path. A repeated addr keeps its first entry. """
        dso = self.dso_map.get(dso_path)
        if dso is None:
            dso = self.dso_map[dso_path] = self.Dso()
        if addr not in dso.addrs:
            dso.addrs[addr] = self.Addr(func_addr)

    def convert_addrs_to_lines(self):
        """ Resolve line info for all registered requests, one dso at a time. """
        for dso_path in self.dso_map:
            self._convert_addrs_in_one_dso(dso_path, self.dso_map[dso_path])

    def _convert_addrs_in_one_dso(self, dso_path, dso):
        """ Resolve line info for the addrs of one dso (steps 2.1 - 2.5 of the class doc). """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if not real_path:
            # These names aren't files on disk, so don't report them as missing.
            if dso_path not in ['//anon', 'unknown', '[kernel.kallsyms]']:
                log_debug("Can't find dso %s" % dso_path)
            return

        if not self._check_debug_line_section(real_path):
            log_debug("file %s doesn't contain .debug_line section." % real_path)
            return

        addr_step = self._get_addr_step(real_path)
        # Search in passes of growing distance. Each pass only queries the addrs which
        # are still without line info after the previous passes.
        self._collect_line_info(dso, real_path, [0])
        self._collect_line_info(dso, real_path, range(-addr_step, -addr_step * 4 - 1, -addr_step))
        self._collect_line_info(dso, real_path,
                                range(-addr_step * 5, -addr_step * 128 - 1, -addr_step))

    def _check_debug_line_section(self, real_path):
        """ Return True if readelf lists a .debug_line section in the file. """
        return '.debug_line' in self.readelf.get_sections(real_path)

    def _get_addr_step(self, real_path):
        """ Return the step used when searching backwards: 4 for arm64, 2 for arm, else 1. """
        arch = self.readelf.get_arch(real_path)
        if arch == 'arm64':
            return 4
        if arch == 'arm':
            return 2
        return 1

    def _collect_line_info(self, dso, real_path, addr_shifts):
        """ Use llvm-symbolizer to get line info in a dso, with given addr shifts.

            dso: the Dso whose unresolved addrs are queried and filled in.
            real_path: path of the binary passed to llvm-symbolizer.
            addr_shifts: offsets (zero or negative) added to each addr, nearest first.
        """
        # 1. Collect addrs to send to llvm-symbolizer.
        addr_set = set()
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:  # already has source line, no need to search.
                continue
            for shift in addr_shifts:
                # The addr after shift shouldn't change to another function.
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                addr_set.add(shifted_addr)
                if shifted_addr == addr_obj.func_addr:
                    break
        if not addr_set:
            return
        addr_request = '\n'.join(['0x%x' % addr for addr in sorted(addr_set)])

        # 2. Use llvm-symbolizer to collect line info.
        try:
            subproc = subprocess.Popen(self._build_symbolizer_args(real_path),
                                       stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate(str_to_bytes(addr_request))
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return
        # Parse the output. A line starting with '0x' starts the entries of a new address.
        # Each following entry is a source location line, preceded by a function name
        # line when with_function_name is set.
        addr_map = {}
        cur_line_list = None
        need_function_name = self.with_function_name
        cur_function_name = None
        for line in stdoutdata.strip().split('\n'):
            line = line.strip()
            if not line:
                continue
            if line[:2] == '0x':
                # a new address
                cur_line_list = addr_map[int(line, 16)] = []
            elif need_function_name:
                cur_function_name = line.strip()
                need_function_name = False
            else:
                need_function_name = self.with_function_name
                if cur_line_list is None:
                    continue
                file_path, line_number = self._parse_source_location(line)
                if not file_path or not line_number:
                    # An addr can have a list of (file, line), when the addr belongs to an inlined
                    # function. Sometimes only part of the list has ? mark. In this case, we think
                    # the line info is valid if the first line doesn't have ? mark.
                    if not cur_line_list:
                        cur_line_list = None
                    continue
                file_id = self._get_file_id(file_path)
                if self.with_function_name:
                    func_id = self._get_func_id(cur_function_name)
                    cur_line_list.append((file_id, line_number, func_id))
                else:
                    cur_line_list.append((file_id, line_number))

        # 3. Fill line info in dso.addrs.
        for addr in dso.addrs:
            addr_obj = dso.addrs[addr]
            if addr_obj.source_lines:
                continue
            for shift in addr_shifts:
                shifted_addr = max(addr + shift, addr_obj.func_addr)
                lines = addr_map.get(shifted_addr)
                if lines:
                    addr_obj.source_lines = lines
                    break
                if shifted_addr == addr_obj.func_addr:
                    break

    def _build_symbolizer_args(self, binary_path):
        """ Return the llvm-symbolizer command line used for binary_path. """
        args = [self.symbolizer_path, '-print-address', '-inlining', '-obj=%s' % binary_path]
        if self.with_function_name:
            args += ['-functions=linkage', '-demangle']
        else:
            args.append('-functions=none')
        return args

    def _parse_source_location(self, line):
        """ Parse a "filename:line:column" line into (file_path, line_number).

            Returns (None, None) if the line isn't in that format, if the file or line
            part is empty or contains '?', or if the line part isn't an integer.
        """
        file_path, line_number = None, None
        # Handle lines in format filename:line:column, like "runtest/two_functions.cpp:14:25".
        # Filename may contain ':' like "C:\Users\...\file".
        items = line.rsplit(':', 2)
        if len(items) == 3:
            file_path, line_number = items[:2]
        if not file_path or ('?' in file_path) or not line_number or ('?' in line_number):
            return None, None
        try:
            line_number = int(line_number)
        except ValueError:
            return None, None
        return file_path, line_number

    def _get_file_id(self, file_path):
        """ Return the id of file_path, assigning the next free id on first use. """
        file_id = self.file_name_to_id.get(file_path)
        if file_id is None:
            file_id = self.file_name_to_id[file_path] = len(self.file_id_to_name)
            self.file_id_to_name.append(file_path)
        return file_id

    def _get_func_id(self, func_name):
        """ Return the id of func_name, assigning the next free id on first use. """
        func_id = self.func_name_to_id.get(func_name)
        if func_id is None:
            func_id = self.func_name_to_id[func_name] = len(self.func_id_to_name)
            self.func_id_to_name.append(func_name)
        return func_id

    def get_dso(self, dso_path):
        """ Return the Dso registered for dso_path, or None. """
        return self.dso_map.get(dso_path)

    def get_addr_source(self, dso, addr):
        """ Return the resolved source info of a registered addr in dso, or None.

            The result is a list of (file_name, line) tuples, or
            (file_name, line, function_name) tuples when with_function_name is set.
            addr must have been registered with add_addr().
        """
        source = dso.addrs[addr].source_lines
        if source is None:
            return None
        if self.with_function_name:
            return [(self.file_id_to_name[file_id], line, self.func_id_to_name[func_id])
                    for (file_id, line, func_id) in source]
        return [(self.file_id_to_name[file_id], line) for (file_id, line) in source]
609
610
class SourceFileSearcher(object):
    """ Find source file paths in the file system.
        The file paths reported by the symbolizer are the paths stored in debug
        sections of shared libraries. They are converted to file paths in the file
        system in below steps:
        1. Collect all file paths under the provided source_dirs whose extension
           is listed in SOURCE_FILE_EXTS (C/C++ headers and sources, Java, Kotlin).
        2. Given an abstract_path, select the best real path as below:
           2.1 Find all real paths with the same file name as the abstract path.
           2.2 Select the real path having the longest common suffix with the
               abstract path.
    """

    SOURCE_FILE_EXTS = {'.h', '.hh', '.H', '.hxx', '.hpp', '.h++',
                        '.c', '.cc', '.C', '.cxx', '.cpp', '.c++',
                        '.java', '.kt'}

    @classmethod
    def is_source_filename(cls, filename):
        """ Return True if filename has one of the known source file extensions. """
        _, extension = os.path.splitext(filename)
        return extension in cls.SOURCE_FILE_EXTS

    def __init__(self, source_dirs):
        # Map from filename to a list of reversed directory path containing filename.
        # Paths are stored reversed so a common suffix can be measured as a common prefix.
        self.filename_to_rparents = {}
        self._collect_paths(source_dirs)

    def _collect_paths(self, source_dirs):
        """ Walk source_dirs and record the parent directory of each source file. """
        for source_dir in source_dirs:
            for parent, _, file_names in os.walk(source_dir):
                source_names = [name for name in file_names if self.is_source_filename(name)]
                if not source_names:
                    continue
                reversed_parent = parent[::-1]
                for name in source_names:
                    self.filename_to_rparents.setdefault(name, []).append(reversed_parent)

    def get_real_path(self, abstract_path):
        """ Return the collected path best matching abstract_path, or None.

            Among the collected files with the same file name, the first one whose
            directory shares the longest suffix with the abstract directory wins.
        """
        normalized_path = abstract_path.replace('/', os.sep)
        abstract_parent, file_name = os.path.split(normalized_path)
        candidates = self.filename_to_rparents.get(file_name)
        if not candidates:
            return None
        reversed_abstract_parent = abstract_parent[::-1]

        def suffix_length(reversed_parent):
            return len(os.path.commonprefix((reversed_parent, reversed_abstract_parent)))

        # max() keeps the first candidate among equals, like a strict '>' scan does.
        best_reversed_parent = max(candidates, key=suffix_length)
        return os.path.join(best_reversed_parent[::-1], file_name)
672
673
class Objdump(object):
    """ A wrapper of objdump to disassemble code. """
    def __init__(self, ndk_path, binary_cache_path):
        self.ndk_path = ndk_path
        self.binary_cache_path = binary_cache_path
        self.readelf = ReadElf(ndk_path)
        self.objdump_paths = {}  # map from arch to the objdump path found for it.

    def get_dso_info(self, dso_path):
        """ Return (real_path, arch) for dso_path, or None if the file can't be
            found or its arch is unknown.
        """
        real_path = find_real_dso_path(dso_path, self.binary_cache_path)
        if real_path:
            arch = self.readelf.get_arch(real_path)
            if arch != 'unknown':
                return (real_path, arch)
        return None

    def disassemble_code(self, dso_info, start_addr, addr_len):
        """ Disassemble addr_len bytes of code starting at start_addr.
            dso_info is a (real_path, arch) pair as returned by get_dso_info().
            Return a list of pair (disassemble_code_line, addr), where addr is 0 for
            lines not starting with a hex address. Return None if objdump can't be
            run or prints nothing. Exits the script if objdump can't be found.
        """
        real_path, arch = dso_info

        # 1. Find objdump for the arch, reusing a previously found path.
        objdump_path = self.objdump_paths.get(arch)
        if not objdump_path:
            objdump_path = find_tool_path('objdump', self.ndk_path, arch)
            if not objdump_path:
                log_exit("Can't find objdump. Please set ndk path with --ndk_path option.")
            self.objdump_paths[arch] = objdump_path

        # 2. Run objdump.
        stop_addr = start_addr + addr_len
        command = [objdump_path, '-dlC', '--no-show-raw-insn',
                   '--start-address=0x%x' % start_addr,
                   '--stop-address=0x%x' % stop_addr,
                   real_path]
        try:
            subproc = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
            (stdoutdata, _) = subproc.communicate()
            stdoutdata = bytes_to_str(stdoutdata)
        except OSError:
            return None
        if not stdoutdata:
            return None

        # 3. Pair each output line with the address in front of its first ':'.
        disassembly = []
        for raw_line in stdoutdata.split('\n'):
            text = raw_line.rstrip()  # Remove '\r' on Windows.
            addr_field = text.split(':', 1)[0]
            try:
                addr = int(addr_field, 16)
            except ValueError:
                addr = 0
            disassembly.append((text, addr))
        return disassembly
727
728
class ReadElf(object):
    """ A wrapper of readelf. """
    def __init__(self, ndk_path):
        self.readelf_path = find_tool_path('readelf', ndk_path)
        if not self.readelf_path:
            log_exit("Can't find readelf. Please set ndk path with --ndk_path option.")

    def get_arch(self, elf_file_path):
        """ Get arch of an elf file: 'arm64', 'arm', 'x86_64', 'x86' or 'unknown'. """
        if not is_elf_file(elf_file_path):
            return 'unknown'
        try:
            header = bytes_to_str(
                subprocess.check_output([self.readelf_path, '-h', elf_file_path]))
        except subprocess.CalledProcessError:
            return 'unknown'
        # The order of the markers is kept as is; the first one found decides.
        arch_markers = (
            ('AArch64', 'arm64'),
            ('ARM', 'arm'),
            ('X86-64', 'x86_64'),
            ('80386', 'x86'),
        )
        for marker, arch in arch_markers:
            if marker in header:
                return arch
        return 'unknown'

    def get_build_id(self, elf_file_path, with_padding=True):
        """ Get build id of an elf file.
            Returns "" if the file isn't an elf file, readelf fails, or no build id
            is printed. with_padding selects the form returned by pad_build_id().
        """
        if not is_elf_file(elf_file_path):
            return ""
        try:
            notes = bytes_to_str(
                subprocess.check_output([self.readelf_path, '-n', elf_file_path]))
        except subprocess.CalledProcessError:
            return ""
        match = re.search(r'Build ID:\s*(\S+)', notes)
        if not match:
            return ""
        build_id = match.group(1)
        return self.pad_build_id(build_id) if with_padding else build_id

    @staticmethod
    def pad_build_id(build_id):
        """ Pad or cut build id to 40 hex numbers (20 bytes), and prefix it with '0x'. """
        return '0x' + build_id[:40].ljust(40, '0')

    def get_sections(self, elf_file_path):
        """ Get sections of an elf file, as a list of section names.
            Returns an empty list if the file isn't an elf file or readelf fails.
        """
        if not is_elf_file(elf_file_path):
            return []
        try:
            listing = bytes_to_str(
                subprocess.check_output([self.readelf_path, '-SW', elf_file_path]))
        except subprocess.CalledProcessError:
            return []
        section_names = []
        for line in listing.split('\n'):
            # Parse line like:" [ 1] .note.android.ident NOTE  0000000000400190 ...".
            match = re.search(r'^\s+\[\s*\d+\]\s(.+?)\s', line)
            if not match:
                continue
            section_name = match.group(1).strip()
            if section_name:
                section_names.append(section_name)
        return section_names
796
def extant_dir(arg):
    """ArgumentParser type that only accepts extant directories.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a directory.
    """
    resolved = os.path.realpath(arg)
    if os.path.isdir(resolved):
        return resolved
    raise argparse.ArgumentTypeError('{} is not a directory.'.format(resolved))
810
def extant_file(arg):
    """ArgumentParser type that only accepts extant files.

    Args:
        arg: The string argument given on the command line.
    Returns: The argument as a realpath.
    Raises:
        argparse.ArgumentTypeError: The given path isn't a file.
    """
    resolved = os.path.realpath(arg)
    if os.path.isfile(resolved):
        return resolved
    raise argparse.ArgumentTypeError('{} is not a file.'.format(resolved))
824
# Set the root logger level to DEBUG when this module is loaded, so messages from
# log_debug() and log_info() are not filtered out by the root logger level.
logging.getLogger().setLevel(logging.DEBUG)
826