1#!/usr/bin/env python3
2#
3# Copyright (C) 2019 The Android Open Source Project
4# All rights reserved.
5#
6# Redistribution and use in source and binary forms, with or without
7# modification, are permitted provided that the following conditions
8# are met:
9#  * Redistributions of source code must retain the above copyright
10#    notice, this list of conditions and the following disclaimer.
11#  * Redistributions in binary form must reproduce the above copyright
12#    notice, this list of conditions and the following disclaimer in
13#    the documentation and/or other materials provided with the
14#    distribution.
15#
16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
19# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
20# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
21# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
22# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
23# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
24# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
26# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27# SUCH DAMAGE.
28
29# Scan an ELF file and its tree of DT_NEEDED ELF files, and dump out a JSON file listing:
30#  - each ELF file
31#  - its DT_NEEDED entries
32#  - its defined symbols
33#  - its relocations
34
35import argparse
36import json
37import os
38import re
39import shlex
40import shutil
41import subprocess
42import sys
43import tempfile
44import textwrap
45import typing
46from enum import Enum
47from typing import Any, Set, List, Dict, Optional
48from subprocess import PIPE, DEVNULL
49from pathlib import Path
50
51from common_types import LoadedLibrary, SymBind, SymKind, DynSymbol, DynSymbols, Relocations, \
52    SymbolRef, bfs_walk, elf_tree_to_json
53
54
# Decoded llvm-readelf output, keyed by repr() of the full command line, so that each distinct
# query is only run once per process (see do_readelf_query).
g_readelf_cache: Dict[str, str] = {}
# Soname already determined for each ELF path (see get_elf_soname).
g_path_to_soname_cache: Dict[Path, str] = {}
57
def do_readelf_query(arguments: List[str]) -> List[str]:
    """Run llvm-readelf with the given arguments and return its stdout as a list of lines.

    The decoded output is memoized in g_readelf_cache under the repr() of the command line, so
    repeating a query does not spawn another process. Because the command is run with
    check=True, a non-zero exit status raises subprocess.CalledProcessError.
    """
    argv = ['llvm-readelf'] + arguments
    cache_key = repr(argv)

    text = g_readelf_cache.get(cache_key)
    if text is None:
        completed = subprocess.run(argv, check=True, stdout=PIPE)
        text = completed.stdout.decode()
        g_readelf_cache[cache_key] = text

    return text.splitlines()
65
66
def get_elf_soname(path: Path) -> str:
    """Return the soname of the ELF file at path.

    The soname is taken from the (SONAME) line of `llvm-readelf -d`; if there is no such line,
    the basename of path is used instead. Results are memoized in g_path_to_soname_cache.
    """
    cached = g_path_to_soname_cache.get(path)
    if cached is not None:
        return cached

    soname = None
    for line in do_readelf_query(['-d', str(path)]):
        found = re.search(r'\(SONAME\)\s+Library soname: \[(.+)\]$', line)
        if found:
            soname = found.group(1)
            break

    if soname is None:
        # No DT_SONAME entry: fall back to the file name.
        soname = os.path.basename(path)

    g_path_to_soname_cache[path] = soname
    return soname
79
80
def get_elf_needed(path: Path) -> List[str]:
    """Return the library names from the (NEEDED) lines of `llvm-readelf -d` for path.

    Names are returned in the order llvm-readelf prints them.
    """
    needed_re = re.compile(r'\(NEEDED\)\s+Shared library: \[(.+)\]$')
    hits = (needed_re.search(line) for line in do_readelf_query(['-d', str(path)]))
    return [hit.group(1) for hit in hits if hit]
89
90
# Matches one GLOBAL or WEAK entry of `llvm-readelf --dyn-syms` output. Capture groups, in order:
# symbol number, type, bind, section index (or UND), name, version separator (@ or @@) and
# version name. The last two groups are None for unversioned symbols. Lines with any other type
# or binding do not match.
kSymbolMatcher = re.compile(r'''
    \s+ (\d+) : \s*                 # number
    [0-9a-f]+ \s+                   # value
    [0-9a-f]+ \s+                   # size
    (FUNC|IFUNC|OBJECT|NOTYPE) \s+  # type
    (GLOBAL|WEAK) \s+               # bind
    \w+ \s+                         # vis
    (\d+|UND) \s+                   # ndx
    ([\.\w]+)                       # name
    (?:(@@?)([\.\w]+))?             # version (may contain dots, as in kRelocationMatcher)
    $
''', re.VERBOSE)
103
104
def get_dyn_symbols(path: Path) -> DynSymbols:
    """Parse `llvm-readelf --dyn-syms` output for path into a table of dynamic symbols.

    Returns a dict mapping each symbol's number in the table to a DynSymbol. Only lines accepted
    by kSymbolMatcher are recorded, and __cfi_check is always left out. Exits the process if the
    output looks like it was produced by an obsolete llvm-readelf.
    """
    # Note that NOTYPE symbols are recorded as functions, and IFUNC is not distinguished from
    # FUNC.
    kinds = {
        'FUNC': SymKind.Func,
        'IFUNC': SymKind.Func,
        'OBJECT': SymKind.Var,
        'NOTYPE': SymKind.Func,
    }
    binds = {
        'GLOBAL': SymBind.Global,
        'WEAK': SymBind.Weak,
    }

    symbols = {}
    for line in do_readelf_query(['--dyn-syms', str(path)]):
        parsed = kSymbolMatcher.match(line)
        if parsed is None:
            # gLinux currently has a version of llvm-readelf whose output is very different
            # from the current versions of llvm-readelf (or GNU readelf).
            if 'Symbol table of .gnu.hash for image:' in line:
                sys.exit('error: obsolete version of llvm-readelf')
            continue

        index, kind, bind, ndx, name, ver_type, ver_name = parsed.groups()

        # The linker gives an error like:
        #    CANNOT LINK EXECUTABLE "/data/local/tmp/out-linker-bench/b_libandroid_servers": unaligned __cfi_check in the library "(null)"
        # I am probably breaking some kind of CFI invariant, so strip these out for now.
        if name == '__cfi_check':
            continue

        # A symbol whose section index is UND is undefined in this file.
        is_defined = ndx != 'UND'
        symbols[int(index)] = DynSymbol(name, kinds[kind], binds[bind], is_defined,
                                        ver_type, ver_name)

    return symbols
137
138
# Matches one relocation entry of `llvm-readelf -r` output. The pattern has no end-of-line
# anchor, so text after the matched part (such as an addend) is ignored. Capture groups, in
# order: offset, info, relocation type, symbol name and symbol version. The symbol name is None
# when the entry names no symbol, and the version is None when no @/@@ suffix follows the name.
kRelocationMatcher = re.compile(r'''
    ([0-9a-f]+) \s+     # offset
    ([0-9a-f]+) \s+     # info
    (\w+)               # type
    (?:
        \s+ [0-9a-f]+ \s+       # symbol value
        ([\.\w]+)               # symbol name
        (?: @@? ([\.\w]+) )?    # version
    )?
    \b
''', re.VERBOSE)
150
151
def scan_relocations(path: Path, syms: DynSymbols) -> Relocations:
    """Parse `llvm-readelf -r` output for path into a Relocations object.

    Each line accepted by kRelocationMatcher is sorted by relocation type:
      - R_*_RELATIVE:  the offset is appended to result.relative
      - R_*_JUMP_SLOT: a SymbolRef is appended to result.jump_slots
      - R_*_GLOB_DAT:  a SymbolRef is appended to result.got
      - R_ARM_ABS32 / R_AARCH64_ABS64: an (offset, SymbolRef) pair is appended to
        result.symbolic
    R_ARM_IRELATIVE entries are skipped. Offsets are stored divided by the entry's word size
    (4 or 8 bytes).

    syms is the table returned by get_dyn_symbols for the same file; it is used to decide
    whether the referenced symbol is weak.

    Exits the process with an error message on malformed or unsupported input: an offset of
    unexpected width, a relative relocation that names a symbol, a symbolic relocation without
    a symbol name or whose symbol index is not in syms, or an unrecognized relocation type.
    """
    result: Relocations = Relocations()
    out = do_readelf_query(['-r', str(path)])
    for line in out:
        m = kRelocationMatcher.match(line)
        if not m: continue

        offset_str, info_str, reloc_name, sym_name, ver = m.groups()

        # The printed width of the offset selects the entry layout: 8 hex digits are handled as
        # a 32-bit entry (symbol index in info >> 8), 16 hex digits as a 64-bit entry (symbol
        # index in info >> 32).
        if len(offset_str) == 8:
            offset = int(offset_str, 16) // 4
            sym_idx = int(info_str, 16) >> 8
        elif len(offset_str) == 16:
            offset = int(offset_str, 16) // 8
            sym_idx = int(info_str, 16) >> 32
        else:
            sys.exit(f'error: invalid offset length: {repr(offset_str)}')

        # TODO: R_ARM_IRELATIVE doesn't work, so skip it.
        if reloc_name == 'R_ARM_IRELATIVE': continue

        if reloc_name in ['R_ARM_RELATIVE', 'R_AARCH64_RELATIVE']:
            # Checked explicitly rather than with assert, so that the check is still performed
            # when Python runs with -O.
            if sym_name is not None:
                sys.exit(f'error: unexpected symbol for reloc {m.groups()} in {path}')
            result.relative.append(offset)
            continue

        if sym_name is None:
            sys.exit(f'error: missing symbol for reloc {m.groups()} in {path}')

        # The symbol table only holds the entries get_dyn_symbols kept, so the index may be
        # absent; report that as an input error instead of an unexplained KeyError.
        try:
            sym = syms[sym_idx]
        except KeyError:
            sys.exit(f'error: reloc {m.groups()} in {path} refers to unknown dynamic symbol '
                     f'{sym_idx}')

        is_weak = sym.bind == SymBind.Weak
        symbol = SymbolRef(sym_name, is_weak, ver)

        if reloc_name in ['R_ARM_JUMP_SLOT', 'R_AARCH64_JUMP_SLOT']:
            result.jump_slots.append(symbol)
        elif reloc_name in ['R_ARM_GLOB_DAT', 'R_AARCH64_GLOB_DAT']:
            result.got.append(symbol)
        elif reloc_name in ['R_ARM_ABS32', 'R_AARCH64_ABS64']:
            result.symbolic.append((offset, symbol))
        else:
            sys.exit(f'error: unrecognized reloc {m.groups()} in {path}')

    return result
193
194
def load_elf_tree(search_path: List[Path], path: Path) -> LoadedLibrary:
    """Load the ELF file at path together with everything it reaches through DT_NEEDED.

    Every file is loaded once and registered under its soname. A DT_NEEDED name is resolved
    against the already-registered sonames first, and otherwise by probing the directories of
    search_path in order. Exits the process if a needed library is found nowhere, or if a
    newly loaded file has a soname that is already registered.

    Returns the LoadedLibrary for path; its dependencies are reachable through `needed`.
    """
    loaded: Dict[str, LoadedLibrary] = {}

    def resolve(needed: str) -> Optional[LoadedLibrary]:
        # Reuse a library that has been loaded (or is currently being loaded) under this name.
        if needed in loaded:
            return loaded[needed]

        for directory in search_path:
            candidate = directory / needed
            if candidate.exists():
                return load_one(candidate)

        sys.exit(f'error: missing DT_NEEDED lib {needed}!')

    def load_one(elf_path: Path) -> LoadedLibrary:
        lib = LoadedLibrary()
        lib.soname = get_elf_soname(elf_path)
        if lib.soname in loaded:
            sys.exit(f'soname already loaded: {lib.soname}')
        # Register before walking the dependencies, so that a DT_NEEDED entry pointing back at
        # this library resolves to it instead of loading it again.
        loaded[lib.soname] = lib

        lib.syms = get_dyn_symbols(elf_path)
        lib.rels = scan_relocations(elf_path, lib.syms)

        for needed in get_elf_needed(elf_path):
            dependency = resolve(needed)
            if dependency is not None:
                lib.needed.append(dependency)

        return lib

    return load_one(path)
230
231
def main() -> None:
    """Parse the command line, scan the input ELF tree and write it out as JSON.

    The positional arguments are the root ELF file and the JSON output path. Each `-L PATH`
    adds a directory that is searched for DT_NEEDED libraries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', type=str)
    parser.add_argument('output', type=str)
    parser.add_argument('-L', dest='search_path', metavar='PATH', action='append', type=str, default=[])

    args = parser.parse_args()
    search_path = [Path(p) for p in args.search_path]

    # Scan and convert before opening the output: the loaders call sys.exit() on errors, and
    # opening the file first would leave an empty output file behind in that case.
    root = load_elf_tree(search_path, Path(args.input))
    tree = elf_tree_to_json(root)

    with open(Path(args.output), 'w') as f:
        json.dump(tree, f, sort_keys=True, indent=2)
244
245
# Run main() only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()
248