1#!/usr/bin/env python3 2# 3# Copyright (C) 2019 The Android Open Source Project 4# All rights reserved. 5# 6# Redistribution and use in source and binary forms, with or without 7# modification, are permitted provided that the following conditions 8# are met: 9# * Redistributions of source code must retain the above copyright 10# notice, this list of conditions and the following disclaimer. 11# * Redistributions in binary form must reproduce the above copyright 12# notice, this list of conditions and the following disclaimer in 13# the documentation and/or other materials provided with the 14# distribution. 15# 16# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 23# OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 24# AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 26# OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27# SUCH DAMAGE. 
# Scan an ELF file and its tree of DT_NEEDED ELF files, and dump out a JSON file listing:
#  - each ELF file
#  - its DT_NEEDED entries
#  - its defined symbols
#  - its relocations

import argparse
import json
import os
import re
import shlex
import shutil
import subprocess
import sys
import tempfile
import textwrap
import typing
from enum import Enum
from typing import Any, Set, List, Dict, Optional
from subprocess import PIPE, DEVNULL
from pathlib import Path

from common_types import LoadedLibrary, SymBind, SymKind, DynSymbol, DynSymbols, Relocations, \
    SymbolRef, bfs_walk, elf_tree_to_json


# Raw llvm-readelf stdout, keyed by the repr() of the full command line, so each distinct query
# is only executed once per run.
g_readelf_cache: Dict[str, str] = {}

# SONAME (or basename fallback) already computed for a given ELF path.
g_path_to_soname_cache: Dict[Path, str] = {}


def do_readelf_query(arguments: List[str]) -> List[str]:
    """Run `llvm-readelf` with the given arguments and return its stdout split into lines.

    Results are memoized in g_readelf_cache, keyed by the complete command line.

    Raises:
        subprocess.CalledProcessError: if llvm-readelf exits with a non-zero status
            (the subprocess is run with check=True).
    """
    cmdline = ['llvm-readelf'] + arguments
    key = repr(cmdline)
    if key not in g_readelf_cache:
        g_readelf_cache[key] = subprocess.run(cmdline, check=True, stdout=PIPE).stdout.decode()
    return g_readelf_cache[key].splitlines()


def get_elf_soname(path: Path) -> str:
    """Return the DT_SONAME of the ELF file at `path`.

    Falls back to the file's basename when the dynamic section has no SONAME entry. Results are
    memoized in g_path_to_soname_cache.
    """
    if path in g_path_to_soname_cache:
        return g_path_to_soname_cache[path]

    for line in do_readelf_query(['-d', str(path)]):
        m = re.search(r'\(SONAME\)\s+Library soname: \[(.+)\]$', line)
        if m:
            result = m.group(1)
            break
    else:
        # No SONAME entry was found in the dynamic section.
        result = os.path.basename(path)

    g_path_to_soname_cache[path] = result
    return result


def get_elf_needed(path: Path) -> List[str]:
    """Return the DT_NEEDED library names of the ELF file at `path`, in readelf output order."""
    matches = (re.search(r'\(NEEDED\)\s+Shared library: \[(.+)\]$', line)
               for line in do_readelf_query(['-d', str(path)]))
    return [m.group(1) for m in matches if m]


# Matches one row of `llvm-readelf --dyn-syms` output. Only FUNC/IFUNC/OBJECT/NOTYPE symbols with
# GLOBAL or WEAK binding match; every other row is ignored by the caller.
#
# The version name accepts '.' as well as word characters so that it agrees with the version
# pattern in kRelocationMatcher; otherwise a symbol such as `foo@VER_1.2` would be dropped from
# the symbol table while relocations could still refer to it by index.
kSymbolMatcher = re.compile(r'''
    \s+ (\d+) : \s*                     # number
    [0-9a-f]+ \s+                       # value
    [0-9a-f]+ \s+                       # size
    (FUNC|IFUNC|OBJECT|NOTYPE) \s+      # type
    (GLOBAL|WEAK) \s+                   # bind
    \w+ \s+                             # vis
    (\d+|UND) \s+                       # ndx
    ([\.\w]+)                           # name
    (?:(@@?)([\.\w]+))?                 # version
    $
''', re.VERBOSE)


def get_dyn_symbols(path: Path) -> DynSymbols:
    """Parse the dynamic symbol table of the ELF file at `path`.

    Returns a mapping from the symbol's index in the dynamic symbol table to a DynSymbol built
    from (name, kind, bind, is-defined, version separator, version name). Rows that do not match
    kSymbolMatcher are skipped, as is `__cfi_check`.

    Exits the process if the output looks like it came from an obsolete llvm-readelf.
    """
    kind_lookup = {
        'FUNC': SymKind.Func,
        'IFUNC': SymKind.Func,
        'OBJECT': SymKind.Var,
        'NOTYPE': SymKind.Func,
    }
    bind_lookup = { 'GLOBAL': SymBind.Global, 'WEAK': SymBind.Weak }

    result = {}
    for line in do_readelf_query(['--dyn-syms', str(path)]):
        m = kSymbolMatcher.match(line)
        if not m:
            # gLinux currently has a version of llvm-readelf whose output is very different from
            # the current versions of llvm-readelf (or GNU readelf).
            if 'Symbol table of .gnu.hash for image:' in line:
                sys.exit('error: obsolete version of llvm-readelf')
            continue

        num, kind, bind, ndx, name, ver_type, ver_name = m.groups()

        if name == '__cfi_check':
            # The linker gives an error like:
            #    CANNOT LINK EXECUTABLE "/data/local/tmp/out-linker-bench/b_libandroid_servers": unaligned __cfi_check in the library "(null)"
            # I am probably breaking some kind of CFI invariant, so strip these out for now.
            continue

        # A symbol is "defined" when its section index is anything other than UND.
        result[int(num)] = DynSymbol(name, kind_lookup[kind], bind_lookup[bind], ndx != 'UND',
                                     ver_type, ver_name)

    return result


# Matches one row of `llvm-readelf -r` output. The symbol value/name/version part is optional
# because relocations without a symbol have nothing in those columns.
kRelocationMatcher = re.compile(r'''
    ([0-9a-f]+) \s+                 # offset
    ([0-9a-f]+) \s+                 # info
    (\w+)                           # type
    (?:
        \s+ [0-9a-f]+ \s+           # symbol value
        ([\.\w]+)                   # symbol name
        (?: @@? ([\.\w]+) )?        # version
    )?
    \b
''', re.VERBOSE)


def scan_relocations(path: Path, syms: DynSymbols) -> Relocations:
    """Parse the relocations of the ELF file at `path` into a Relocations object.

    Args:
        path: the ELF file to scan.
        syms: the file's dynamic symbols as returned by get_dyn_symbols(), used to decide whether
            a relocation's symbol is weak.

    Relative relocations are recorded as offsets in `relative`; JUMP_SLOT and GLOB_DAT relocations
    are recorded as SymbolRefs in `jump_slots` and `got`; ABS32/ABS64 relocations are recorded as
    (offset, SymbolRef) pairs in `symbolic`. R_ARM_IRELATIVE is skipped.

    Exits the process with an error message on any row it cannot interpret.
    """
    result: Relocations = Relocations()
    for line in do_readelf_query(['-r', str(path)]):
        m = kRelocationMatcher.match(line)
        if not m: continue

        offset_str, info_str, reloc_name, sym_name, ver = m.groups()

        # The width of the printed offset selects the layout: 8 hex digits are treated as a
        # 32-bit file (4-byte words, symbol index in info >> 8) and 16 hex digits as a 64-bit
        # file (8-byte words, symbol index in info >> 32). Offsets are stored in units of words.
        if len(offset_str) == 8:
            offset = int(offset_str, 16) // 4
            sym_idx = int(info_str, 16) >> 8
        elif len(offset_str) == 16:
            offset = int(offset_str, 16) // 8
            sym_idx = int(info_str, 16) >> 32
        else:
            sys.exit(f'error: invalid offset length: {repr(offset_str)}')

        # TODO: R_ARM_IRELATIVE doesn't work, so skip it.
        if reloc_name == 'R_ARM_IRELATIVE': continue

        if reloc_name in ['R_ARM_RELATIVE', 'R_AARCH64_RELATIVE']:
            # Validate explicitly rather than with `assert`, which is stripped under `python -O`.
            if sym_name is not None:
                sys.exit(f'error: unexpected symbol for relative reloc {m.groups()} in {path}')
            result.relative.append(offset)
            continue

        if sym_name is None:
            sys.exit(f'error: missing symbol for reloc {m.groups()} in {path}')

        # get_dyn_symbols() skips some symbols (e.g. __cfi_check and rows its pattern does not
        # match), so report a dangling index cleanly instead of failing with a bare KeyError.
        if sym_idx not in syms:
            sys.exit(f'error: reloc {m.groups()} in {path} refers to unknown dynamic symbol '
                     f'index {sym_idx}')

        is_weak = syms[sym_idx].bind == SymBind.Weak
        symbol = SymbolRef(sym_name, is_weak, ver)

        if reloc_name in ['R_ARM_JUMP_SLOT', 'R_AARCH64_JUMP_SLOT']:
            result.jump_slots.append(symbol)
        elif reloc_name in ['R_ARM_GLOB_DAT', 'R_AARCH64_GLOB_DAT']:
            result.got.append(symbol)
        elif reloc_name in ['R_ARM_ABS32', 'R_AARCH64_ABS64']:
            result.symbolic.append((offset, symbol))
        else:
            sys.exit(f'error: unrecognized reloc {m.groups()} in {path}')

    return result


def load_elf_tree(search_path: List[Path], path: Path) -> LoadedLibrary:
    """Load the ELF file at `path` and, recursively, every library it names in DT_NEEDED.

    Args:
        search_path: directories searched, in order, for each DT_NEEDED name.
        path: the root ELF file.

    Returns the LoadedLibrary for the root file; its `needed` list links to the LoadedLibrary
    objects of its dependencies. Each soname is loaded once and shared between dependents.

    Exits the process if a DT_NEEDED library cannot be found in `search_path`, or if two
    different files resolve to the same soname.
    """

    # Every library loaded so far, keyed by soname.
    libraries: Dict[str, LoadedLibrary] = {}

    def find_library(needed: str) -> Optional[LoadedLibrary]:
        # Reuse an already loaded (or currently loading) library with this soname.
        if needed in libraries: return libraries[needed]

        for candidate_dir in search_path:
            candidate_path = candidate_dir / needed
            if candidate_path.exists():
                return load(candidate_path)

        sys.exit(f'error: missing DT_NEEDED lib {needed}!')

    def load(path: Path) -> LoadedLibrary:
        lib = LoadedLibrary()
        lib.soname = get_elf_soname(path)
        if lib.soname in libraries: sys.exit(f'soname already loaded: {lib.soname}')
        # Register before walking dependencies so that find_library() sees this library while
        # its DT_NEEDED entries are still being resolved.
        libraries[lib.soname] = lib

        lib.syms = get_dyn_symbols(path)
        lib.rels = scan_relocations(path, lib.syms)

        for needed in get_elf_needed(path):
            needed_lib = find_library(needed)
            if needed_lib is not None:
                lib.needed.append(needed_lib)

        return lib

    return load(path)


def main() -> None:
    """Command-line entry point: dump the ELF tree rooted at `input` as JSON to `output`."""
    parser = argparse.ArgumentParser()
    parser.add_argument('input', type=str)
    parser.add_argument('output', type=str)
    parser.add_argument('-L', dest='search_path', metavar='PATH', action='append', type=str, default=[])

    args = parser.parse_args()
    search_path = [Path(p) for p in args.search_path]

    # Load the whole tree before opening the output file: opening with 'w' truncates it, and
    # loading can exit on error, which would otherwise leave an empty/clobbered output behind.
    root = load_elf_tree(search_path, Path(args.input))

    with open(Path(args.output), 'w') as f:
        json.dump(elf_tree_to_json(root), f, sort_keys=True, indent=2)


if __name__ == '__main__':
    main()