1#!/usr/bin/env python
2#
3# Copyright (C) 2019 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16
17"""ELF file checker.
18
19This command ensures all undefined symbols in an ELF file can be resolved to
20global (or weak) symbols defined in shared objects specified in DT_NEEDED
21entries.
22"""
23
24from __future__ import print_function
25
26import argparse
27import collections
28import os
29import os.path
30import re
31import struct
32import subprocess
33import sys
34
35
# Magic word stored in the `ei_magic` field at the start of an ELF file.
_ELF_MAGIC = b'\x7fELF'


# `e_machine` values of the machines this checker knows about.
_EM_386 = 3
_EM_ARM = 40
_EM_X86_64 = 62
_EM_AARCH64 = 183

# Files whose `e_machine` is outside this set can be skipped on request.
_KNOWN_MACHINES = set((_EM_386, _EM_ARM, _EM_X86_64, _EM_AARCH64))
46
47
# Leading fields of the ELF header, in file order, as pairs of
# (field name, `struct` format code).
_ELF_HEADER_STRUCT = (
  ('ei_magic', '4s'),
  ('ei_class', 'B'),
  ('ei_data', 'B'),
  ('ei_version', 'B'),
  ('ei_osabi', 'B'),
  ('ei_pad', '8s'),
  ('e_type', 'H'),
  ('e_machine', 'H'),
  ('e_version', 'I'),
)

# All format codes glued together into one `struct` format string.
_ELF_HEADER_STRUCT_FMT = ''.join(_field[1] for _field in _ELF_HEADER_STRUCT)


# Record type holding one unpacked value per field listed above.
ELFHeader = collections.namedtuple(
  'ELFHeader', tuple(_field[0] for _field in _ELF_HEADER_STRUCT))
66
67
# Parsed view of one ELF file: its DT_SONAME, its DT_NEEDED entries, the
# symbols it imports and exports, and its ELFHeader.
ELF = collections.namedtuple(
  'ELF', 'dt_soname dt_needed imported exported header')
71
72
def _get_os_name():
  """Get the host OS name.

  Returns:
    'linux' or 'darwin'.

  Raises:
    ValueError: If the host platform is neither Linux nor macOS.
  """
  # Python 2 reports 'linux2' while Python 3 reports 'linux', so match on the
  # prefix instead of one exact spelling.
  if sys.platform.startswith('linux'):
    return 'linux'
  if sys.platform == 'darwin':
    return 'darwin'
  raise ValueError(sys.platform + ' is not supported')
80
81
def _get_build_top():
  """Locate the root of the source tree.

  Walks upward from the current working directory and returns the first
  directory that contains a `.repo` entry, or None if the file system root is
  reached without finding one.
  """
  candidate = os.path.abspath(os.getcwd())
  while True:
    if os.path.exists(os.path.join(candidate, '.repo')):
      return candidate
    parent = os.path.dirname(candidate)
    if parent == candidate:
      # dirname() no longer changes the path: the root has been checked.
      return None
    candidate = parent
92
93
def _select_latest_llvm_version(versions):
  """Pick the newest LLVM prebuilts version name out of `versions`.

  Only names that start with `clang-r<digits>` take part. The numeric revision
  decides first; names sharing a revision are ordered by plain string
  comparison. Returns None when no name qualifies.
  """
  version_re = re.compile('clang-r([0-9]+)([a-z]?)')

  candidates = []
  for name in versions:
    matched = version_re.match(name)
    if matched:
      candidates.append((int(matched.group(1)), name))

  if not candidates:
    return None
  # Tuples order by revision number first and by the full name second.
  return max(candidates)[1]
109
110
def _get_latest_llvm_version(llvm_dir):
  """Return the newest LLVM prebuilts version listed in `llvm_dir`.

  The directory entries are used as the candidate version names.
  """
  installed_versions = os.listdir(llvm_dir)
  return _select_latest_llvm_version(installed_versions)
114
115
def _get_llvm_dir():
  """Find the path to LLVM prebuilts.

  The prebuilts base directory (relative to the source tree root) and the
  version can be overridden with the `LLVM_PREBUILTS_BASE` and
  `LLVM_PREBUILTS_VERSION` environment variables.

  Returns:
    The path to the selected LLVM prebuilts directory, or None if the source
    tree root, the host prebuilts directory, or a version cannot be found.
  """
  build_top = _get_build_top()
  if build_top is None:
    # Not inside a source tree; os.path.join() would raise on None.
    return None

  llvm_prebuilts_base = os.environ.get('LLVM_PREBUILTS_BASE')
  if not llvm_prebuilts_base:
    llvm_prebuilts_base = os.path.join('prebuilts', 'clang', 'host')

  llvm_dir = os.path.join(
    build_top, llvm_prebuilts_base, _get_os_name() + '-x86')

  if not os.path.exists(llvm_dir):
    return None

  llvm_prebuilts_version = os.environ.get('LLVM_PREBUILTS_VERSION')
  if not llvm_prebuilts_version:
    llvm_prebuilts_version = _get_latest_llvm_version(llvm_dir)
  if not llvm_prebuilts_version:
    # The directory holds no recognizable version.
    return None

  llvm_dir = os.path.join(llvm_dir, llvm_prebuilts_version)

  if not os.path.exists(llvm_dir):
    return None

  return llvm_dir
140
141
def _get_llvm_readobj():
  """Find the path to llvm-readobj executable.

  Returns:
    The path to `bin/llvm-readobj` inside the LLVM prebuilts directory when it
    exists; otherwise the bare name 'llvm-readobj'.
  """
  llvm_dir = _get_llvm_dir()
  if llvm_dir:
    llvm_readobj = os.path.join(llvm_dir, 'bin', 'llvm-readobj')
    if os.path.exists(llvm_readobj):
      return llvm_readobj
  # No prebuilts directory (llvm_dir is None) or no executable inside it.
  return 'llvm-readobj'
147
148
class ELFError(ValueError):
  """Base class for the errors raised while parsing an ELF file."""
152
153
class ELFInvalidMagicError(ELFError):
  """Error raised for a file that lacks a valid ELF magic word."""

  def __init__(self):
    # The message is fixed; callers construct this error without arguments.
    ELFError.__init__(self, 'bad ELF magic')
158
159
class ELFParser(object):
  """ELF file parser.

  The leading ELF header fields are read directly from the file. The dynamic
  table and the dynamic symbol table are obtained by running llvm-readobj and
  parsing its text output.
  """

  # `ei_data` byte that marks a big-endian (ELFDATA2MSB) file.
  _ELFDATA2MSB = b'\x02'


  @classmethod
  def _read_elf_header(cls, elf_file_path):
    """Read the leading ELF header fields from the beginning of the file.

    Returns:
      An ELFHeader, or None if the file is too short to contain the fields.
    """
    with open(elf_file_path, 'rb') as elf_file:
      # The '<' and '>' layouts have the same size; only byte order differs.
      buf = elf_file.read(struct.calcsize('<' + _ELF_HEADER_STRUCT_FMT))

    # Decode multi-byte fields in the byte order declared by the file itself
    # (`ei_data`, byte 5) rather than in the byte order of the host.
    byte_order = '>' if buf[5:6] == cls._ELFDATA2MSB else '<'
    try:
      return ELFHeader(
        *struct.unpack(byte_order + _ELF_HEADER_STRUCT_FMT, buf))
    except struct.error:
      return None


  @classmethod
  def open(cls, elf_file_path, llvm_readobj):
    """Open and parse the ELF file.

    Args:
      elf_file_path: Path to the file to parse.
      llvm_readobj: Path (or name) of the llvm-readobj executable.

    Returns:
      An ELF tuple describing the file.

    Raises:
      ELFInvalidMagicError: If the file does not start with the ELF magic.
      subprocess.CalledProcessError: If llvm-readobj exits with an error.
    """
    # Parse the ELF header to check the magic word.
    header = cls._read_elf_header(elf_file_path)
    if not header or header.ei_magic != _ELF_MAGIC:
      raise ELFInvalidMagicError()

    # Run llvm-readobj and parse the output.
    return cls._read_llvm_readobj(elf_file_path, header, llvm_readobj)


  @classmethod
  def _find_prefix(cls, pattern, lines_it):
    """Iterate `lines_it` until finding a string that starts with `pattern`.

    The iterator is left positioned just after the matching line. Returns
    False (with the iterator exhausted) when no line matches.
    """
    for line in lines_it:
      if line.startswith(pattern):
        return True
    return False


  @staticmethod
  def _to_native_str(data):
    """Return `data` as the `str` type of the running interpreter.

    Python 2 pipes already yield `str`. Python 3 pipes yield `bytes`, which
    cannot be compared with the `str` patterns of this parser, so they are
    decoded as UTF-8.
    """
    if isinstance(data, str):
      return data
    return data.decode('utf-8')


  @classmethod
  def _read_llvm_readobj(cls, elf_file_path, header, llvm_readobj):
    """Run llvm-readobj and parse the output.

    Raises:
      subprocess.CalledProcessError: If llvm-readobj exits with a non-zero
        status. Treating its (empty) output as a valid result would let every
        later check pass without having looked at the file.
    """
    # Double-dash spellings are accepted by old and new llvm-readobj alike.
    cmd = [llvm_readobj, '--dynamic-table', '--dyn-symbols', elf_file_path]
    proc = subprocess.Popen(
      cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
      # Attach both streams so that the tool's diagnostics are not lost.
      raise subprocess.CalledProcessError(proc.returncode, cmd, out + err)
    lines = cls._to_native_str(out).splitlines()
    return cls._parse_llvm_readobj(elf_file_path, header, lines)


  @classmethod
  def _parse_llvm_readobj(cls, elf_file_path, header, lines):
    """Parse the output of llvm-readobj.

    Each section is searched from the first line with its own iterator, so
    the result depends neither on the order of the two sections nor on both
    of them being present.
    """
    lines = list(lines)
    dt_soname, dt_needed = cls._parse_dynamic_table(
      elf_file_path, iter(lines))
    imported, exported = cls._parse_dynamic_symbols(iter(lines))
    return ELF(dt_soname, dt_needed, imported, exported, header)


  _DYNAMIC_SECTION_START_PATTERN = 'DynamicSection ['

  _DYNAMIC_SECTION_NEEDED_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+NEEDED\s+Shared library: \[(.*)\]$')

  _DYNAMIC_SECTION_SONAME_PATTERN = re.compile(
    r'^  0x[0-9a-fA-F]+\s+SONAME\s+Library soname: \[(.*)\]$')

  _DYNAMIC_SECTION_END_PATTERN = ']'


  @classmethod
  def _parse_dynamic_table(cls, elf_file_path, lines_it):
    """Parse the dynamic table section.

    Returns:
      A `(dt_soname, dt_needed)` pair. `dt_soname` falls back to the base name
      of `elf_file_path` when the table has no SONAME entry. `dt_needed` lists
      the NEEDED entries in the order they appear.
    """
    dt_soname = os.path.basename(elf_file_path)
    dt_needed = []

    dynamic = cls._find_prefix(cls._DYNAMIC_SECTION_START_PATTERN, lines_it)
    if not dynamic:
      return (dt_soname, dt_needed)

    for line in lines_it:
      if line == cls._DYNAMIC_SECTION_END_PATTERN:
        break

      match = cls._DYNAMIC_SECTION_NEEDED_PATTERN.match(line)
      if match:
        dt_needed.append(match.group(1))
        continue

      match = cls._DYNAMIC_SECTION_SONAME_PATTERN.match(line)
      if match:
        dt_soname = match.group(1)

    return (dt_soname, dt_needed)


  _DYNAMIC_SYMBOLS_START_PATTERN = 'DynamicSymbols ['
  _DYNAMIC_SYMBOLS_END_PATTERN = ']'

  _SYMBOL_ENTRY_START_PATTERN = '  Symbol {'
  _SYMBOL_ENTRY_PATTERN = re.compile(r'^    ([A-Za-z0-9_]+): (.*)$')
  # Trailing ` (<decimal>)` or ` (0x<hex>)` annotation after a value.
  _SYMBOL_ENTRY_PAREN_PATTERN = re.compile(
    r'\s+\((?:(?:\d+)|(?:0x[0-9a-fA-F]+))\)$')
  _SYMBOL_ENTRY_END_PATTERN = '  }'


  @staticmethod
  def _parse_symbol_name(name_with_version):
    """Split `name_with_version` into name and version. This function may split
    at last occurrence of `@@` or `@`.

    Returns:
      A `(name, version)` pair; `version` is '' when there is no `@`.
    """
    pos = name_with_version.rfind('@')
    if pos == -1:
      return (name_with_version, '')

    version = name_with_version[pos + 1:]
    if pos > 0 and name_with_version[pos - 1] == '@':
      # `name@@version`: drop both separator characters from the name.
      return (name_with_version[0:pos - 1], version)
    return (name_with_version[0:pos], version)


  @classmethod
  def _parse_dynamic_symbols(cls, lines_it):
    """Parse dynamic symbol table and collect imported and exported symbols.

    Undefined symbols are imported unless their binding is `Weak`. Defined
    symbols are exported unless their binding is `Local`. Symbols with an
    empty name are ignored.

    Returns:
      An `(imported, exported)` pair of dicts that map a symbol name to the
      set of its versions.
    """
    imported = collections.defaultdict(set)
    exported = collections.defaultdict(set)

    for symbol in cls._parse_dynamic_symbols_internal(lines_it):
      name, version = cls._parse_symbol_name(symbol['Name'])
      if not name:
        continue
      if symbol['Section'] == 'Undefined':
        if symbol['Binding'] != 'Weak':
          imported[name].add(version)
      elif symbol['Binding'] != 'Local':
        exported[name].add(version)

    # Freeze the returned imported/exported dict.
    return (dict(imported), dict(exported))


  @classmethod
  def _parse_dynamic_symbols_internal(cls, lines_it):
    """Parse symbol entries and yield each symbol.

    Every yielded symbol is a dict that maps the keys of one `Symbol { ... }`
    entry to their values, with the trailing parenthesized number removed.
    """

    if not cls._find_prefix(cls._DYNAMIC_SYMBOLS_START_PATTERN, lines_it):
      return

    # The entry being filled in; None while outside of `Symbol { ... }`.
    symbol = None

    for line in lines_it:
      if line == cls._DYNAMIC_SYMBOLS_END_PATTERN:
        return

      if line == cls._SYMBOL_ENTRY_START_PATTERN:
        symbol = {}
        continue

      if symbol is None:
        # Stray line outside of any entry: there is nothing to attach it to.
        continue

      if line == cls._SYMBOL_ENTRY_END_PATTERN:
        yield symbol
        symbol = None
        continue

      match = cls._SYMBOL_ENTRY_PATTERN.match(line)
      if match:
        key = match.group(1)
        value = cls._SYMBOL_ENTRY_PAREN_PATTERN.sub('', match.group(2))
        symbol[key] = value
326
327
class Checker(object):
  """ELF file checker that checks DT_SONAME, DT_NEEDED, and symbols.

  A failed check is reported on stderr and ends the process with exit
  status 2.
  """

  def __init__(self, llvm_readobj):
    """Create a checker that uses `llvm_readobj` to inspect ELF files."""
    self._file_path = ''
    self._file_under_test = None
    self._shared_libs = []

    self._llvm_readobj = llvm_readobj


  # Colorize the message tags only when stderr is a terminal.
  if sys.stderr.isatty():
    _ERROR_TAG = '\033[0;1;31merror:\033[m'  # Bold red
    _NOTE_TAG = '\033[0;1;30mnote:\033[m'  # Bold black
  else:
    _ERROR_TAG = 'error:'
    _NOTE_TAG = 'note:'


  def _error(self, *args):
    """Emit an error to stderr, prefixed with the file under test."""
    print(self._file_path + ': ' + self._ERROR_TAG, *args, file=sys.stderr)


  def _note(self, *args):
    """Emit a note to stderr, prefixed with the file under test."""
    print(self._file_path + ': ' + self._NOTE_TAG, *args, file=sys.stderr)


  def _load_elf_file(self, path, skip_bad_elf_magic):
    """Load an ELF file from the `path`.

    Args:
      path: Path to the ELF file.
      skip_bad_elf_magic: When true, a file without the ELF magic word ends
        the process with exit status 0 instead of being reported as an error.

    Returns:
      The parsed ELF tuple.

    Exits with status 2 if the file cannot be opened or (unless skipped) has
    no valid magic word. Any other exception is reported and re-raised.
    """
    try:
      return ELFParser.open(path, self._llvm_readobj)
    except (IOError, OSError):
      self._error('Failed to open "{}".'.format(path))
      sys.exit(2)
    except ELFInvalidMagicError:
      if skip_bad_elf_magic:
        sys.exit(0)
      else:
        self._error('File "{}" must have a valid ELF magic word.'.format(path))
        sys.exit(2)
    except Exception:
      # KeyboardInterrupt and SystemExit are not "unknown errors"; let them
      # propagate without this message.
      self._error('An unknown error occurred while opening "{}".'.format(path))
      raise


  def load_file_under_test(self, path, skip_bad_elf_magic,
                           skip_unknown_elf_machine):
    """Load file-under-test (either an executable or a shared lib).

    With `skip_unknown_elf_machine` set, a file whose `e_machine` is not in
    `_KNOWN_MACHINES` ends the process with exit status 0.
    """
    self._file_path = path
    self._file_under_test = self._load_elf_file(path, skip_bad_elf_magic)

    if skip_unknown_elf_machine and \
        self._file_under_test.header.e_machine not in _KNOWN_MACHINES:
      sys.exit(0)


  def load_shared_libs(self, shared_lib_paths):
    """Load shared libraries.

    Unlike the file under test, every shared library must be a valid ELF file.
    """
    for path in shared_lib_paths:
      self._shared_libs.append(self._load_elf_file(path, False))


  def check_dt_soname(self, soname):
    """Check whether DT_SONAME matches installation file name."""
    if self._file_under_test.dt_soname != soname:
      self._error('DT_SONAME "{}" must be equal to the file name "{}".'
                  .format(self._file_under_test.dt_soname, soname))
      sys.exit(2)


  def check_dt_needed(self, system_shared_lib_names):
    """Check whether all DT_NEEDED entries are specified in the build
    system.

    Args:
      system_shared_lib_names: Module names that are left out of the fix
        suggestion.
    """

    missing_shared_libs = False

    # Collect the DT_SONAMEs from shared libs specified in the build system.
    specified_sonames = {lib.dt_soname for lib in self._shared_libs}

    # Check whether all DT_NEEDED entries are specified.
    for lib in self._file_under_test.dt_needed:
      if lib not in specified_sonames:
        # Parsed names are native `str` already, so they are formatted as-is.
        self._error('DT_NEEDED "{}" is not specified in shared_libs.'
                    .format(lib))
        missing_shared_libs = True

    if missing_shared_libs:
      dt_needed = sorted(set(self._file_under_test.dt_needed))
      modules = [re.sub('\\.so$', '', lib) for lib in dt_needed]

      # Remove system shared libraries from the suggestion since they are added
      # by default.
      modules = [name for name in modules
                 if name not in system_shared_lib_names]

      self._note()
      self._note('Fix suggestions:')
      self._note(
        '  Android.bp: shared_libs: [' +
        ', '.join('"' + module + '"' for module in modules) + '],')
      self._note(
        '  Android.mk: LOCAL_SHARED_LIBRARIES := ' + ' '.join(modules))

      self._note()
      self._note('If the fix above doesn\'t work, bypass this check with:')
      self._note('  Android.bp: check_elf_files: false,')
      self._note('  Android.mk: LOCAL_CHECK_ELF_FILES := false')

      sys.exit(2)


  @staticmethod
  def _find_symbol(lib, name, version):
    """Check whether the symbol name and version matches a definition in
    lib.

    An empty `version` matches any exported version of `name`.
    """
    try:
      lib_sym_vers = lib.exported[name]
    except KeyError:
      return False
    if version == '':  # Symbol version is not requested
      return True
    return version in lib_sym_vers


  @classmethod
  def _find_symbol_from_libs(cls, libs, name, version):
    """Check whether the symbol name and version is defined in one of the
    shared libraries in libs.

    Returns the first matching library, or None.
    """
    for lib in libs:
      if cls._find_symbol(lib, name, version):
        return lib
    return None


  def check_symbols(self):
    """Check whether all undefined symbols are resolved to a definition.

    Definitions are looked up in the file under test itself and then in the
    loaded shared libraries.
    """
    all_elf_files = [self._file_under_test] + self._shared_libs
    missing_symbols = []
    # items() exists on both Python 2 and Python 3; iteritems() does not.
    for sym, imported_vers in self._file_under_test.imported.items():
      for imported_ver in imported_vers:
        lib = self._find_symbol_from_libs(all_elf_files, sym, imported_ver)
        if not lib:
          missing_symbols.append((sym, imported_ver))

    if missing_symbols:
      for sym, ver in sorted(missing_symbols):
        if ver:
          sym += '@' + ver
        self._error('Unresolved symbol: {}'.format(sym))

      self._note()
      self._note('Some dependencies might be changed, thus the symbol(s) '
                 'above cannot be resolved.')
      self._note('Please re-build the prebuilt file: "{}".'
                 .format(self._file_path))

      self._note()
      self._note('If this is a new prebuilt file and it is designed to have '
                 'unresolved symbols, add one of the following properties:')
      self._note('  Android.bp: allow_undefined_symbols: true,')
      self._note('  Android.mk: LOCAL_ALLOW_UNDEFINED_SYMBOLS := true')

      sys.exit(2)
494
495
def _parse_args():
  """Build the command line parser and parse the process arguments.

  Returns the argparse namespace with the input file, the optional soname,
  the lists of shared libraries and system shared libraries, the check
  switches, and the optional llvm-readobj path.
  """
  parser = argparse.ArgumentParser()

  # Input file
  parser.add_argument('file',
                      help='Path to the input file to be checked')
  parser.add_argument('--soname',
                      help='Shared object name of the input file')

  # Shared library dependencies, then system shared library names; both
  # options may be repeated and accumulate into a list.
  repeatable_options = (
    ('--shared-lib', 'Path to shared library dependencies'),
    ('--system-shared-lib',
     'System shared libraries to be hidden from fix '
     'suggestions'),
  )
  for flag, description in repeatable_options:
    parser.add_argument(flag, action='append', default=[], help=description)

  # Check options
  switches = (
    ('--skip-bad-elf-magic',
     'Ignore the input file without the ELF magic word'),
    ('--skip-unknown-elf-machine',
     'Ignore the input file with unknown machine ID'),
    ('--allow-undefined-symbols',
     'Ignore unresolved undefined symbols'),
  )
  for flag, description in switches:
    parser.add_argument(flag, action='store_true', help=description)

  # Other options
  parser.add_argument('--llvm-readobj',
                      help='Path to the llvm-readobj executable')

  return parser.parse_args()
528
529
def main():
  """Parse the command line, load the ELF files and run the checks."""
  args = _parse_args()

  # An explicitly given llvm-readobj wins; otherwise search for one.
  llvm_readobj = args.llvm_readobj or _get_llvm_readobj()

  # Load ELF files
  checker = Checker(llvm_readobj)
  checker.load_file_under_test(
    args.file,
    skip_bad_elf_magic=args.skip_bad_elf_magic,
    skip_unknown_elf_machine=args.skip_unknown_elf_machine)
  checker.load_shared_libs(args.shared_lib)

  # Run checks: DT_SONAME only when a soname was given, DT_NEEDED always,
  # symbols unless undefined symbols are allowed.
  if args.soname:
    checker.check_dt_soname(args.soname)
  checker.check_dt_needed(args.system_shared_lib)
  if not args.allow_undefined_symbols:
    checker.check_symbols()


if __name__ == '__main__':
  main()
556