# Copyright 2016 Google Inc.  All rights Reserved.
2#
3# This script is used to help the compiler wrapper in the ChromeOS and
4# Android build systems bisect for bad object files.
5"""Utilities for bisection of ChromeOS and Android object files.
6
7This module contains a set of utilities to allow bisection between
8two sets (good and bad) of object files. Mostly used to find compiler
9bugs.
10
11Reference page:
12https://sites.google.com/a/google.com/chromeos-toolchain-team-home2/home/team-tools-and-scripts/bisecting-chromeos-compiler-problems/bisection-compiler-wrapper
13
14Design doc:
15https://docs.google.com/document/d/1yDgaUIa2O5w6dc3sSTe1ry-1ehKajTGJGQCbyn0fcEM
16"""
17
18from __future__ import print_function
19
20import contextlib
21import fcntl
22import os
23import shutil
24import subprocess
25import sys
26
# Stage names that bisect_driver() dispatches on.
VALID_MODES = ('POPULATE_GOOD', 'POPULATE_BAD', 'TRIAGE')

# Sub-directories of the bisection directory, one per object population.
GOOD_CACHE = 'good'
BAD_CACHE = 'bad'

# Path, relative to the bisection directory, of the good population's object
# list.
LIST_FILE = os.path.join(GOOD_CACHE, '_LIST')

# Optional behaviours; each is switched on by exporting the variable as '1'.
CONTINUE_ON_MISSING = os.environ.get('BISECT_CONTINUE_ON_MISSING') == '1'
WRAPPER_SAFE_MODE = os.environ.get('BISECT_WRAPPER_SAFE_MODE') == '1'
34
35
class Error(Exception):
  """The general compiler wrapper error class.

  Raised by this module when bisection cannot proceed, e.g. when an object
  that should be in a cache is missing.
  """
38
39
@contextlib.contextmanager
def lock_file(path, mode):
  """Open a file and hold a lock on it for the duration of a "with" block.

  The lock only excludes other processes that also lock the file through this
  function. When the file is opened purely for reading (modes 'r' and 'rb') the
  lock is shared, so many readers may proceed concurrently; every other mode
  takes an exclusive lock so that only one process writes at a time. The call
  blocks until the lock can be acquired.

  This function is a contextmanager: the body of the caller's "with" statement
  runs at the "yield", and the code after the "yield" runs when that body
  finishes (normally or by raising).

  Args:
    path: path to file being locked
    mode: mode to open file with ('w', 'r', etc.)

  Yields:
    The opened, locked file object.
  """
  with open(path, mode) as f:
    fd = f.fileno()
    # Apply FD_CLOEXEC to the descriptor so that it is not leaked to any child
    # processes.
    fd_flags = fcntl.fcntl(fd, fcntl.F_GETFD)
    fcntl.fcntl(fd, fcntl.F_SETFD, fd_flags | fcntl.FD_CLOEXEC)

    # Reads can share the lock as no race conditions exist. If write is needed,
    # give the writing process exclusive access to the file.
    if f.mode in ('r', 'rb'):
      lock_type = fcntl.LOCK_SH
    else:
      lock_type = fcntl.LOCK_EX

    # Acquire before entering the try block: if locking itself fails there is
    # no lock to release.
    fcntl.lockf(f, lock_type)
    try:
      yield f
    finally:
      try:
        # Flush while the lock is still held, even if the "with" body raised.
        # Otherwise buffered data would only reach the file when it is closed,
        # i.e. after another process may already have taken the lock.
        f.flush()
      finally:
        fcntl.lockf(f, fcntl.LOCK_UN)
78
79
def log_to_file(path, execargs, link_from=None, link_to=None):
  """Append a record of one compiler invocation to a log file.

  The record holds the current working directory and the space-joined
  execargs. When both link_from and link_to are given, a second line recording
  the "from -> to" relationship between the two files is added.

  Args:
    path: log file to append to; it is locked while being written.
    execargs: compiler execution arguments.
    link_from: optional source of a file relationship.
    link_to: optional destination of a file relationship.
  """
  with lock_file(path, 'a') as log:
    entries = ['cd: %s; %s\n' % (os.getcwd(), ' '.join(execargs))]
    if link_from and link_to:
      entries.append('%s -> %s\n' % (link_from, link_to))
    log.writelines(entries)
90
91
def exec_and_return(execargs):
  """Run a command to completion and report its exit status.

  The command's stdout and stderr are neither captured nor inspected.

  Args:
    execargs: command and arguments to execute.

  Returns:
    The return code given by subprocess.call.
  """
  exit_status = subprocess.call(execargs)
  return exit_status
99
100
def which_cache(obj_file):
  """Pick the cache (good or bad) that an object should be taken from.

  The file named by the BISECT_BAD_SET environment variable lists the objects
  that belong to the bad set for the current search iteration. An object listed
  there comes from the bad cache; every other object comes from the good cache.

  Args:
    obj_file: object file path to look up.

  Returns:
    BAD_CACHE if obj_file is listed in the bad set, GOOD_CACHE otherwise.
  """
  bad_objects_list = os.environ.get('BISECT_BAD_SET')
  is_bad = in_object_list(obj_file, bad_objects_list)
  return BAD_CACHE if is_bad else GOOD_CACHE
113
114
def makedirs(path):
  """Create path and any missing parents, tolerating an existing directory.

  Args:
    path: directory to create.

  Raises:
    OSError: if creation fails and path is not an existing directory.
  """
  try:
    os.makedirs(path)
  except OSError:
    # A failure is harmless only when the directory is already in place.
    if os.path.isdir(path):
      return
    raise
122
123
def get_obj_path(execargs):
  """Get the object path for the object file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute object path from execution args (-o argument). If no object being
    outputted, then return empty string. -o argument is checked only if -c is
    also present. An empty string is also returned when -o is the last
    argument (no file name follows it) and for outputs that are deliberately
    ignored: '-', '/dev/null', '.tmp' files and configure/CMake probe objects.
  """
  if '-o' not in execargs or '-c' not in execargs:
    return ''

  output_index = execargs.index('-o') + 1
  # A trailing '-o' names no file. Report "no object" so the caller can just
  # run the compiler and let it diagnose the malformed command line.
  if output_index >= len(execargs):
    return ''

  obj_path = execargs[output_index]
  # Ignore args that do not create a file.
  if obj_path in ('-', '/dev/null'):
    return ''
  # Ignore files ending in .tmp.
  if obj_path.endswith('.tmp'):
    return ''
  # Ignore configuration files generated by Automake/Autoconf/CMake etc.
  if (obj_path.endswith(('conftest.o', 'CMakeFiles/test.o')) or
      'CMakeTmp' in obj_path or
      'CMakeTmp' in os.path.abspath(obj_path)):
    return ''

  return os.path.abspath(obj_path)
155
156
def get_dep_path(execargs):
  """Get the dep file path for the dep file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute path of the dependency file. This is the -MF argument when one is
    given, otherwise the object path (-o argument) with its extension replaced
    by '.d'. Returns an empty string if no dependency file is being outputted
    (neither -MD nor -MMD present), if -MF is the last argument, or if there
    is no usable object path to derive the name from.
  """
  if '-MD' not in execargs and '-MMD' not in execargs:
    return ''

  # If -MF is given this is the path of the dependency file. Otherwise the
  # dependency file is the value of -o but with a .d extension
  if '-MF' in execargs:
    dep_index = execargs.index('-MF') + 1
    # A trailing '-MF' names no file; treat it as "no dependency file" rather
    # than failing with an IndexError.
    if dep_index >= len(execargs):
      return ''
    return os.path.abspath(execargs[dep_index])

  full_obj_path = get_obj_path(execargs)
  if not full_obj_path:
    return ''

  # Replace whatever extension the object has (not necessarily a two-character
  # '.o') instead of blindly cutting the last two characters.
  return os.path.splitext(full_obj_path)[0] + '.d'
179
180
def get_dwo_path(execargs):
  """Get the dwo file path for the dwo file in the list of arguments.

  Args:
    execargs: compiler execution arguments.

  Returns:
    Absolute dwo file path: the object path (-o argument) with its extension
    replaced by '.dwo'. Returns an empty string if no dwo file is being
    outputted (-gsplit-dwarf not present) or if there is no usable object path
    to derive the name from.
  """
  if '-gsplit-dwarf' not in execargs:
    return ''

  full_obj_path = get_obj_path(execargs)
  if not full_obj_path:
    return ''

  # Replace whatever extension the object has (not necessarily a two-character
  # '.o') instead of blindly cutting the last two characters.
  return os.path.splitext(full_obj_path)[0] + '.dwo'
196
197
def in_object_list(obj_name, list_filename):
  """Tell whether an object name appears in an object list file.

  The list file is read under a shared lock. Each line is compared against
  obj_name after surrounding whitespace is stripped.

  Args:
    obj_name: object file name to search for; a false value is never found.
    list_filename: path of the file holding one object name per line.

  Returns:
    True if some line of the file equals obj_name, False otherwise.
  """
  if not obj_name:
    return False

  with lock_file(list_filename, 'r') as list_file:
    return any(entry.strip() == obj_name for entry in list_file)
209
210
def get_side_effects(execargs):
  """Collect the extra files the compiler generates besides the object.

  Args:
    execargs: compiler execution arguments.

  Returns:
    List of paths of the side-effect files: the dependency file followed by the
    dwo file, each included only when its path is non-empty.
  """
  candidates = (
      get_dep_path(execargs),  # dependency (.d) file
      get_dwo_path(execargs),  # split-DWARF (.dwo) file
  )
  return [side_effect for side_effect in candidates if side_effect]
230
231
def cache_file(execargs, bisect_dir, cache, abs_file_path):
  """Cache compiler output file (.o/.d/.dwo).

  The attempt is always recorded in the cache's _POPULATE_LOG. If the file is
  already present in the cache, its path is appended to the cache's _DUPS file
  and an error is raised.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: The directory where bisection caches live.
    cache: Which cache the file will be cached to (GOOD/BAD).
    abs_file_path: Absolute path to file being cached.

  Returns:
    True if caching was successful, False if abs_file_path does not exist.

  Raises:
    Error: if the file has already been cached.
    Exception: any failure while copying is reported on stderr and re-raised.
  """
  population_dir = os.path.join(bisect_dir, cache)
  # abs_file_path is absolute, so os.path.join would discard population_dir;
  # concatenate instead to mirror the absolute path inside the cache.
  bisect_path = population_dir + abs_file_path
  makedirs(os.path.dirname(bisect_path))
  pop_log = os.path.join(population_dir, '_POPULATE_LOG')
  log_to_file(pop_log, execargs, abs_file_path, bisect_path)

  try:
    if not os.path.exists(abs_file_path):
      # File not found (happens when compilation fails but error code is
      # still 0)
      return False

    if os.path.exists(bisect_path):
      # Already cached: record the duplicate, then refuse to overwrite it.
      with lock_file(os.path.join(population_dir, '_DUPS'),
                     'a') as dup_object_list:
        dup_object_list.write('%s\n' % abs_file_path)
      # Use the module's own exception type; it is still an Exception, so
      # existing handlers keep working.
      raise Error('Trying to cache file %s multiple times.' % abs_file_path)

    shutil.copy2(abs_file_path, bisect_path)
    # Set cache object to be read-only so later compilations can't
    # accidentally overwrite it.
    os.chmod(bisect_path, 0o444)
    return True
  except Exception:
    print('Could not cache file %s' % abs_file_path, file=sys.stderr)
    raise
272
273
def restore_file(bisect_dir, cache, abs_file_path):
  """Restore file from cache (.o/.d/.dwo).

  The cached copy is hard-linked to abs_file_path, replacing any file that is
  already there.

  Args:
    bisect_dir: The directory where bisection caches live.
    cache: Which cache the file will be restored from (GOOD/BAD).
    abs_file_path: Absolute path to file being restored.

  Raises:
    Error: if the file is not present in the requested cache.
  """
  # abs_file_path is absolute, so os.path.join would discard the cache
  # directory; concatenate instead.
  cached_path = os.path.join(bisect_dir, cache) + abs_file_path
  if not os.path.exists(cached_path):
    # Name the missing path first and the cache second, so the message reads
    # "<path> is missing from <cache> cache!".
    raise Error(('%s is missing from %s cache! Unsure how to proceed. Make '
                 'will now crash.' % (cached_path, cache)))

  if os.path.exists(abs_file_path):
    os.remove(abs_file_path)
  os.link(cached_path, abs_file_path)
291
292
def bisect_populate(execargs, bisect_dir, population_name):
  """Run the compiler and record its outputs in one bisection cache.

  The compiler is executed first. On success the produced object file is
  copied into the cache, its path is appended to the cache's _LIST file, and
  any compiler side effects are cached as well.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: bisection directory.
    population_name: name of the cache being populated (good/bad).

  Returns:
    The compiler's return code.
  """
  exit_code = exec_and_return(execargs)
  if exit_code:
    return exit_code

  obj_path = get_obj_path(execargs)
  # An empty object path means this is not a normal compiler call: there is no
  # -o argument, or the -o argument is an unusable output file. The call was
  # likely made to invoke the linker, or as part of a configuration test, so
  # running the compiler was all that needed doing.
  if not obj_path:
    return exit_code

  # Nothing more to record if the object file could not be cached.
  if not cache_file(execargs, bisect_dir, population_name, obj_path):
    return exit_code

  list_path = os.path.join(bisect_dir, population_name, '_LIST')
  with lock_file(list_path, 'a') as object_list:
    object_list.write('%s\n' % obj_path)

  for side_effect in get_side_effects(execargs):
    cache_file(execargs, bisect_dir, population_name, side_effect)

  return exit_code
331
332
def bisect_triage(execargs, bisect_dir):
  """Use the object file from the appropriate cache (good/bad).

  Given a populated bisection directory, use the object file saved
  into one of the caches (good/bad) according to what is specified
  in the good/bad sets. The good/bad sets are generated by the
  high level binary search tool. Additionally restore any possible
  side effects of compiler.

  Args:
    execargs: compiler execution arguments.
    bisect_dir: populated bisection directory.

  Returns:
    The compiler's return code whenever the compiler is actually run,
    otherwise 0 once the cached files have been restored.

  Raises:
    Error: if the object is not in the object list and
      BISECT_CONTINUE_ON_MISSING is not enabled, or if a file to restore is
      missing from the cache.
  """
  full_obj_path = get_obj_path(execargs)
  obj_list = os.path.join(bisect_dir, LIST_FILE)

  # If the output isn't an object file just call compiler
  if not full_obj_path:
    return exec_and_return(execargs)

  # If this isn't a bisected object just call compiler
  # This shouldn't happen!
  if not in_object_list(full_obj_path, obj_list):
    if not CONTINUE_ON_MISSING:
      raise Error(('%s is missing from cache! To ignore export '
                   'BISECT_CONTINUE_ON_MISSING=1. See documentation for more '
                   'details on this option.' % full_obj_path))
    log_file = os.path.join(bisect_dir, '_MISSING_CACHED_OBJ_LOG')
    log_to_file(log_file, execargs, '? compiler', full_obj_path)
    return exec_and_return(execargs)

  cache = which_cache(full_obj_path)

  # If using safe WRAPPER_SAFE_MODE option call compiler and overwrite the
  # result from the good/bad cache. This option is safe and covers all compiler
  # side effects, but is very slow!
  if WRAPPER_SAFE_MODE:
    retval = exec_and_return(execargs)
    if retval:
      return retval
    # The compiler can exit with 0 without having produced the object; only
    # remove the fresh object if it is really there, so that the cached one
    # can still be linked into place.
    if os.path.exists(full_obj_path):
      os.remove(full_obj_path)
    restore_file(bisect_dir, cache, full_obj_path)
    return retval

  # Generate compiler side effects. Trick Make into thinking compiler was
  # actually executed.
  for side_effect in get_side_effects(execargs):
    restore_file(bisect_dir, cache, side_effect)

  # If generated object file happened to be pruned/cleaned by Make then link it
  # over from cache again.
  if not os.path.exists(full_obj_path):
    restore_file(bisect_dir, cache, full_obj_path)

  return 0
389
390
def bisect_driver(bisect_stage, bisect_dir, execargs):
  """Dispatch to the bisection stage named by bisect_stage.

  Args:
    bisect_stage: one of 'POPULATE_GOOD', 'POPULATE_BAD' or 'TRIAGE'.
    bisect_dir: bisection directory.
    execargs: compiler execution arguments.

  Returns:
    Whatever the selected stage returns.

  Raises:
    ValueError: if bisect_stage is not a recognised stage name.
  """
  if bisect_stage == 'TRIAGE':
    return bisect_triage(execargs, bisect_dir)

  if bisect_stage in ('POPULATE_GOOD', 'POPULATE_BAD'):
    # Both populate stages share one implementation; only the cache differs.
    population = GOOD_CACHE if bisect_stage == 'POPULATE_GOOD' else BAD_CACHE
    return bisect_populate(execargs, bisect_dir, population)

  raise ValueError('wrong value for BISECT_STAGE: %s' % bisect_stage)
401