1#!/usr/bin/env python
2"""Calls C-Reduce to create a minimal reproducer for clang crashes.
3
4Output files:
5  *.reduced.sh -- crash reproducer with minimal arguments
6  *.reduced.cpp -- the reduced file
7  *.test.sh -- interestingness test for C-Reduce
8"""
9
from __future__ import print_function

from argparse import ArgumentParser, RawTextHelpFormatter
import os
import re
import shlex
import shutil
import stat
import subprocess
import sys
import tempfile

try:
  import pipes
except ImportError:
  # The pipes module was removed in Python 3.13. pipes.quote was only an alias
  # of shlex.quote, so shlex can stand in for the one attribute used here.
  pipes = shlex

try:
  from distutils.spawn import find_executable
except ImportError:
  # distutils was removed in Python 3.12. shutil.which accepts the same
  # (name) and (name, path=...) call forms used in this file.
  from shutil import which as find_executable
22
# Module-wide settings; main() overwrites all three from the command line.
# verbose gates verbose_print().
verbose = False
# Executable paths for C-Reduce and clang; unset until main() resolves them.
creduce_cmd = clang_cmd = None
26
def verbose_print(*args, **kwargs):
  """Forwards its arguments to print() when the module-level `verbose` flag is
  set; otherwise does nothing.
  """
  if not verbose:
    return
  print(*args, **kwargs)
30
def check_file(fname):
  """Returns the normalized form of fname.

  Exits the program with an error message when the normalized path is not an
  existing regular file.
  """
  normalized = os.path.normpath(fname)
  if os.path.isfile(normalized):
    return normalized
  sys.exit("ERROR: %s does not exist" % normalized)
36
def check_cmd(cmd_name, cmd_dir, cmd_path=None):
  """
  Returns the path of an executable, or exits with an error if none is found.

  If cmd_path is given it is made absolute and must name an executable;
  cmd_name and cmd_dir are then ignored.  Otherwise cmd_name is looked up in
  cmd_dir, or in the PATH environment variable when cmd_dir is empty.
  """
  # Prefer shutil.which (Python 3.3+): distutils, which provides
  # find_executable, was removed in Python 3.12, and find_executable does not
  # check that the file is actually executable.  Interpreters without
  # shutil.which still fall back to find_executable.
  which = getattr(shutil, 'which', None) or find_executable

  if cmd_path:
    # Make the path absolute so the creduce test can be run from any directory.
    cmd_path = os.path.abspath(cmd_path)
    cmd = which(cmd_path)
    if cmd:
      return cmd
    sys.exit("ERROR: executable `%s` not found" % (cmd_path))

  cmd = which(cmd_name, path=cmd_dir)
  if cmd:
    return cmd

  # Only used to make the error message say where the search happened.
  if not cmd_dir:
    cmd_dir = "$PATH"
  sys.exit("ERROR: `%s` not found in %s" % (cmd_name, cmd_dir))
57
def quote_cmd(cmd):
  """Returns the argument list `cmd` as one shell-escaped, space-separated
  string.
  """
  # shlex.quote is the documented API (Python 3.3+).  pipes.quote is the same
  # function under a legacy name, and the pipes module was removed in
  # Python 3.13, so it is only used where shlex.quote does not exist.
  quote = getattr(shlex, 'quote', None) or pipes.quote
  return ' '.join(quote(arg) for arg in cmd)
60
def write_to_script(text, filename):
  """Writes `text` to `filename` (replacing any previous content) and sets the
  owner-execute bit on the file.
  """
  with open(filename, 'w') as script:
    script.write(text)
  # Add the execute bit on top of whatever permissions the file already has.
  current_mode = os.stat(filename).st_mode
  os.chmod(filename, current_mode | stat.S_IEXEC)
65
class Reduce(object):
  """Reduces one clang crash to a small source file and command line.

  Creating an instance copies the input file to `<name>.reduced<ext>`, reads
  the clang arguments from the crash script and runs the crash command once to
  record the output that identifies the crash.  The remaining methods are the
  individual reduction steps.
  """

  def __init__(self, crash_script, file_to_reduce):
    """Sets up output file names and records the crash command and its output.

    crash_script is the script containing the crashing clang invocation;
    file_to_reduce is the source file that invocation compiles.
    """
    crash_script_name, crash_script_ext = os.path.splitext(crash_script)
    file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)

    # Output files: interestingness test, reduced crash script, reduced source.
    self.testfile = file_reduce_name + '.test.sh'
    self.crash_script = crash_script_name + '.reduced' + crash_script_ext
    self.file_to_reduce = file_reduce_name + '.reduced' + file_reduce_ext
    # All further work happens on the copy; the input file is left untouched.
    shutil.copy(file_to_reduce, self.file_to_reduce)

    self.clang = clang_cmd
    self.clang_args = []
    # Strings that must all appear in clang's output for a run to count as
    # reproducing the crash.
    self.expected_output = []
    # True when the crash is identified by stack trace function names, which
    # requires symbolization to stay enabled in the interestingness test.
    self.needs_stack_trace = False
    self.creduce_flags = ["--tidy"]

    self.read_clang_args(crash_script, file_to_reduce)
    self.read_expected_output()

  def get_crash_cmd(self, cmd=None, args=None, filename=None):
    """Returns the crash command as a list: [cmd] + args + [filename].

    Omitted pieces default to self.clang, self.clang_args and
    self.file_to_reduce.
    """
    if not cmd:
      cmd = self.clang
    # Only None selects the stored arguments.  An explicitly empty list is a
    # real candidate while arguments are being removed, and must not be
    # replaced by the full argument list.
    if args is None:
      args = self.clang_args
    if not filename:
      filename = self.file_to_reduce

    return [cmd] + list(args) + [filename]

  def _run_crash_cmd(self, args=None, filename=None):
    """Runs the crash command and returns its combined stdout and stderr,
    decoded as UTF-8.
    """
    p = subprocess.Popen(self.get_crash_cmd(args=args, filename=filename),
                         stdout=subprocess.PIPE,
                         stderr=subprocess.STDOUT)
    crash_output, _ = p.communicate()
    return crash_output.decode('utf-8')

  def read_clang_args(self, crash_script, filename):
    """Stores the clang arguments found in crash_script in self.clang_args.

    The command is taken from the first line that is neither blank nor a
    comment.  Its first word (the compiler) and the last argument equal to
    `filename` are dropped.  Exits if no command line is found.
    """
    print("\nReading arguments from crash script...")
    cmd = []
    with open(crash_script) as f:
      # Assume clang call is the first non comment line.
      for line in f:
        stripped = line.strip()
        # A blank line is not a command; stopping on it would wrongly report
        # that the script contains no command.
        if not stripped or stripped.startswith('#'):
          continue
        cmd = shlex.split(line)
        break
    if not cmd:
      sys.exit("Could not find command in the crash script.")

    # Remove clang and filename from the command
    # Assume the last occurrence of the filename is the clang input file
    del cmd[0]
    for i in range(len(cmd)-1, -1, -1):
      if cmd[i] == filename:
        del cmd[i]
        break
    self.clang_args = cmd
    verbose_print("Clang arguments:", quote_cmd(self.clang_args))

  def read_expected_output(self):
    """Runs the crash command and stores what identifies the crash in
    self.expected_output.

    The first known error message found in the output is used.  If there is
    none, up to five function names from the stack trace are used instead and
    self.needs_stack_trace is set.  Exits if neither is found.
    """
    print("\nGetting expected crash output...")
    result = []

    # Remove color codes
    ansi_escape = r'\x1b\[[0-?]*m'
    crash_output = re.sub(ansi_escape, '', self._run_crash_cmd())

    # Look for specific error messages
    regexes = [r"Assertion .+ failed", # Linux assert()
               r"Assertion failed: .+,", # FreeBSD/Mac assert()
               r"fatal error: error in backend: .+",
               r"LLVM ERROR: .+",
               r"UNREACHABLE executed at .+?!",
               r"LLVM IR generation of declaration '.+'",
               r"Generating code for declaration '.+'",
               r"\*\*\* Bad machine code: .+ \*\*\*"]
    for msg_re in regexes:
      match = re.search(msg_re, crash_output)
      if match:
        msg = match.group(0)
        result = [msg]
        print("Found message:", msg)
        break

    # If no message was found, use the top five stack trace functions,
    # ignoring some common functions
    # Five is a somewhat arbitrary number; the goal is to get a small number
    # of identifying functions with some leeway for common functions
    if not result:
      self.needs_stack_trace = True
      stacktrace_re = r'[0-9]+\s+0[xX][0-9a-fA-F]+\s*([^(]+)\('
      filters = ["PrintStackTrace", "RunSignalHandlers", "CleanupOnSignal",
                 "HandleCrash", "SignalHandler", "__restore_rt", "gsignal", "abort"]
      def skip_function(func_name):
        return any(name in func_name for name in filters)

      matches = re.findall(stacktrace_re, crash_output)
      result = [x for x in matches if x and not skip_function(x)][:5]
      for msg in result:
        print("Found stack trace function:", msg)

    if not result:
      print("ERROR: no crash was found")
      print("The crash output was:\n========\n%s========" % crash_output)
      sys.exit(1)

    self.expected_output = result

  def check_expected_output(self, args=None, filename=None):
    """Returns True if running clang with `args` on `filename` prints every
    string in self.expected_output.

    args and filename default to the stored ones; an empty `args` list is run
    as given.
    """
    crash_output = self._run_crash_cmd(args=args, filename=filename)
    return all(msg in crash_output for msg in self.expected_output)

  def write_interestingness_test(self):
    """Writes the shell script C-Reduce uses to decide whether a candidate
    still crashes, then sanity-checks it.

    The script exits with 1 if the crash command succeeds or if any expected
    string is missing from its output.
    """
    print("\nCreating the interestingness test...")

    # Disable symbolization if it's not required to avoid slow symbolization.
    disable_symbolization = ''
    if not self.needs_stack_trace:
      disable_symbolization = 'export LLVM_DISABLE_SYMBOLIZATION=1'

    output = """#!/bin/bash
%s
if %s >& t.log ; then
  exit 1
fi
""" % (disable_symbolization, quote_cmd(self.get_crash_cmd()))

    for msg in self.expected_output:
      # quote_cmd on a one-element list shell-escapes that single string.
      output += 'grep -F %s t.log || exit 1\n' % quote_cmd([msg])

    write_to_script(output, self.testfile)
    self.check_interestingness()

  def check_interestingness(self):
    """Exits unless the test script passes for the current file and the crash
    does not reproduce on an empty file.
    """
    testfile = os.path.abspath(self.testfile)

    # Check that the test considers the original file interesting
    with open(os.devnull, 'w') as devnull:
      returncode = subprocess.call(testfile, stdout=devnull)
    if returncode:
      sys.exit("The interestingness test does not pass for the original file.")

    # Check that an empty file is not interesting
    # Instead of modifying the filename in the test file, just run the command
    with tempfile.NamedTemporaryFile() as empty_file:
      is_interesting = self.check_expected_output(filename=empty_file.name)
    if is_interesting:
      sys.exit("The interestingness test passes for an empty file.")

  def clang_preprocess(self):
    """Replaces the file being reduced with its preprocessed form if that
    still crashes.

    Preprocessing without line markers (-P) is tried first, then with line
    markers.  The file is left unchanged if neither result reproduces the
    crash or if the preprocessing command fails.
    """
    print("\nTrying to preprocess the source file...")
    with tempfile.NamedTemporaryFile() as tmpfile:
      cmd_preprocess = self.get_crash_cmd() + ['-E', '-o', tmpfile.name]
      cmd_preprocess_no_lines = cmd_preprocess + ['-P']
      try:
        subprocess.check_call(cmd_preprocess_no_lines)
        if self.check_expected_output(filename=tmpfile.name):
          print("Successfully preprocessed with line markers removed")
          shutil.copy(tmpfile.name, self.file_to_reduce)
        else:
          subprocess.check_call(cmd_preprocess)
          if self.check_expected_output(filename=tmpfile.name):
            print("Successfully preprocessed without removing line markers")
            shutil.copy(tmpfile.name, self.file_to_reduce)
          else:
            print("No longer crashes after preprocessing -- "
                  "using original source")
      except subprocess.CalledProcessError:
        print("Preprocessing failed")

  @staticmethod
  def filter_args(args, opts_equal=(), opts_startswith=(),
                  opts_one_arg_startswith=()):
    """Returns a copy of args with the matching options left out.

    opts_equal: arguments equal to one of these are dropped.
    opts_startswith: arguments starting with one of these are dropped.
    opts_one_arg_startswith: arguments starting with one of these are dropped
      together with the argument that follows them.
    """
    result = []
    skip_next = False
    for arg in args:
      if skip_next:
        skip_next = False
        continue
      if any(arg == a for a in opts_equal):
        continue
      if any(arg.startswith(a) for a in opts_startswith):
        continue
      if any(arg.startswith(a) for a in opts_one_arg_startswith):
        skip_next = True
        continue
      result.append(arg)
    return result

  def try_remove_args(self, args, msg=None, extra_arg=None, **kwargs):
    """Returns args filtered by filter_args(**kwargs) if the crash still
    reproduces with the filtered list, otherwise returns args unchanged.

    If extra_arg is given it is placed at the end of the candidate list.
    msg is printed in verbose mode when the candidate is accepted.
    """
    new_args = self.filter_args(args, **kwargs)

    if extra_arg:
      # Move an existing occurrence to the end rather than duplicating it.
      if extra_arg in new_args:
        new_args.remove(extra_arg)
      new_args.append(extra_arg)

    if (new_args != args and
        self.check_expected_output(args=new_args)):
      if msg:
        verbose_print(msg)
      return new_args
    return args

  def try_remove_arg_by_index(self, args, index):
    """Tries to drop args[index], plus the following argument if that one does
    not start with "-".

    Returns (new_args, index) if the crash still reproduces, so the caller
    looks at the same position again; otherwise returns (args, index+1).
    """
    new_args = args[:index] + args[index+1:]
    removed_arg = args[index]

    # Heuristic for grouping arguments:
    # remove next argument if it doesn't start with "-"
    if index < len(new_args) and not new_args[index].startswith('-'):
      del new_args[index]
      removed_arg += ' ' + args[index+1]

    if self.check_expected_output(args=new_args):
      verbose_print("Removed", removed_arg)
      return new_args, index
    return args, index+1

  def simplify_clang_args(self):
    """Simplify clang arguments before running C-Reduce to reduce the time the
    interestingness test takes to run.
    """
    print("\nSimplifying the clang command...")

    # Remove some clang arguments to speed up the interestingness test
    new_args = self.clang_args
    new_args = self.try_remove_args(new_args,
                                    msg="Removed debug info options",
                                    opts_startswith=["-gcodeview",
                                                     "-debug-info-kind=",
                                                     "-debugger-tuning="])

    new_args = self.try_remove_args(new_args,
                                    msg="Removed --show-includes",
                                    opts_startswith=["--show-includes"])
    # Not suppressing warnings (-w) sometimes prevents the crash from occurring
    # after preprocessing
    new_args = self.try_remove_args(new_args,
                                    msg="Replaced -W options with -w",
                                    extra_arg='-w',
                                    opts_startswith=["-W"])
    new_args = self.try_remove_args(new_args,
                                    msg="Replaced optimization level with -O0",
                                    extra_arg="-O0",
                                    opts_startswith=["-O"])

    # Try to remove compilation steps
    new_args = self.try_remove_args(new_args, msg="Added -emit-llvm",
                                    extra_arg="-emit-llvm")
    new_args = self.try_remove_args(new_args, msg="Added -fsyntax-only",
                                    extra_arg="-fsyntax-only")

    # Try to make implicit int an error for more sensible test output
    new_args = self.try_remove_args(new_args, msg="Added -Werror=implicit-int",
                                    opts_equal=["-w"],
                                    extra_arg="-Werror=implicit-int")

    self.clang_args = new_args
    verbose_print("Simplified command:", quote_cmd(self.get_crash_cmd()))

  def reduce_clang_args(self):
    """Minimize the clang arguments after running C-Reduce, to get the smallest
    command that reproduces the crash on the reduced file.

    The resulting command is written to self.crash_script.
    """
    print("\nReducing the clang crash command...")

    new_args = self.clang_args

    # Remove some often occurring args
    new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                    opts_startswith=["-D"])
    new_args = self.try_remove_args(new_args, msg="Removed -D options",
                                    opts_one_arg_startswith=["-D"])
    new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                    opts_startswith=["-I"])
    new_args = self.try_remove_args(new_args, msg="Removed -I options",
                                    opts_one_arg_startswith=["-I"])
    new_args = self.try_remove_args(new_args, msg="Removed -W options",
                                    opts_startswith=["-W"])

    # Remove other cases that aren't covered by the heuristic
    new_args = self.try_remove_args(new_args, msg="Removed -mllvm",
                                    opts_one_arg_startswith=["-mllvm"])

    # Try every remaining argument one at a time.  The helper returns the next
    # index to look at, which stays the same after a successful removal.
    i = 0
    while i < len(new_args):
      new_args, i = self.try_remove_arg_by_index(new_args, i)

    self.clang_args = new_args

    reduced_cmd = quote_cmd(self.get_crash_cmd())
    write_to_script(reduced_cmd, self.crash_script)
    print("Reduced command:", reduced_cmd)

  def run_creduce(self):
    """Runs C-Reduce with the interestingness test on the file being reduced.

    Ctrl-C kills the C-Reduce process instead of aborting this script.
    """
    print("\nRunning C-Reduce...")
    # Start the process before entering the try block so that `p` is always
    # bound when the KeyboardInterrupt handler runs.
    p = subprocess.Popen([creduce_cmd] + self.creduce_flags +
                         [self.testfile, self.file_to_reduce])
    try:
      p.communicate()
    except KeyboardInterrupt:
      # Hack to kill C-Reduce because it jumps into its own pgid
      print('\n\nctrl-c detected, killed creduce')
      p.kill()
376
def main():
  """Parses the command line, resolves the creduce and clang executables, and
  runs the reduction steps in order.
  """
  global verbose, creduce_cmd, clang_cmd

  arg_parser = ArgumentParser(description=__doc__,
                              formatter_class=RawTextHelpFormatter)
  arg_parser.add_argument(
      'crash_script', type=str, nargs=1,
      help="Name of the script that generates the crash.")
  arg_parser.add_argument(
      'file_to_reduce', type=str, nargs=1,
      help="Name of the file to be reduced.")
  arg_parser.add_argument(
      '--llvm-bin', dest='llvm_bin', type=str,
      help="Path to the LLVM bin directory.")
  arg_parser.add_argument(
      '--clang', dest='clang', type=str,
      help="The path to the `clang` executable. "
      "By default uses the llvm-bin directory.")
  arg_parser.add_argument(
      '--creduce', dest='creduce', type=str,
      help="The path to the `creduce` executable. "
      "Required if `creduce` is not in PATH environment.")
  arg_parser.add_argument('-v', '--verbose', action='store_true')
  options = arg_parser.parse_args()

  verbose = options.verbose
  llvm_bin = None
  if options.llvm_bin:
    llvm_bin = os.path.abspath(options.llvm_bin)
  creduce_cmd = check_cmd('creduce', None, options.creduce)
  clang_cmd = check_cmd('clang', llvm_bin, options.clang)

  # nargs=1 makes each positional argument a one-element list.
  crash_script = check_file(options.crash_script[0])
  file_to_reduce = check_file(options.file_to_reduce[0])

  reducer = Reduce(crash_script, file_to_reduce)

  reducer.simplify_clang_args()
  reducer.write_interestingness_test()
  reducer.clang_preprocess()
  reducer.run_creduce()
  reducer.reduce_clang_args()
414
# Run main() only when this file is executed as a script.
if __name__ == "__main__":
  main()
417