1#!/usr/bin/env python3
2# Copyright 2019 Google Inc.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16################################################################################
17"""Use git bisect to find the Clang/LLVM commit causing a regression."""
18
19import logging
20import os
21import re
22import shutil
23import subprocess
24import sys
25
26
def execute(command, *args, expect_zero=True, **kwargs):
  """Run |command| in a subprocess, capturing and logging its output.

  Args:
    command: The command to run, as accepted by subprocess.Popen (a list of
      strings, or a single string when shell=True is passed in |kwargs|).
    *args: Extra positional arguments forwarded to subprocess.Popen.
    expect_zero: If True, raise when the command exits with a nonzero status.
    **kwargs: Extra keyword arguments forwarded to subprocess.Popen. Any
      'stdout' or 'stderr' entries are overridden so output can be captured.

  Returns:
    A (returncode, stdout, stderr) tuple where stdout and stderr are str.

  Raises:
    subprocess.CalledProcessError: If |expect_zero| is True and the command
      exits with a nonzero status. The captured stdout and stderr are attached
      to the exception.
  """
  kwargs['stdout'] = subprocess.PIPE
  kwargs['stderr'] = subprocess.PIPE
  logging.debug('Running command: "%s"', str(command))
  with subprocess.Popen(command, *args, **kwargs) as process:
    stdout, stderr = process.communicate()
  # Output of arbitrary build/test commands is not guaranteed to be valid
  # UTF-8. Replace undecodable bytes instead of failing the whole run.
  stdout = stdout.decode('utf-8', errors='replace')
  stderr = stderr.decode('utf-8', errors='replace')
  retcode = process.returncode
  logging.info('Command: "%s" returned: %d.\nStdout: %s.\nStderr: %s',
               str(command), retcode, stdout, stderr)
  if expect_zero and retcode != 0:
    # Attach the captured output so callers can inspect why the command failed.
    raise subprocess.CalledProcessError(retcode,
                                        command,
                                        output=stdout,
                                        stderr=stderr)
  return retcode, stdout, stderr
42
43
def search_bisect_output(output):
  """Search |output| for git bisect's message naming the culprit commit.

  Args:
    output: Text printed by a git bisect command.

  Returns:
    The 40-character commit hash from the first
    "<hash> is the first (good|bad) commit" message found anywhere in
    |output|, or None if there is no such message.
  """
  # TODO(metzman): Is it necessary to look for "good"?
  culprit_regex = re.compile(
      r'\b([0-9a-f]{40}) is the first (good|bad) commit')
  # Use search() rather than match(): the message is not guaranteed to be the
  # very first text in the output, and missing it would make the caller keep
  # bisecting forever.
  match = culprit_regex.search(output)
  return match.group(1) if match is not None else None
51
52
class GitRepo:
  """Helper for running git commands, git bisect in particular, on the
  repository located at |repo_dir|."""

  def __init__(self, repo_dir):
    self.repo_dir = repo_dir

  def do_command(self, git_subcommand):
    """Run git inside the repo with the arguments in |git_subcommand| (a list
    of strings) and return the result of execute()."""
    git_prefix = ['git', '-C', self.repo_dir]
    return execute(git_prefix + git_subcommand)

  def do_bisect_command(self, subcommand):
    """Run 'git bisect |subcommand|' (|subcommand| is a string) and return the
    result."""
    return self.do_command(['bisect', subcommand])

  def test_commit(self, test_command):
    """Build LLVM at the currently checked out commit and run |test_command|.
    Mark the commit with 'git bisect good' if the command exits with 0 and with
    'git bisect bad' otherwise. Return the culprit commit if git bisect
    reported it, or None if the bisection is not finished yet."""
    build_clang(self.repo_dir)
    test_status, _, _ = execute(test_command, shell=True, expect_zero=False)
    verdict = 'good' if test_status == 0 else 'bad'
    _, bisect_stdout, _ = self.do_bisect_command(verdict)
    return search_bisect_output(bisect_stdout)

  def bisect_start(self, good_commit, bad_commit, test_command):
    """Begin a git bisect session, then check and mark both end points."""
    self.do_bisect_command('start')
    # Handle the bad commit first since it is more likely to be recent.
    for commit, label in ((bad_commit, 'bad'), (good_commit, 'good')):
      self.test_start_commit(commit, label, test_command)

  def test_start_commit(self, commit, label, test_command):
    """Check out and build |commit|, run |test_command| on it and make sure the
    outcome agrees with |label| ('good' or 'bad') before marking the commit
    with that label. Raise BisectError if the outcome disagrees."""
    assert label in ('good', 'bad'), label
    self.do_command(['checkout', commit])
    build_clang(self.repo_dir)
    retcode, _, _ = execute(test_command, shell=True, expect_zero=False)
    test_passed = retcode == 0
    if label == 'good' and not test_passed:
      raise BisectError('Test command "%s" returns %d on first good commit %s' %
                        (test_command, retcode, commit))
    if label == 'bad' and test_passed:
      raise BisectError('Test command "%s" returns %d on first bad commit %s' %
                        (test_command, retcode, commit))

    self.do_bisect_command(label)

  def bisect(self, good_commit, bad_commit, test_command):
    """Run git bisect between |good_commit| (known good) and |bad_commit|
    (known bad), using |test_command| as the oracle. Return the culprit
    commit."""
    self.bisect_start(good_commit, bad_commit, test_command)
    # Keep testing whichever commit git bisect checks out until it names the
    # culprit.
    while True:
      culprit = self.test_commit(test_command)
      if culprit is not None:
        return culprit
112
113
class BisectError(Exception):
  """Raised when a problem is encountered while bisecting."""
116
117
def get_clang_build_env():
  """Return a copy of the current environment, minus CXXFLAGS and CFLAGS, to
  use when building Clang. os.environ itself is left untouched."""
  build_env = os.environ.copy()
  build_env.pop('CXXFLAGS', None)
  build_env.pop('CFLAGS', None)
  return build_env
125
126
def install_clang_build_deps():
  """Install the packages needed to build clang using apt-get."""
  packages = [
      'build-essential', 'make', 'cmake', 'ninja-build', 'git', 'subversion',
      'g++-multilib'
  ]
  execute(['apt-get', 'install', '-y'] + packages)
133
134
def clone_with_retries(repo, local_path, num_retries=10):
  """Clone |repo| into |local_path| unless that directory already exists.
  Make up to |num_retries| attempts and raise an Exception if none of them
  succeeds. Returns None."""
  if os.path.isdir(local_path):
    return
  clone_command = ['git', 'clone', repo, local_path]
  for _ in range(num_retries):
    # Remove whatever an earlier failed attempt may have left behind.
    if os.path.isdir(local_path):
      shutil.rmtree(local_path)
    clone_status = execute(clone_command, expect_zero=False)[0]
    if clone_status == 0:
      return
  raise Exception('Could not checkout %s.' % repo)
148
149
def get_clang_target_arch():
  """Return the LLVM target name to build clang for, derived from the output
  of 'uname -m'. Raise an Exception for any other machine type."""
  _, machine, _ = execute(['uname', '-m'])
  # Checked in order: (substring of the uname output, LLVM target name).
  known_targets = (('x86_64', 'X86'), ('aarch64', 'AArch64'))
  for marker, llvm_target in known_targets:
    if marker in machine:
      return llvm_target
  raise Exception('Unsupported target: %s.' % machine)
158
159
def prepare_build(llvm_project_path):
  """Prepare to build clang by running cmake in $WORK/llvm-build.

  Args:
    llvm_project_path: Path to the llvm-project checkout. Its 'llvm'
      subdirectory is passed to cmake as the source directory.

  Returns:
    The path of the build directory ($WORK/llvm-build).

  Raises:
    RuntimeError: If the WORK environment variable is not set.
    subprocess.CalledProcessError: If cmake exits with a nonzero status.
  """
  work_dir = os.getenv('WORK')
  if work_dir is None:
    # Fail with a clear message instead of a TypeError from os.path.join.
    raise RuntimeError(
        'The WORK environment variable must be set to the directory in which '
        'the llvm-build directory should be created.')
  llvm_build_dir = os.path.join(work_dir, 'llvm-build')
  # exist_ok avoids the check-then-create race and makes repeated calls (one
  # per bisection step) a no-op.
  os.makedirs(llvm_build_dir, exist_ok=True)
  execute([
      'cmake', '-G', 'Ninja', '-DLIBCXX_ENABLE_SHARED=OFF',
      '-DLIBCXX_ENABLE_STATIC_ABI_LIBRARY=ON', '-DLIBCXXABI_ENABLE_SHARED=OFF',
      '-DCMAKE_BUILD_TYPE=Release',
      '-DLLVM_ENABLE_PROJECTS=libcxx;libcxxabi;compiler-rt;clang',
      '-DLLVM_TARGETS_TO_BUILD=' + get_clang_target_arch(),
      os.path.join(llvm_project_path, 'llvm')
  ],
          env=get_clang_build_env(),
          cwd=llvm_build_dir)
  return llvm_build_dir
176
177
def build_clang(llvm_project_path):
  """Configure the build for the checkout at |llvm_project_path|, then run
  ninja's 'install' target in the resulting build directory."""
  # TODO(metzman): Merge Python checkout and build code with
  # checkout_build_install_llvm.sh.
  # TODO(metzman): Look into speeding this process using ccache.
  # TODO(metzman): Make this program capable of handling MSAN and i386 Clang
  # regressions.
  build_dir = prepare_build(llvm_project_path)
  ninja_command = ['ninja', '-C', build_dir, 'install']
  execute(ninja_command, env=get_clang_build_env())
187
188
def find_culprit_commit(test_command, good_commit, bad_commit):
  """Find the LLVM commit that introduced a bug revealed by |test_command|.

  Clones llvm-project into $SRC/llvm-project if that directory does not exist
  yet, then bisects it with git bisect.

  Args:
    test_command: Shell command used as the oracle. Exit status 0 means the
      commit under test is good; anything else means it is bad.
    good_commit: The latest known good commit.
    bad_commit: The first known bad commit.

  Returns:
    The culprit commit, which is also printed.

  Raises:
    RuntimeError: If the SRC environment variable is not set.
  """
  src_dir = os.getenv('SRC')
  if src_dir is None:
    # Fail with a clear message instead of a TypeError from os.path.join.
    raise RuntimeError(
        'The SRC environment variable must be set to the directory that '
        'contains (or should contain) the llvm-project checkout.')
  llvm_project_path = os.path.join(src_dir, 'llvm-project')
  clone_with_retries('https://github.com/llvm/llvm-project.git',
                     llvm_project_path)
  git_repo = GitRepo(llvm_project_path)
  result = git_repo.bisect(good_commit, bad_commit, test_command)
  print('Culprit commit', result)
  return result
200
201
def main():
  # pylint: disable=line-too-long
  """Finds the culprit LLVM commit that introduced a clang regression.

  Expects three command-line arguments: the test command, the known good commit
  and the known bad commit. Returns 0 on success, or 1 (after printing a usage
  message to stderr) if too few arguments were given.

  Can be tested using this command in a libsodium shell:
  python3 bisect_clang.py "cd /src/libsodium; make clean; cd -; compile && /out/secret_key_auth_fuzzer -runs=100" \
                          f7e52fbdb5a7af8ea0808e98458b497125a5eca1 \
                          8288453f6aac05080b751b680455349e09d49825
  """
  # pylint: enable=line-too-long
  # Validate the arguments before doing any work so that a mistyped invocation
  # produces a usage message rather than an IndexError.
  if len(sys.argv) < 4:
    program = sys.argv[0] if sys.argv else 'bisect_clang.py'
    print('Usage: %s <test_command> <good_commit> <bad_commit>' % program,
          file=sys.stderr)
    return 1
  # TODO(metzman): Check CFLAGS for things like -fsanitize=fuzzer-no-link.
  # TODO(metzman): Allow test_command to be optional and for just build.sh to be
  # used instead.
  # TODO(metzman): Add in more automation so that the script can automatically
  # determine the commits used in last Clang roll.
  test_command, good_commit, bad_commit = sys.argv[1:4]
  # TODO(metzman): Make verbosity configurable.
  logging.getLogger().setLevel(logging.DEBUG)
  install_clang_build_deps()
  find_culprit_commit(test_command, good_commit, bad_commit)
  return 0
224
225
if __name__ == '__main__':
  # Use main()'s return value as the process exit status.
  exit_status = main()
  sys.exit(exit_status)
228