• Home
  • History
  • Annotate
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14"""Uses bisection to determine which commit a bug was introduced and fixed.
15This module takes a high and a low commit SHA, a repo name, and a bug.
16The module bisects the high and low commit SHA searching for the location
17where the bug was introduced. It also looks for where the bug was fixed.
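This is done with the following steps:
  1. Build the fuzzers at the new commit and record the result of running the
     test case.
  2. Build the fuzzers at the old commit and check that its result differs.
  3. Binary search the commits in between for the point where the result
     changes.
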
21  NOTE: Needs to be run from root of the OSS-Fuzz source checkout.
22
  Typical usage example:
        python3 infra/bisector.py
          --old_commit 1e403e9259a1abedf108ab86f711ba52c907226d
          --new_commit f79be4f2330f4b89ea2f42e1c44ca998c59a0c0f
          --fuzz_target rules_fuzzer
          --project_name yara
          --test_case_path infra/yara_testcase
          --sanitizer address
          --type regressed
31"""
32
33import argparse
34import collections
35import logging
36import os
37import sys
38import tempfile
39
40import build_specified_commit
41import helper
42import repo_manager
43import utils
44
# Outcome of a bisection: the URL of the bisected repository and the commit
# the search settled on.
Result = collections.namedtuple('Result', 'repo_url commit')

# Reproduction output must contain at least one of these substrings (and one
# of END_MARKERS) for _check_for_crash to treat the run as a crash.
START_MARKERS = ['==ERROR', '==WARNING']

# Reproduction output must contain at least one of these substrings (and one
# of START_MARKERS) for _check_for_crash to treat the run as a crash.
END_MARKERS = ['SUMMARY:']

# Text that precedes the dedup token on its output line.
DEDUP_TOKEN_MARKER = 'DEDUP_TOKEN:'
57
58
class BisectError(Exception):
  """Raised when a bisection cannot be started or completed.

  Attributes:
    repo_url: URL of the repository the failed bisection was operating on.
  """

  def __init__(self, message, repo_url):
    # Record the repository first; the message is handled by Exception.
    self.repo_url = repo_url
    super().__init__(message)
65
66
def main():
  """Parses command-line arguments, runs the bisection and reports the result.

  Returns:
    0 if the bisection identified a commit; 1 if it produced no commit or
    settled on old_commit (i.e. both ends of the range behave the same).
  """
  logging.getLogger().setLevel(logging.INFO)
  utils.chdir_to_root()
  parser = argparse.ArgumentParser(
      description='git bisection for finding introduction of bugs')

  parser.add_argument('--project_name',
                      help='The name of the project where the bug occurred.',
                      required=True)
  parser.add_argument('--new_commit',
                      help='The newest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--old_commit',
                      help='The oldest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--fuzz_target',
                      help='The name of the fuzzer to be built.',
                      required=True)
  parser.add_argument('--test_case_path',
                      help='The path to test case.',
                      required=True)
  parser.add_argument('--engine',
                      help='The default is "libfuzzer".',
                      default='libfuzzer')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--type',
                      choices=['regressed', 'fixed'],
                      help='The bisection type.',
                      required=True)
  parser.add_argument('--architecture', default='x86_64')
  args = parser.parse_args()

  build_data = build_specified_commit.BuildData(project_name=args.project_name,
                                                engine=args.engine,
                                                sanitizer=args.sanitizer,
                                                architecture=args.architecture)

  result = bisect(args.type, args.old_commit, args.new_commit,
                  args.test_case_path, args.fuzz_target, build_data)
  if not result.commit:
    logging.error('No error was found in commit range %s:%s', args.old_commit,
                  args.new_commit)
    return 1
  if result.commit == args.old_commit:
    # The adjacent literals are concatenated, so the first one must end with
    # a space to keep "in" and "the" apart.
    logging.error(
        'Bisection Error: Both the first and the last commits in '
        'the given range have the same behavior, bisection is not possible.')
    return 1

  # Word the report according to what was bisected: a 'fixed' bisection finds
  # the commit where the error went away, not where it appeared.
  if args.type == 'fixed':
    print('Error was fixed at commit %s' % result.commit)
  else:
    print('Error was introduced at commit %s' % result.commit)
  return 0
120
121
def _get_dedup_token(output):
  """Extracts the dedup token from reproduction output.

  Args:
    output: Text to scan line by line.

  Returns:
    The text that follows the first DEDUP_TOKEN_MARKER on the first line
    containing it, with surrounding whitespace stripped, or None if no line
    contains the marker.
  """
  for line in output.splitlines():
    # partition() splits on the first occurrence; `marker` is empty when the
    # line does not contain DEDUP_TOKEN_MARKER at all.
    _, marker, remainder = line.partition(DEDUP_TOKEN_MARKER)
    if marker:
      return remainder.strip()

  return None
132
133
def _check_for_crash(project_name, fuzz_target, test_case_path):
  """Reproduces the test case and returns the crash's dedup token, if any.

  Args:
    project_name: Name of the project, forwarded to helper.reproduce_impl.
    fuzz_target: Name of the fuzz target, forwarded to helper.reproduce_impl.
    test_case_path: Path of the test case, forwarded to helper.reproduce_impl.

  Returns:
    None if reproduction yields no return code, or if stdout/stderr lack
    either a START_MARKERS or an END_MARKERS entry. Otherwise the result of
    _get_dedup_token on stdout + stderr, which is itself None when the output
    has no dedup token line (so such a crash is indistinguishable from no
    crash for callers).
  """

  def run_docker(args):
    """Runs `docker run` with the given extra arguments via utils.execute."""
    docker_command = ['docker', 'run', '--rm', '--privileged']
    # Pass -i only when stdin is a terminal.
    if sys.stdin.isatty():
      docker_command.append('-i')

    return utils.execute(docker_command + args)

  logging.info('Checking for crash')
  reproduce_result = helper.reproduce_impl(project_name,
                                           fuzz_target,
                                           False, [], [],
                                           test_case_path,
                                           runner=run_docker,
                                           err_result=(None, None, None))
  out, err, return_code = reproduce_result
  if return_code is None:
    return None

  logging.info('stdout =\n%s', out)
  logging.info('stderr =\n%s', err)

  def seen_in_output(markers):
    """Returns True if any marker occurs in stdout or in stderr."""
    # pylint: disable=unsupported-membership-test
    return any(
        marker in stream for marker in markers for stream in (out, err))

  # A crash report needs both a start marker and an end marker.
  if not (seen_in_output(START_MARKERS) and seen_in_output(END_MARKERS)):
    return None

  return _get_dedup_token(out + err)
165
166
167# pylint: disable=too-many-locals
168# pylint: disable=too-many-arguments
169# pylint: disable=too-many-statements
def _bisect(bisect_type, old_commit, new_commit, test_case_path, fuzz_target,
            build_data):
  """Perform the bisect.

  Builds the fuzzers at new_commit and records the result of running the test
  case, verifies that old_commit gives a different result (retrying once from
  an older ancestor if it does not), then binary-searches the commit list
  between the two for the point where the result changes.

  Args:
    bisect_type: 'regressed' or 'fixed'.
    old_commit: The oldest commit of the range.
    new_commit: The newest commit of the range.
    test_case_path: Path of the test case passed to _check_for_crash.
    fuzz_target: Name of the fuzz target passed to _check_for_crash.
    build_data: Build parameters; project_name is read from it and the object
      is forwarded to build_fuzzers_from_commit.

  Returns:
    A Result of the repo URL and commit_list[new_idx], i.e. the entry at the
    last index whose crash result matched the one observed at new_commit.

  Raises:
    ValueError: if the main repo URL or path cannot be determined.
    BisectError: if old_commit equals new_commit, new_commit or old_commit
      fails to build, bisect_type is invalid, or old_commit gives the same
      result as new_commit even after the retry.
  """
  # pylint: disable=too-many-branches
  base_builder_repo = build_specified_commit.load_base_builder_repo()

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo_url, repo_path = build_specified_commit.detect_main_repo(
        build_data.project_name, commit=new_commit)
    if not repo_url or not repo_path:
      raise ValueError('Main git repo can not be determined.')

    if old_commit == new_commit:
      raise BisectError('old_commit is the same as new_commit', repo_url)

    # Copy /src from the built Docker container to ensure all dependencies
    # exist. This will be mounted when running them.
    host_src_dir = build_specified_commit.copy_src_from_docker(
        build_data.project_name, tmp_dir)

    bisect_repo_manager = repo_manager.RepoManager(
        os.path.join(host_src_dir, os.path.basename(repo_path)))
    bisect_repo_manager.fetch_all_remotes()

    commit_list = bisect_repo_manager.get_commit_list(new_commit, old_commit)

    # Index 0 is treated as new_commit and the last index as old_commit.
    old_idx = len(commit_list) - 1
    new_idx = 0
    logging.info('Testing against new_commit (%s)', commit_list[new_idx])
    if not build_specified_commit.build_fuzzers_from_commit(
        commit_list[new_idx],
        bisect_repo_manager,
        host_src_dir,
        build_data,
        base_builder_repo=base_builder_repo):
      raise BisectError('Failed to build new_commit', repo_url)

    # should_crash is only used for the sanity warning below; it does not
    # affect the search itself.
    if bisect_type == 'fixed':
      should_crash = False
    elif bisect_type == 'regressed':
      should_crash = True
    else:
      raise BisectError('Invalid bisect type ' + bisect_type, repo_url)

    # The result at new_commit is the reference every other commit is
    # compared against.
    expected_error = _check_for_crash(build_data.project_name, fuzz_target,
                                      test_case_path)
    logging.info('new_commit result = %s', expected_error)

    if not should_crash and expected_error:
      logging.warning('new_commit crashed but not shouldn\'t. '
                      'Continuing to see if stack changes.')

    # Bisection needs the two ends of the range to differ. Allow at most two
    # attempts: old_commit as given, then one older ancestor.
    range_valid = False
    for _ in range(2):
      logging.info('Testing against old_commit (%s)', commit_list[old_idx])
      if not build_specified_commit.build_fuzzers_from_commit(
          commit_list[old_idx],
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo):
        raise BisectError('Failed to build old_commit', repo_url)

      if _check_for_crash(build_data.project_name, fuzz_target,
                          test_case_path) == expected_error:
        logging.warning('old_commit %s had same result as new_commit %s',
                        old_commit, new_commit)
        # Try again on a slightly older commit.
        # NOTE(review): presumably get_parent(commit, 64) returns the 64th
        # ancestor, or a falsy value if there is none -- confirm against
        # repo_manager.
        old_commit = bisect_repo_manager.get_parent(old_commit, 64)
        if not old_commit:
          break

        # Rebuild the commit list so it extends back to the new old_commit.
        commit_list = bisect_repo_manager.get_commit_list(
            new_commit, old_commit)
        old_idx = len(commit_list) - 1
        continue

      range_valid = True
      break

    if not range_valid:
      raise BisectError('old_commit had same result as new_commit', repo_url)

    # Binary search: new_idx always points at a commit whose result matches
    # new_commit's, old_idx at one treated as differing. Stop when adjacent.
    while old_idx - new_idx > 1:
      curr_idx = (old_idx + new_idx) // 2
      logging.info('Testing against %s (idx=%d)', commit_list[curr_idx],
                   curr_idx)
      if not build_specified_commit.build_fuzzers_from_commit(
          commit_list[curr_idx],
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo):
        # Treat build failures as if we couldn't reproduce.
        # TODO(ochang): retry nearby commits?
        old_idx = curr_idx
        continue

      current_error = _check_for_crash(build_data.project_name, fuzz_target,
                                       test_case_path)
      logging.info('Current result = %s', current_error)
      if expected_error == current_error:
        # Same result as new_commit: the change lies further back.
        new_idx = curr_idx
      else:
        # Different result: the change lies between new_idx and curr_idx.
        old_idx = curr_idx
    return Result(repo_url, commit_list[new_idx])
276
277
278# pylint: disable=too-many-locals
279# pylint: disable=too-many-arguments
def bisect(bisect_type, old_commit, new_commit, test_case_path, fuzz_target,
           build_data):
  """Bisects a commit range for the commit where a test case's result changed.

  Delegates the search to _bisect and, whether or not it succeeds, restores
  the projects/ directory of the OSS-Fuzz checkout afterwards.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the error regression range.
    new_commit: The newest commit in the error regression range.
    test_case_path: The file path of the test case that triggers the error.
    fuzz_target: The name of the fuzzer to be tested.
    build_data: a class holding all of the input parameters for bisection.

  Returns:
    The Result (repo_url, commit) produced by _bisect.

  Raises:
    ValueError: when a repo url can't be determined from the project.
    BisectError: when _bisect cannot carry out the bisection.
  """
  # git invocations, run in order, that undo changes under projects/.
  cleanup_commands = (
      ['reset', 'projects'],
      ['checkout', 'projects'],
      ['clean', '-fxd', 'projects'],
  )
  try:
    return _bisect(bisect_type, old_commit, new_commit, test_case_path,
                   fuzz_target, build_data)
  finally:
    # Clean up projects/ as _bisect may have modified it.
    oss_fuzz_repo = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
    for git_args in cleanup_commands:
      oss_fuzz_repo.git(git_args)
308
309
if __name__ == '__main__':
  # main() returns 0 on success and 1 on failure; pass that on as the process
  # exit status instead of discarding it.
  sys.exit(main())
312