# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Uses bisection to determine which commit a bug was introduced and fixed.

This module takes a high and a low commit SHA, a repo name, and a bug.
The module bisects the high and low commit SHA searching for the location
where the bug was introduced. It also looks for where the bug was fixed.
This is done with the following steps:

  1. Build the fuzzers at new_commit and record the result of running the
     test case (the crash's dedup token, or None when no crash report or
     token is found in the output).
  2. Build the fuzzers at old_commit and check that its result differs from
     new_commit's, retrying once with an older commit if it does not.
  3. Binary search the commits in between, keeping the commits whose result
     matches new_commit on the "new" side of the range.

  NOTE: Needs to be run from root of the OSS-Fuzz source checkout.

  Typical usage example:
        python3 infra/bisector.py
          --old_commit 1e403e9259a1abedf108ab86f711ba52c907226d
          --new_commit f79be4f2330f4b89ea2f42e1c44ca998c59a0c0f
          --fuzz_target rules_fuzzer
          --project_name yara
          --test_case_path infra/yara_testcase
          --sanitizer address
          --type regressed
"""

import argparse
import collections
import logging
import os
import sys
import tempfile

import build_specified_commit
import helper
import repo_manager
import utils

# Outcome of a bisection: the main repo's URL and the commit that was found.
Result = collections.namedtuple('Result', ['repo_url', 'commit'])

# A run only counts as a crash when its output contains at least one start
# marker and at least one end marker.
START_MARKERS = [
    '==ERROR',
    '==WARNING',
]

END_MARKERS = [
    'SUMMARY:',
]

# Prefix of the output line that carries the crash's dedup token.
DEDUP_TOKEN_MARKER = 'DEDUP_TOKEN:'


class BisectError(Exception):
  """Bisection error.

  Carries the URL of the repo being bisected in `repo_url` in addition to the
  message.
  """

  def __init__(self, message, repo_url):
    super().__init__(message)
    self.repo_url = repo_url


def main():
  """Finds the commit SHA where an error was initially introduced or fixed.

  Parses the command line, runs the bisection and reports the commit found.

  Returns:
    0 when a commit was found, 1 when no commit was found or when the result
    is the same as --old_commit (bisection was not possible).
  """
  logging.getLogger().setLevel(logging.INFO)
  utils.chdir_to_root()
  parser = argparse.ArgumentParser(
      description='git bisection for finding introduction of bugs')

  parser.add_argument('--project_name',
                      help='The name of the project where the bug occurred.',
                      required=True)
  parser.add_argument('--new_commit',
                      help='The newest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--old_commit',
                      help='The oldest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--fuzz_target',
                      help='The name of the fuzzer to be built.',
                      required=True)
  parser.add_argument('--test_case_path',
                      help='The path to test case.',
                      required=True)
  parser.add_argument('--engine',
                      help='The default is "libfuzzer".',
                      default='libfuzzer')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--type',
                      choices=['regressed', 'fixed'],
                      help='The bisection type.',
                      required=True)
  parser.add_argument('--architecture', default='x86_64')
  args = parser.parse_args()

  build_data = build_specified_commit.BuildData(project_name=args.project_name,
                                                engine=args.engine,
                                                sanitizer=args.sanitizer,
                                                architecture=args.architecture)

  result = bisect(args.type, args.old_commit, args.new_commit,
                  args.test_case_path, args.fuzz_target, build_data)
  if not result.commit:
    logging.error('No error was found in commit range %s:%s', args.old_commit,
                  args.new_commit)
    return 1
  if result.commit == args.old_commit:
    # The first literal needs a trailing space: adjacent string literals are
    # concatenated with nothing in between.
    logging.error(
        'Bisection Error: Both the first and the last commits in '
        'the given range have the same behavior, bisection is not possible. ')
    return 1
  # Report the commit with wording that matches the requested bisection type.
  if args.type == 'fixed':
    print('Error was fixed at commit %s' % result.commit)
  else:
    print('Error was introduced at commit %s' % result.commit)
  return 0


def _get_dedup_token(output):
  """Get dedup token.

  Args:
    output: The text to search.

  Returns:
    The stripped text following the first occurrence of DEDUP_TOKEN_MARKER on
    a line of `output`, or None if no line contains the marker.
  """
  for line in output.splitlines():
    token_location = line.find(DEDUP_TOKEN_MARKER)
    if token_location == -1:
      continue

    return line[token_location + len(DEDUP_TOKEN_MARKER):].strip()

  return None


def _check_for_crash(project_name, fuzz_target, test_case_path):
  """Check for crash.

  Reproduces the test case through helper.reproduce_impl and inspects the
  captured output.

  Args:
    project_name: The name of the project being tested.
    fuzz_target: The name of the fuzz target to run.
    test_case_path: The path to the test case.

  Returns:
    The dedup token found in the combined stdout and stderr, or None when the
    reproduction returned no return code, when the output lacks a start or an
    end marker, or when it contains no dedup token.
  """

  def docker_run(args):
    """Runs `docker run` with `args` via utils.execute and returns its result."""
    command = ['docker', 'run', '--rm', '--privileged']
    if sys.stdin.isatty():
      command.append('-i')

    return utils.execute(command + args)

  logging.info('Checking for crash')
  out, err, return_code = helper.reproduce_impl(project_name,
                                                fuzz_target,
                                                False, [], [],
                                                test_case_path,
                                                runner=docker_run,
                                                err_result=(None, None, None))
  # err_result above makes a failed reproduction show up as a None return code.
  if return_code is None:
    return None

  logging.info('stdout =\n%s', out)
  logging.info('stderr =\n%s', err)

  # pylint: disable=unsupported-membership-test
  has_start_marker = any(
      marker in out or marker in err for marker in START_MARKERS)
  has_end_marker = any(marker in out or marker in err for marker in END_MARKERS)
  if not has_start_marker or not has_end_marker:
    return None

  return _get_dedup_token(out + err)


# pylint: disable=too-many-locals
# pylint: disable=too-many-arguments
# pylint: disable=too-many-statements
def _bisect(bisect_type, old_commit, new_commit, test_case_path, fuzz_target,
            build_data):
  """Perform the bisect.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the range.
    new_commit: The newest commit in the range.
    test_case_path: The file path of the test case.
    fuzz_target: The name of the fuzzer to be tested.
    build_data: The build parameters; its project_name is read here and the
      object is passed through to the build step.

  Returns:
    A Result holding the repo URL and the commit at the "new" end of the
    range once the search has narrowed it to adjacent commits.

  Raises:
    ValueError: when the main repo can not be determined.
    BisectError: when old_commit equals new_commit, the bisect type is
      invalid, new_commit or old_commit fails to build, or old_commit gives
      the same result as new_commit.
  """
  # pylint: disable=too-many-branches
  base_builder_repo = build_specified_commit.load_base_builder_repo()

  with tempfile.TemporaryDirectory() as tmp_dir:
    repo_url, repo_path = build_specified_commit.detect_main_repo(
        build_data.project_name, commit=new_commit)
    if not repo_url or not repo_path:
      raise ValueError('Main git repo can not be determined.')

    if old_commit == new_commit:
      raise BisectError('old_commit is the same as new_commit', repo_url)

    # Copy /src from the built Docker container to ensure all dependencies
    # exist. This will be mounted when running them.
    host_src_dir = build_specified_commit.copy_src_from_docker(
        build_data.project_name, tmp_dir)

    bisect_repo_manager = repo_manager.RepoManager(
        os.path.join(host_src_dir, os.path.basename(repo_path)))
    bisect_repo_manager.fetch_all_remotes()

    commit_list = bisect_repo_manager.get_commit_list(new_commit, old_commit)

    # Index 0 is used as the new end of the range and the last index as the
    # old end.
    old_idx = len(commit_list) - 1
    new_idx = 0
    logging.info('Testing against new_commit (%s)', commit_list[new_idx])
    if not build_specified_commit.build_fuzzers_from_commit(
        commit_list[new_idx],
        bisect_repo_manager,
        host_src_dir,
        build_data,
        base_builder_repo=base_builder_repo):
      raise BisectError('Failed to build new_commit', repo_url)

    if bisect_type == 'fixed':
      should_crash = False
    elif bisect_type == 'regressed':
      should_crash = True
    else:
      raise BisectError('Invalid bisect type ' + bisect_type, repo_url)

    # The result at new_commit is the reference every other commit is
    # compared against.
    expected_error = _check_for_crash(build_data.project_name, fuzz_target,
                                      test_case_path)
    logging.info('new_commit result = %s', expected_error)

    if not should_crash and expected_error:
      logging.warning('new_commit crashed but shouldn\'t. '
                      'Continuing to see if stack changes.')

    # Bisection needs the two ends of the range to behave differently. Give
    # old_commit two attempts, the second from an older commit.
    range_valid = False
    for _ in range(2):
      logging.info('Testing against old_commit (%s)', commit_list[old_idx])
      if not build_specified_commit.build_fuzzers_from_commit(
          commit_list[old_idx],
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo):
        raise BisectError('Failed to build old_commit', repo_url)

      if _check_for_crash(build_data.project_name, fuzz_target,
                          test_case_path) == expected_error:
        logging.warning('old_commit %s had same result as new_commit %s',
                        old_commit, new_commit)
        # Try again on a slightly older commit.
        old_commit = bisect_repo_manager.get_parent(old_commit, 64)
        if not old_commit:
          break

        commit_list = bisect_repo_manager.get_commit_list(
            new_commit, old_commit)
        old_idx = len(commit_list) - 1
        continue

      range_valid = True
      break

    if not range_valid:
      raise BisectError('old_commit had same result as new_commit', repo_url)

    # Binary search: commits matching new_commit's result move the new end,
    # everything else moves the old end.
    while old_idx - new_idx > 1:
      curr_idx = (old_idx + new_idx) // 2
      logging.info('Testing against %s (idx=%d)', commit_list[curr_idx],
                   curr_idx)
      if not build_specified_commit.build_fuzzers_from_commit(
          commit_list[curr_idx],
          bisect_repo_manager,
          host_src_dir,
          build_data,
          base_builder_repo=base_builder_repo):
        # Treat build failures as if we couldn't repro.
        # TODO(ochang): retry nearby commits?
        old_idx = curr_idx
        continue

      current_error = _check_for_crash(build_data.project_name, fuzz_target,
                                       test_case_path)
      logging.info('Current result = %s', current_error)
      if expected_error == current_error:
        new_idx = curr_idx
      else:
        old_idx = curr_idx
    return Result(repo_url, commit_list[new_idx])


# pylint: disable=too-many-locals
# pylint: disable=too-many-arguments
def bisect(bisect_type, old_commit, new_commit, test_case_path, fuzz_target,
           build_data):
  """From a commit range, this function calculates which commit introduced
  (or fixed) a specific error from a fuzz test_case_path.

  Args:
    bisect_type: The type of the bisect ('regressed' or 'fixed').
    old_commit: The oldest commit in the error regression range.
    new_commit: The newest commit in the error regression range.
    test_case_path: The file path of the test case that triggers the error
    fuzz_target: The name of the fuzzer to be tested.
    build_data: a class holding all of the input parameters for bisection.

  Returns:
    A Result namedtuple (repo_url, commit) whose commit is the SHA found by
    the bisection.

  Raises:
    ValueError: when a repo url can't be determined from the project.
    BisectError: when the range can not be bisected (see _bisect).
  """
  try:
    return _bisect(bisect_type, old_commit, new_commit, test_case_path,
                   fuzz_target, build_data)
  finally:
    # Clean up projects/ as _bisect may have modified it.
    oss_fuzz_repo_manager = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
    oss_fuzz_repo_manager.git(['reset', 'projects'])
    oss_fuzz_repo_manager.git(['checkout', 'projects'])
    oss_fuzz_repo_manager.git(['clean', '-fxd', 'projects'])


if __name__ == '__main__':
  # Propagate main()'s return value as the process exit status.
  sys.exit(main())