# Copyright 2019 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Module to build an image from a specific commit, branch or pull request.

This module allows each of the OSS-Fuzz projects' fuzzers to be built
from a specific point in time. This feature can be used for implementations
like continuous integration fuzzing and bisection to find errors.
"""
import argparse
import bisect
import datetime
import os
import collections
import json
import logging
import re
import shutil
import tempfile

import helper
import repo_manager
import retry
import utils

BuildData = collections.namedtuple(
    'BuildData', ['project_name', 'engine', 'sanitizer', 'architecture'])

# Prefix of the line in a `.git` *file* that points at the real git directory.
_GIT_DIR_MARKER = 'gitdir: '
# First argument passed to retry.wrap for image builds.
_IMAGE_BUILD_TRIES = 3


class BaseBuilderRepo:
  """Repo of base-builder images.

  Holds two parallel lists: `timestamps[i]` is the timestamp of the image
  whose digest is `digests[i]`.
  """

  def __init__(self):
    self.timestamps = []
    self.digests = []

  def add_digest(self, timestamp, digest):
    """Add a digest.

    Entries are appended, not inserted in order. find_digest() bisects
    `timestamps`, so callers must add digests in ascending timestamp order.
    """
    self.timestamps.append(timestamp)
    self.digests.append(digest)

  def find_digest(self, timestamp):
    """Find the latest image at or before the given timestamp.

    Returns:
      The matching digest, or None (after logging an error) when every known
      image is newer than |timestamp| or no images are known.
    """
    index = bisect.bisect_right(self.timestamps, timestamp)
    if index > 0:
      return self.digests[index - 1]

    logging.error('Failed to find suitable base-builder.')
    return None


def _replace_gitdir(src_dir, file_path):
  """Replace an absolute gitdir in a `.git` file with a relative path.

  Args:
    src_dir: Host directory that corresponds to /src inside the container.
    file_path: Path of the `.git` file to rewrite in place.

  The file is left untouched if its gitdir entry is already relative.
  """
  with open(file_path) as handle:
    lines = handle.readlines()

  new_lines = []
  for line in lines:
    if line.startswith(_GIT_DIR_MARKER):
      absolute_path = line[len(_GIT_DIR_MARKER):].strip()
      if not os.path.isabs(absolute_path):
        # Already relative.
        return

      current_dir = os.path.dirname(file_path)
      # Rebase to /src rather than the host src dir. Only the leading
      # occurrence is rewritten so that a path which happens to contain
      # |src_dir| again further down is not mangled.
      base_dir = current_dir.replace(src_dir, '/src', 1)
      relative_path = os.path.relpath(absolute_path, base_dir)
      logging.info('Replacing absolute submodule gitdir from %s to %s',
                   absolute_path, relative_path)

      # readlines() keeps line endings, so restore the one that strip()
      # removed; otherwise a following line would be glued onto this one.
      line = _GIT_DIR_MARKER + relative_path + '\n'

    new_lines.append(line)

  with open(file_path, 'w') as handle:
    handle.write(''.join(new_lines))


def _make_gitdirs_relative(src_dir):
  """Make the gitdir of every `.git` file found under |src_dir| relative."""
  for root_dir, _, files in os.walk(src_dir):
    for filename in files:
      # Only `.git` *files* are of interest; `.git` directories show up in the
      # directory list of os.walk, not in |files|.
      if filename != '.git':
        continue

      file_path = os.path.join(root_dir, filename)
      _replace_gitdir(src_dir, file_path)


def _replace_base_builder_digest(dockerfile_path, digest):
  """Replace the base-builder digest in a Dockerfile.

  Every line that starts with `FROM` (ignoring surrounding whitespace) is
  rewritten to pin the base-builder image at |digest|. The file is modified
  in place.
  """
  with open(dockerfile_path) as handle:
    lines = handle.readlines()

  new_lines = []
  for line in lines:
    if line.strip().startswith('FROM'):
      line = 'FROM gcr.io/oss-fuzz-base/base-builder@' + digest + '\n'

    new_lines.append(line)

  with open(dockerfile_path, 'w') as handle:
    handle.write(''.join(new_lines))


def copy_src_from_docker(project_name, host_dir):
  """Copy /src from docker to the host.

  Args:
    project_name: The OSS-Fuzz project whose image is copied from.
    host_dir: Host directory that receives the copy; any existing `src`
      subdirectory is removed first.

  Returns:
    The path of the copied source directory (`<host_dir>/src`).
  """
  # Copy /src to host.
  image_name = 'gcr.io/oss-fuzz/' + project_name
  src_dir = os.path.join(host_dir, 'src')
  if os.path.exists(src_dir):
    shutil.rmtree(src_dir, ignore_errors=True)

  docker_args = [
      '-v',
      host_dir + ':/out',
      image_name,
      'cp',
      '-r',
      '-p',
      '/src',
      '/out',
  ]
  helper.docker_run(docker_args)

  # Submodules can have gitdir entries which point to absolute paths. Make them
  # relative, as otherwise we can't do operations on the checkout on the host.
  _make_gitdirs_relative(src_dir)
  return src_dir


@retry.wrap(_IMAGE_BUILD_TRIES, 2)
def _build_image_with_retries(project_name):
  """Build image with retries.

  Returns whatever helper.build_image_impl returns; callers in this module
  treat a falsy value as failure.
  """
  return helper.build_image_impl(project_name)


def get_required_post_checkout_steps(dockerfile_path):
  """Get required post checkout steps (best effort).

  Scans the Dockerfile for `RUN` commands that mention build.sh together with
  $SRC or /src and that appear after the last `RUN git|svn|hg` checkout
  command.

  Args:
    dockerfile_path: Path to the project's Dockerfile.

  Returns:
    A list of (workdir, command) tuples, in Dockerfile order.
  """

  checkout_pattern = re.compile(r'\s*RUN\s*(git|svn|hg)')

  # If the build.sh is copied from upstream, we need to copy it again after
  # changing the revision to ensure correct building.
  post_run_pattern = re.compile(r'\s*RUN\s*(.*build\.sh.*(\$SRC|/src).*)')

  with open(dockerfile_path) as handle:
    lines = handle.readlines()

  subsequent_run_cmds = []
  for i, line in enumerate(lines):
    if checkout_pattern.match(line):
      # Anything collected so far precedes this checkout, so discard it.
      subsequent_run_cmds = []
      continue

    match = post_run_pattern.match(line)
    if match:
      # The working directory is derived from the lines preceding this one.
      workdir = helper.workdir_from_lines(lines[:i])
      command = match.group(1)
      subsequent_run_cmds.append((workdir, command))

  return subsequent_run_cmds


# pylint: disable=too-many-locals
def build_fuzzers_from_commit(commit,
                              build_repo_manager,
                              host_src_path,
                              build_data,
                              base_builder_repo=None):
  """Builds a OSS-Fuzz fuzzer at a specific commit SHA.

  If the first build fails, the project's OSS-Fuzz integration directory (and,
  when |base_builder_repo| is given, the base-builder image) is rolled back to
  a state close to the commit date and the build is retried once.

  Args:
    commit: The commit SHA to build the fuzzers at.
    build_repo_manager: The OSS-Fuzz project's repo manager to be built at.
    host_src_path: Host path of the source checkout; mounted at /src.
    build_data: A struct containing project build information.
    base_builder_repo: A BaseBuilderRepo.
  Returns:
    True on a successful build, False otherwise.
  """
  oss_fuzz_repo_manager = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
  num_retry = 1

  def cleanup():
    # Re-copy /src for a clean checkout every time.
    copy_src_from_docker(build_data.project_name,
                         os.path.dirname(host_src_path))
    build_repo_manager.fetch_all_remotes()

  # NOTE(review): these are relative paths, so this presumably relies on the
  # current directory being the OSS-Fuzz root - confirm against callers.
  projects_dir = os.path.join('projects', build_data.project_name)
  dockerfile_path = os.path.join(projects_dir, 'Dockerfile')

  for i in range(num_retry + 1):
    build_repo_manager.checkout_commit(commit, clean=False)

    post_checkout_steps = get_required_post_checkout_steps(dockerfile_path)
    for workdir, post_checkout_step in post_checkout_steps:
      logging.info('Running post-checkout step `%s` in %s.', post_checkout_step,
                   workdir)
      helper.docker_run([
          '-w',
          workdir,
          '-v',
          host_src_path + ':' + '/src',
          'gcr.io/oss-fuzz/' + build_data.project_name,
          '/bin/bash',
          '-c',
          post_checkout_step,
      ])

    result = helper.build_fuzzers_impl(project_name=build_data.project_name,
                                       clean=True,
                                       engine=build_data.engine,
                                       sanitizer=build_data.sanitizer,
                                       architecture=build_data.architecture,
                                       env_to_add=None,
                                       source_path=host_src_path,
                                       mount_location='/src')
    if result == 0 or i == num_retry:
      break

    # Retry with an OSS-Fuzz builder container that's closer to the project
    # commit date.
    commit_date = build_repo_manager.commit_date(commit)

    # Find first change in the projects/<PROJECT> directory before the project
    # commit date.
    oss_fuzz_commit, _, _ = oss_fuzz_repo_manager.git([
        'log', '--before=' + commit_date.isoformat(), '-n1', '--format=%H',
        projects_dir
    ],
                                                      check_result=True)
    oss_fuzz_commit = oss_fuzz_commit.strip()
    if not oss_fuzz_commit:
      logging.info(
          'Could not find first OSS-Fuzz commit prior to upstream commit. '
          'Falling back to oldest integration commit.')

      # Find the oldest commit.
      oss_fuzz_commit, _, _ = oss_fuzz_repo_manager.git(
          ['log', '--reverse', '--format=%H', projects_dir], check_result=True)

      # Guard against empty output: indexing an empty splitlines() result
      # would raise IndexError before the error below could be reported.
      commit_lines = oss_fuzz_commit.splitlines()
      oss_fuzz_commit = commit_lines[0].strip() if commit_lines else ''

    if not oss_fuzz_commit:
      logging.error('Failed to get oldest integration commit.')
      break

    logging.info('Build failed. Retrying on earlier OSS-Fuzz commit %s.',
                 oss_fuzz_commit)

    # Check out projects/<PROJECT> dir to the commit that was found.
    oss_fuzz_repo_manager.git(['checkout', oss_fuzz_commit, projects_dir],
                              check_result=True)

    # Also use the closest base-builder we can find.
    if base_builder_repo:
      base_builder_digest = base_builder_repo.find_digest(commit_date)
      if not base_builder_digest:
        return False

      logging.info('Using base-builder with digest %s.', base_builder_digest)
      _replace_base_builder_digest(dockerfile_path, base_builder_digest)

    # Rebuild image and re-copy src dir since things in /src could have changed.
    if not _build_image_with_retries(build_data.project_name):
      logging.error('Failed to rebuild image.')
      return False

    cleanup()

  cleanup()
  return result == 0


def detect_main_repo(project_name, repo_name=None, commit=None):
  """Checks a docker image for the main repo of an OSS-Fuzz project.

  Note: The default is to use the repo name to detect the main repo.

  Args:
    project_name: The name of the oss-fuzz project.
    repo_name: The name of the main repo in an OSS-Fuzz project.
    commit: A commit SHA that is associated with the main repo.

  Returns:
    The repo's origin, the repo's path. (None, None) if neither |repo_name|
    nor |commit| is given, if the image fails to build, or if detection fails.
  """

  if not repo_name and not commit:
    logging.error(
        'Error: can not detect main repo without a repo_name or a commit.')
    return None, None
  if repo_name and commit:
    logging.info(
        'Both repo name and commit specific. Using repo name for detection.')

  # Change to oss-fuzz main directory so helper.py runs correctly.
  utils.chdir_to_root()
  if not _build_image_with_retries(project_name):
    logging.error('Error: building %s image failed.', project_name)
    return None, None
  docker_image_name = 'gcr.io/oss-fuzz/' + project_name
  command_to_run = [
      'docker', 'run', '--rm', '-t', docker_image_name, 'python3',
      os.path.join('/opt', 'cifuzz', 'detect_repo.py')
  ]
  if repo_name:
    command_to_run.extend(['--repo_name', repo_name])
  else:
    command_to_run.extend(['--example_commit', commit])
  out, _, _ = utils.execute(command_to_run)
  # The detection script's output is searched for a line of the form
  # "Detected repo: <origin> <path>".
  match = re.search(r'\bDetected repo: ([^ ]+) ([^ ]+)', out.rstrip())
  if match and match.group(1) and match.group(2):
    return match.group(1), match.group(2)

  logging.error('Failed to detect repo:\n%s', out)
  return None, None


def load_base_builder_repo():
  """Get base-image digests.

  Returns:
    A BaseBuilderRepo filled from `gcloud container images list-tags`, or None
    if gcloud is not found in PATH.
  """
  # shutil.which replaces distutils.spawn.find_executable; distutils was
  # removed from the standard library in Python 3.12.
  gcloud_path = shutil.which('gcloud')
  if not gcloud_path:
    logging.warning('gcloud not found in PATH.')
    return None

  # Sorted output is required: BaseBuilderRepo.find_digest bisects on the
  # timestamps in insertion order.
  result, _, _ = utils.execute([
      gcloud_path,
      'container',
      'images',
      'list-tags',
      'gcr.io/oss-fuzz-base/base-builder',
      '--format=json',
      '--sort-by=timestamp',
  ],
                               check_result=True)
  result = json.loads(result)

  repo = BaseBuilderRepo()
  for image in result:
    timestamp = datetime.datetime.fromisoformat(
        image['timestamp']['datetime']).astimezone(datetime.timezone.utc)
    repo.add_digest(timestamp, image['digest'])

  return repo


def main():
  """Main function.

  Parses the command line, detects the project's main repo, copies /src out
  of the project image into a temporary directory and builds the fuzzers at
  the requested commit.

  Raises:
    ValueError: If the main git repo can not be determined.
    RuntimeError: If the build fails.
  """
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser(
      description='Build fuzzers at a specific commit')
  parser.add_argument('--project_name',
                      help='The name of the project where the bug occurred.',
                      required=True)
  parser.add_argument('--commit',
                      help='The newest commit SHA to be bisected.',
                      required=True)
  parser.add_argument('--engine',
                      help='The default is "libfuzzer".',
                      default='libfuzzer')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--architecture', default='x86_64')

  args = parser.parse_args()

  repo_url, repo_path = detect_main_repo(args.project_name, commit=args.commit)

  if not repo_url or not repo_path:
    raise ValueError('Main git repo can not be determined.')

  with tempfile.TemporaryDirectory() as tmp_dir:
    host_src_dir = copy_src_from_docker(args.project_name, tmp_dir)
    # The repo lives directly under the copied src dir, named after the last
    # component of its path inside the image.
    build_repo_manager = repo_manager.RepoManager(
        os.path.join(host_src_dir, os.path.basename(repo_path)))
    base_builder_repo = load_base_builder_repo()

    build_data = BuildData(project_name=args.project_name,
                           engine=args.engine,
                           sanitizer=args.sanitizer,
                           architecture=args.architecture)
    if not build_fuzzers_from_commit(args.commit,
                                     build_repo_manager,
                                     host_src_dir,
                                     build_data,
                                     base_builder_repo=base_builder_repo):
      raise RuntimeError('Failed to build.')


if __name__ == '__main__':
  main()