1# Copyright 2019 Google LLC
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
"""Module to build an image from a specific commit, branch or pull request
15
This module allows the fuzzers of each OSS-Fuzz project to be built
from a specific point in time. This feature can be used for implementations
like continuous integration fuzzing and bisection to find errors
19"""
20import argparse
21import bisect
22import datetime
23from distutils import spawn
24import os
25import collections
26import json
27import logging
28import re
29import shutil
30import tempfile
31
32import helper
33import repo_manager
34import retry
35import utils
36
# Immutable record describing one fuzzer build configuration of a project.
BuildData = collections.namedtuple(
    'BuildData', 'project_name engine sanitizer architecture')

# Prefix of the line in a `.git` file that names the actual git directory.
_GIT_DIR_MARKER = 'gitdir: '
# First argument given to retry.wrap for project image builds.
_IMAGE_BUILD_TRIES = 3
42
43
class BaseBuilderRepo:
  """Time-ordered index of base-builder image digests.

  `timestamps` and `digests` are parallel lists: `digests[i]` is the image that
  was published at `timestamps[i]`. Both are kept sorted by timestamp so that
  `find_digest` can binary-search them.
  """

  def __init__(self):
    self.timestamps = []
    self.digests = []

  def add_digest(self, timestamp, digest):
    """Record |digest| as the image published at |timestamp|.

    Entries may be added in any order. Each one is inserted at its sorted
    position, because `find_digest` bisects `timestamps` and would return
    wrong results on an unsorted list. For input that is already in ascending
    order this is equivalent to appending.

    Args:
      timestamp: Publication time of the image. Must be comparable with the
        timestamps already stored.
      digest: The image digest.
    """
    index = bisect.bisect_right(self.timestamps, timestamp)
    self.timestamps.insert(index, timestamp)
    self.digests.insert(index, digest)

  def find_digest(self, timestamp):
    """Find the latest image published at or before the given timestamp.

    Args:
      timestamp: The point in time to look up.

    Returns:
      The digest of the newest image whose timestamp is <= |timestamp|, or
      None (after logging an error) if there is no such image.
    """
    index = bisect.bisect_right(self.timestamps, timestamp)
    if index > 0:
      return self.digests[index - 1]

    logging.error('Failed to find suitable base-builder.')
    return None
64
65
def _replace_gitdir(src_dir, file_path):
  """Rewrite an absolute `gitdir:` entry of a `.git` file as a relative path.

  The absolute path is interpreted relative to /src: the directory holding
  |file_path| is first translated from the host |src_dir| to /src, and the
  gitdir is then expressed relative to that directory. The file is left
  untouched if its gitdir entry is already relative.

  Args:
    src_dir: Host directory that stands in for /src.
    file_path: Path of the `.git` file to rewrite in place.
  """
  with open(file_path) as handle:
    lines = handle.readlines()

  new_lines = []
  for line in lines:
    if line.startswith(_GIT_DIR_MARKER):
      absolute_path = line[len(_GIT_DIR_MARKER):].strip()
      if not os.path.isabs(absolute_path):
        # Already relative.
        return

      current_dir = os.path.dirname(file_path)
      # Rebase to /src rather than the host src dir. Only the first occurrence
      # is replaced so that a path which happens to contain |src_dir| again
      # further down is not mangled.
      base_dir = current_dir.replace(src_dir, '/src', 1)
      relative_path = os.path.relpath(absolute_path, base_dir)
      logging.info('Replacing absolute submodule gitdir from %s to %s',
                   absolute_path, relative_path)

      # The path was strip()ped above, so restore the line terminator;
      # otherwise any following line would be glued onto this one.
      line = _GIT_DIR_MARKER + relative_path + '\n'

    new_lines.append(line)

  with open(file_path, 'w') as handle:
    handle.writelines(new_lines)
92
93
def _make_gitdirs_relative(src_dir):
  """Run _replace_gitdir on every `.git` file found below |src_dir|.

  Only regular-file entries reported by os.walk are considered, so `.git`
  directories are skipped.
  """
  for parent, _, filenames in os.walk(src_dir):
    # A directory holds at most one entry named `.git`.
    if '.git' in filenames:
      _replace_gitdir(src_dir, os.path.join(parent, '.git'))
103
104
def _replace_base_builder_digest(dockerfile_path, digest):
  """Pin every FROM line of a Dockerfile to a base-builder digest.

  Each line whose stripped text starts with `FROM` is replaced by a FROM line
  for gcr.io/oss-fuzz-base/base-builder at |digest|. All other lines are
  written back unchanged.
  """
  with open(dockerfile_path) as dockerfile:
    original = dockerfile.readlines()

  rewritten = [
      'FROM gcr.io/oss-fuzz-base/base-builder@' + digest + '\n'
      if entry.strip().startswith('FROM') else entry for entry in original
  ]

  with open(dockerfile_path, 'w') as dockerfile:
    dockerfile.writelines(rewritten)
119
120
def copy_src_from_docker(project_name, host_dir):
  """Copy /src out of the project's docker image into |host_dir|.

  Any existing `src` directory under |host_dir| is removed first.

  Args:
    project_name: Name of the OSS-Fuzz project whose image is used.
    host_dir: Host directory that is mounted as /out for the copy.

  Returns:
    The path of the copied `src` directory on the host.
  """
  image_name = 'gcr.io/oss-fuzz/' + project_name
  src_dir = os.path.join(host_dir, 'src')

  # Start from an empty destination.
  if os.path.exists(src_dir):
    shutil.rmtree(src_dir, ignore_errors=True)

  # Mount the host dir as /out and copy /src into it from inside the image.
  mount_spec = host_dir + ':/out'
  helper.docker_run(
      ['-v', mount_spec, image_name, 'cp', '-r', '-p', '/src', '/out'])

  # Submodules can have gitdir entries which point to absolute paths. Those
  # would break operations on the checkout from the host, so make them
  # relative.
  _make_gitdirs_relative(src_dir)
  return src_dir
145
146
# NOTE(review): the second retry.wrap argument (2) is presumably the delay
# between attempts -- confirm against retry.wrap.
@retry.wrap(_IMAGE_BUILD_TRIES, 2)
def _build_image_with_retries(project_name):
  """Build the project's docker image, wrapped by retry.wrap.

  Args:
    project_name: Name of the OSS-Fuzz project whose image is built.

  Returns:
    The result of helper.build_image_impl. Callers in this module treat a
    falsy value as a failed build.
  """
  return helper.build_image_impl(project_name)
151
152
def get_required_post_checkout_steps(dockerfile_path):
  """Collect RUN commands to repeat after changing revision (best effort).

  Scans the Dockerfile for RUN commands that mention build.sh together with
  $SRC or /src and that come after the last git/svn/hg RUN command.

  Args:
    dockerfile_path: Path of the Dockerfile to scan.

  Returns:
    A list of (workdir, command) tuples in Dockerfile order, where workdir is
    obtained from helper.workdir_from_lines for the lines before the command.
  """
  checkout_re = re.compile(r'\s*RUN\s*(git|svn|hg)')

  # If the build.sh is copied from upstream, it has to be copied again after
  # the revision changes so that the build uses the matching script.
  build_sh_re = re.compile(r'\s*RUN\s*(.*build\.sh.*(\$SRC|/src).*)')

  with open(dockerfile_path) as dockerfile:
    dockerfile_lines = dockerfile.readlines()

  steps = []
  for index, text in enumerate(dockerfile_lines):
    if checkout_re.match(text):
      # Only commands after the last checkout are of interest.
      steps = []
    else:
      found = build_sh_re.match(text)
      if found:
        workdir = helper.workdir_from_lines(dockerfile_lines[:index])
        steps.append((workdir, found.group(1)))

  return steps
178
179
# pylint: disable=too-many-locals
def build_fuzzers_from_commit(commit,
                              build_repo_manager,
                              host_src_path,
                              build_data,
                              base_builder_repo=None):
  """Builds a OSS-Fuzz fuzzer at a specific commit SHA.

  If the first build fails, the build is retried once with the
  projects/<PROJECT> directory checked out at an older OSS-Fuzz commit (and,
  when |base_builder_repo| is given, with an older base-builder image).

  Args:
    commit: The commit SHA to build the fuzzers at.
    build_repo_manager: The OSS-Fuzz project's repo manager to be built at.
    host_src_path: Host path of the source checkout that is mounted as /src.
    build_data: A struct containing project build information.
    base_builder_repo: A BaseBuilderRepo.
  Returns:
    True if the fuzzers were built successfully, False otherwise.
  """
  oss_fuzz_repo_manager = repo_manager.RepoManager(helper.OSS_FUZZ_DIR)
  num_retry = 1

  def cleanup():
    # Re-copy /src for a clean checkout every time.
    copy_src_from_docker(build_data.project_name,
                         os.path.dirname(host_src_path))
    build_repo_manager.fetch_all_remotes()

  projects_dir = os.path.join('projects', build_data.project_name)
  dockerfile_path = os.path.join(projects_dir, 'Dockerfile')

  for i in range(num_retry + 1):
    build_repo_manager.checkout_commit(commit, clean=False)

    post_checkout_steps = get_required_post_checkout_steps(dockerfile_path)
    for workdir, post_checkout_step in post_checkout_steps:
      logging.info('Running post-checkout step `%s` in %s.', post_checkout_step,
                   workdir)
      helper.docker_run([
          '-w',
          workdir,
          '-v',
          host_src_path + ':' + '/src',
          'gcr.io/oss-fuzz/' + build_data.project_name,
          '/bin/bash',
          '-c',
          post_checkout_step,
      ])

    result = helper.build_fuzzers_impl(project_name=build_data.project_name,
                                       clean=True,
                                       engine=build_data.engine,
                                       sanitizer=build_data.sanitizer,
                                       architecture=build_data.architecture,
                                       env_to_add=None,
                                       source_path=host_src_path,
                                       mount_location='/src')
    # Stop on success, or when the retry has been used up.
    if result == 0 or i == num_retry:
      break

    # Retry with an OSS-Fuzz builder container that's closer to the project
    # commit date.
    commit_date = build_repo_manager.commit_date(commit)

    # Find first change in the projects/<PROJECT> directory before the project
    # commit date.
    oss_fuzz_commit, _, _ = oss_fuzz_repo_manager.git([
        'log', '--before=' + commit_date.isoformat(), '-n1', '--format=%H',
        projects_dir
    ],
                                                      check_result=True)
    oss_fuzz_commit = oss_fuzz_commit.strip()
    if not oss_fuzz_commit:
      logging.info(
          'Could not find first OSS-Fuzz commit prior to upstream commit. '
          'Falling back to oldest integration commit.')

      # Find the oldest commit.
      log_output, _, _ = oss_fuzz_repo_manager.git(
          ['log', '--reverse', '--format=%H', projects_dir], check_result=True)

      # Empty log output yields no lines. Indexing [0] unconditionally would
      # raise IndexError and bypass the error handling below.
      history = log_output.splitlines()
      oss_fuzz_commit = history[0].strip() if history else ''

    if not oss_fuzz_commit:
      logging.error('Failed to get oldest integration commit.')
      break

    logging.info('Build failed. Retrying on earlier OSS-Fuzz commit %s.',
                 oss_fuzz_commit)

    # Check out projects/<PROJECT> dir to the commit that was found.
    oss_fuzz_repo_manager.git(['checkout', oss_fuzz_commit, projects_dir],
                              check_result=True)

    # Also use the closest base-builder we can find.
    if base_builder_repo:
      base_builder_digest = base_builder_repo.find_digest(commit_date)
      if not base_builder_digest:
        return False

      logging.info('Using base-builder with digest %s.', base_builder_digest)
      _replace_base_builder_digest(dockerfile_path, base_builder_digest)

    # Rebuild image and re-copy src dir since things in /src could have changed.
    if not _build_image_with_retries(build_data.project_name):
      logging.error('Failed to rebuild image.')
      return False

    cleanup()

  cleanup()
  return result == 0
289
290
def detect_main_repo(project_name, repo_name=None, commit=None):
  """Checks a docker image for the main repo of an OSS-Fuzz project.

  Builds the project's image and runs /opt/cifuzz/detect_repo.py inside it,
  then parses the `Detected repo: <origin> <path>` line from its output.

  Note: The default is to use the repo name to detect the main repo. If both
  |repo_name| and |commit| are given, |repo_name| is used.

  Args:
    project_name: The name of the oss-fuzz project.
    repo_name: The name of the main repo in an OSS-Fuzz project.
    commit: A commit SHA that is associated with the main repo.

  Returns:
    The repo's origin, the repo's path. (None, None) if neither |repo_name|
    nor |commit| is given, if the image build fails, or if no repo is
    detected.
  """

  if not repo_name and not commit:
    logging.error(
        'Error: can not detect main repo without a repo_name or a commit.')
    return None, None
  if repo_name and commit:
    logging.info(
        'Both repo name and commit specific. Using repo name for detection.')

  # Change to oss-fuzz main directory so helper.py runs correctly.
  utils.chdir_to_root()
  if not _build_image_with_retries(project_name):
    logging.error('Error: building %s image failed.', project_name)
    return None, None
  docker_image_name = 'gcr.io/oss-fuzz/' + project_name
  command_to_run = [
      'docker', 'run', '--rm', '-t', docker_image_name, 'python3',
      os.path.join('/opt', 'cifuzz', 'detect_repo.py')
  ]
  if repo_name:
    command_to_run.extend(['--repo_name', repo_name])
  else:
    command_to_run.extend(['--example_commit', commit])
  out, _, _ = utils.execute(command_to_run)

  # \S+ rather than [^ ]+: the latter also matches newlines, so output that
  # follows the "Detected repo" line would be captured as part of the path.
  match = re.search(r'\bDetected repo: (\S+) (\S+)', out.rstrip())
  if match:
    return match.group(1), match.group(2)

  logging.error('Failed to detect repo:\n%s', out)
  return None, None
335
336
def load_base_builder_repo():
  """Build a BaseBuilderRepo from the base-builder image tags.

  Lists the tags of gcr.io/oss-fuzz-base/base-builder with gcloud, sorted by
  timestamp, and records each image's digest under its UTC timestamp.

  Returns:
    The populated BaseBuilderRepo, or None if gcloud is not found in PATH.
  """
  # NOTE(review): distutils is deprecated (PEP 632) and removed in Python 3.12;
  # shutil.which is the stdlib replacement once the file-level import can go.
  gcloud_path = spawn.find_executable('gcloud')
  if not gcloud_path:
    logging.warning('gcloud not found in PATH.')
    return None

  list_tags_command = [
      gcloud_path, 'container', 'images', 'list-tags',
      'gcr.io/oss-fuzz-base/base-builder', '--format=json',
      '--sort-by=timestamp'
  ]
  raw_listing, _, _ = utils.execute(list_tags_command, check_result=True)
  images = json.loads(raw_listing)

  repo = BaseBuilderRepo()
  for entry in images:
    published = datetime.datetime.fromisoformat(entry['timestamp']['datetime'])
    # Normalize to UTC before storing.
    repo.add_digest(published.astimezone(datetime.timezone.utc),
                    entry['digest'])

  return repo
363
364
def main():
  """Parse the command line and build a project's fuzzers at one commit.

  Raises:
    ValueError: If the project's main repo can not be detected.
    RuntimeError: If building the fuzzers fails.
  """
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser(
      description='Build fuzzers at a specific commit')
  parser.add_argument('--project_name',
                      required=True,
                      help='The name of the project where the bug occurred.')
  parser.add_argument('--commit',
                      required=True,
                      help='The newest commit SHA to be bisected.')
  parser.add_argument('--engine',
                      default='libfuzzer',
                      help='The default is "libfuzzer".')
  parser.add_argument('--sanitizer',
                      default='address',
                      help='The default is "address".')
  parser.add_argument('--architecture', default='x86_64')
  args = parser.parse_args()

  repo_url, repo_path = detect_main_repo(args.project_name, commit=args.commit)
  if not (repo_url and repo_path):
    raise ValueError('Main git repo can not be determined.')

  build_data = BuildData(project_name=args.project_name,
                         engine=args.engine,
                         sanitizer=args.sanitizer,
                         architecture=args.architecture)

  with tempfile.TemporaryDirectory() as tmp_dir:
    host_src_dir = copy_src_from_docker(args.project_name, tmp_dir)
    # The main repo is looked up by its directory name inside the copied src.
    checkout_dir = os.path.join(host_src_dir, os.path.basename(repo_path))
    build_repo_manager = repo_manager.RepoManager(checkout_dir)
    base_builder_repo = load_base_builder_repo()

    succeeded = build_fuzzers_from_commit(args.commit,
                                          build_repo_manager,
                                          host_src_dir,
                                          build_data,
                                          base_builder_repo=base_builder_repo)
    if not succeeded:
      raise RuntimeError('Failed to build.')


if __name__ == '__main__':
  main()
412