1# Copyright 2020 Google Inc.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#      http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15################################################################################
16#!/usr/bin/python2
17"""Starts project build on Google Cloud Builder.
18
19Usage: build_project.py <project_dir>
20"""
21
22from __future__ import print_function
23
24import datetime
25import json
26import logging
27import os
28import re
29import sys
30
31import six
32import yaml
33
34from oauth2client.client import GoogleCredentials
35from googleapiclient.discovery import build
36
37import build_lib
38
# Tag suffix for fuzzing builds; main() passes it to run_build(), which
# combines it with the project name to form the build's tag.
FUZZING_BUILD_TAG = 'fuzzing'

# Sent as the 'logsBucket' field of the build request in run_build().
GCB_LOGS_BUCKET = 'oss-fuzz-gcb-logs'

# Maps 'sanitizer-<name>' and 'engine-<name>' keys to the environment variable
# assignments that get_build_steps() puts in the env of the generated steps.
CONFIGURATIONS = {
    'sanitizer-address': ['SANITIZER=address'],
    'sanitizer-dataflow': ['SANITIZER=dataflow'],
    'sanitizer-memory': ['SANITIZER=memory'],
    'sanitizer-undefined': ['SANITIZER=undefined'],
    'engine-libfuzzer': ['FUZZING_ENGINE=libfuzzer'],
    'engine-afl': ['FUZZING_ENGINE=afl'],
    'engine-honggfuzz': ['FUZZING_ENGINE=honggfuzz'],
    'engine-dataflow': ['FUZZING_ENGINE=dataflow'],
    'engine-none': ['FUZZING_ENGINE=none'],
}

# Values set_yaml_defaults() uses when project.yaml omits the matching key.
DEFAULT_ARCHITECTURES = ['x86_64']
DEFAULT_ENGINES = ['libfuzzer', 'afl', 'honggfuzz']
DEFAULT_SANITIZERS = ['address', 'undefined']

# File name suffix and content type of the per-sanitizer "latest version"
# object uploaded by the steps from get_build_steps().
LATEST_VERSION_FILENAME = 'latest.version'
LATEST_VERSION_CONTENT_TYPE = 'text/plain'

# Sent (with an 's' suffix) as the 'queueTtl' field of the build request.
QUEUE_TTL_SECONDS = 60 * 60 * 24  # 24 hours.
63
64
def usage():
  """Print the command-line syntax to stderr, then exit with status 1."""
  message = 'Usage: ' + sys.argv[0] + ' <project_dir>\n'
  sys.stderr.write(message)
  sys.exit(1)
69
70
def set_yaml_defaults(project_name, project_yaml, image_project):
  """Fill in defaults for keys that are missing from a parsed project.yaml.

  Keys already present in |project_yaml| are left untouched. List defaults are
  inserted as copies so that later changes to one project's settings cannot
  alter the module-level DEFAULT_* lists shared by every project.

  Args:
    project_name: Default for 'name'; also the last component of the default
      'image'.
    project_yaml: Dict holding the parsed project.yaml; updated in place.
    image_project: Project component of the default 'image' path.
  """
  project_yaml.setdefault('disabled', False)
  project_yaml.setdefault('name', project_name)
  project_yaml.setdefault('image',
                          'gcr.io/{0}/{1}'.format(image_project, project_name))
  project_yaml.setdefault('architectures', list(DEFAULT_ARCHITECTURES))
  project_yaml.setdefault('sanitizers', list(DEFAULT_SANITIZERS))
  project_yaml.setdefault('fuzzing_engines', list(DEFAULT_ENGINES))
  project_yaml.setdefault('run_tests', True)
  project_yaml.setdefault('coverage_extra_args', '')
  project_yaml.setdefault('labels', {})
84
def is_supported_configuration(fuzzing_engine, sanitizer, architecture):
  """Return whether the engine/sanitizer/architecture combination is valid."""
  engine_info = build_lib.ENGINE_INFO[fuzzing_engine]

  # i386 is only accepted together with the address sanitizer.
  if architecture == 'i386' and sanitizer != 'address':
    return False

  if sanitizer not in engine_info.supported_sanitizers:
    return False
  return architecture in engine_info.supported_architectures
92
93
def get_sanitizers(project_yaml):
  """Return the sanitizer names listed under 'sanitizers' in project.yaml.

  An entry may be a plain string or a dict; for a dict every key is taken as
  a sanitizer name. Entries of any other type are ignored.
  """
  configured = project_yaml['sanitizers']
  assert isinstance(configured, list)

  names = []
  for entry in configured:
    if isinstance(entry, six.string_types):
      names.append(entry)
      continue
    if isinstance(entry, dict):
      # Iterating a dict yields its keys.
      names.extend(entry)

  return names
108
109
def workdir_from_dockerfile(dockerfile_lines):
  """Parse the effective WORKDIR from the lines of a Dockerfile.

  WORKDIR instructions are applied in order, so the last one in the file
  decides the working directory of the resulting image; that is the one
  returned. The path is returned as written (relative paths are not resolved).

  Args:
    dockerfile_lines: Iterable of the Dockerfile's lines.

  Returns:
    The argument of the last WORKDIR instruction with every '$' doubled, or
    None if the Dockerfile has no WORKDIR instruction.
  """
  workdir_regex = re.compile(r'\s*WORKDIR\s*([^\s]+)')

  workdir = None
  for line in dockerfile_lines:
    match = workdir_regex.match(line)
    if match:
      # Keep overwriting so that the last WORKDIR wins.
      workdir = match.group(1)

  if workdir is None:
    return None

  # We need to escape '$' since they're used for substitutions in Container
  # Builder builds.
  return workdir.replace('$', '$$')
121
122
def load_project_yaml(project_name, project_yaml_file, image_project):
  """Parse |project_yaml_file| and return the result with defaults applied."""
  parsed_yaml = yaml.safe_load(project_yaml_file)
  # Missing keys are filled in place on the parsed dict.
  set_yaml_defaults(project_name, parsed_yaml, image_project)
  return parsed_yaml
128
129
130# pylint: disable=too-many-locals, too-many-statements, too-many-branches
def get_build_steps(project_name, project_yaml_file, dockerfile_lines,
                    image_project, base_images_project):
  """Returns the Cloud Build steps that build and upload a project.

  After the image build steps and a step that copies the MSan libraries into
  /workspace, one group of steps is generated for every supported combination
  of fuzzing engine, sanitizer and architecture from project.yaml: compile,
  MSan patching (memory sanitizer only), build checks (if 'run_tests'), label
  writing (if 'labels' is non-empty), dataflow post-build steps (dataflow
  only), targets list generation, zipping, uploads and cleanup.

  Args:
    project_name: Name of the project; default for project.yaml's 'name'.
    project_yaml_file: project.yaml contents, as accepted by yaml.safe_load().
      It must define 'language'; no default is applied for that key.
    dockerfile_lines: Lines of the project's Dockerfile, used to find WORKDIR.
    image_project: Project component of the default builder image path.
    base_images_project: Project component of the base image paths
      (msan-libs-builder, base-runner, uploader).

  Returns:
    A list of build step dicts, or an empty list if the project is disabled.
  """
  project_yaml = load_project_yaml(project_name, project_yaml_file,
                                   image_project)

  if project_yaml['disabled']:
    logging.info('Project "%s" is disabled.', project_name)
    return []

  name = project_yaml['name']
  image = project_yaml['image']
  language = project_yaml['language']
  run_tests = project_yaml['run_tests']
  time_stamp = datetime.datetime.now().strftime('%Y%m%d%H%M')

  build_steps = build_lib.project_image_steps(name, image, language)
  # Copy over MSan instrumented libraries.
  build_steps.append({
      'name': 'gcr.io/{0}/msan-libs-builder'.format(base_images_project),
      'args': [
          'bash',
          '-c',
          'cp -r /msan /workspace',
      ],
  })

  # Neither the sanitizer list nor the WORKDIR depends on the configuration
  # being built, so compute them once rather than on every loop iteration.
  sanitizers = get_sanitizers(project_yaml)
  # Fall back to /src when the Dockerfile does not set a WORKDIR.
  workdir = workdir_from_dockerfile(dockerfile_lines) or '/src'

  for fuzzing_engine in project_yaml['fuzzing_engines']:
    for sanitizer in sanitizers:
      for architecture in project_yaml['architectures']:
        if not is_supported_configuration(fuzzing_engine, sanitizer,
                                          architecture):
          continue

        # Copy the engine's env list so that CONFIGURATIONS is not mutated.
        env = CONFIGURATIONS['engine-' + fuzzing_engine][:]
        env.extend(CONFIGURATIONS['sanitizer-' + sanitizer])
        out = '/workspace/out/' + sanitizer
        stamped_name = '-'.join([name, sanitizer, time_stamp])
        latest_version_file = '-'.join(
            [name, sanitizer, LATEST_VERSION_FILENAME])
        zip_file = stamped_name + '.zip'
        stamped_srcmap_file = stamped_name + '.srcmap.json'
        bucket = build_lib.ENGINE_INFO[fuzzing_engine].upload_bucket
        # Architectures other than x86_64 upload to a suffixed bucket.
        if architecture != 'x86_64':
          bucket += '-' + architecture

        upload_url = build_lib.get_signed_url(
            build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, name, zip_file))
        srcmap_url = build_lib.get_signed_url(
            build_lib.GCS_UPLOAD_URL_FORMAT.format(bucket, name,
                                                   stamped_srcmap_file))
        latest_version_url = build_lib.GCS_UPLOAD_URL_FORMAT.format(
            bucket, name, latest_version_file)
        latest_version_url = build_lib.get_signed_url(
            latest_version_url, content_type=LATEST_VERSION_CONTENT_TYPE)

        targets_list_filename = build_lib.get_targets_list_filename(sanitizer)
        targets_list_url = build_lib.get_signed_url(
            build_lib.get_targets_list_url(bucket, name, sanitizer))

        env.append('OUT=' + out)
        env.append('MSAN_LIBS_PATH=/workspace/msan')
        env.append('ARCHITECTURE=' + architecture)
        env.append('FUZZING_LANGUAGE=' + language)

        failure_msg = ('*' * 80 + '\nFailed to build.\nTo reproduce, run:\n'
                       'python infra/helper.py build_image {name}\n'
                       'python infra/helper.py build_fuzzers --sanitizer '
                       '{sanitizer} --engine {engine} --architecture '
                       '{architecture} {name}\n' + '*' * 80).format(
                           name=name,
                           sanitizer=sanitizer,
                           engine=fuzzing_engine,
                           architecture=architecture)

        build_steps.append(
            # compile
            {
                'name':
                    image,
                'env':
                    env,
                'args': [
                    'bash',
                    '-c',
                    # Remove /out to break loudly when a build script
                    # incorrectly uses /out instead of $OUT.
                    # Container Builder overrides our workdir, so
                    # `cd /src && cd {workdir}` (where {workdir} is parsed
                    # from the Dockerfile) is needed to set it back.
                    ('rm -r /out && cd /src && cd {workdir} && mkdir -p {out} '
                     '&& compile || (echo "{failure_msg}" && false)'
                    ).format(workdir=workdir, out=out, failure_msg=failure_msg),
                ],
            })

        if sanitizer == 'memory':
          # Patch dynamic libraries to use instrumented ones.
          build_steps.append({
              'name':
                  'gcr.io/{0}/msan-libs-builder'.format(base_images_project),
              'args': [
                  'bash',
                  '-c',
                  # TODO(ochang): Replace with just patch_build.py once
                  # permission in image is fixed.
                  'python /usr/local/bin/patch_build.py {0}'.format(out),
              ],
          })

        if run_tests:
          failure_msg = ('*' * 80 + '\nBuild checks failed.\n'
                         'To reproduce, run:\n'
                         'python infra/helper.py build_image {name}\n'
                         'python infra/helper.py build_fuzzers --sanitizer '
                         '{sanitizer} --engine {engine} --architecture '
                         '{architecture} {name}\n'
                         'python infra/helper.py check_build --sanitizer '
                         '{sanitizer} --engine {engine} --architecture '
                         '{architecture} {name}\n' + '*' * 80).format(
                             name=name,
                             sanitizer=sanitizer,
                             engine=fuzzing_engine,
                             architecture=architecture)

          build_steps.append(
              # test binaries
              {
                  'name':
                      'gcr.io/{0}/base-runner'.format(base_images_project),
                  'env':
                      env,
                  'args': [
                      'bash', '-c',
                      'test_all.py || (echo "{0}" && false)'.format(failure_msg)
                  ],
              })

        if project_yaml['labels']:
          # write target labels
          build_steps.append({
              'name':
                  image,
              'env':
                  env,
              'args': [
                  '/usr/local/bin/write_labels.py',
                  json.dumps(project_yaml['labels']),
                  out,
              ],
          })

        if sanitizer == 'dataflow' and fuzzing_engine == 'dataflow':
          dataflow_steps = dataflow_post_build_steps(name, env,
                                                     base_images_project)
          if dataflow_steps:
            build_steps.extend(dataflow_steps)
          else:
            sys.stderr.write('Skipping dataflow post build steps.\n')

        build_steps.extend([
            # generate targets list
            {
                'name':
                    'gcr.io/{0}/base-runner'.format(base_images_project),
                'env':
                    env,
                'args': [
                    'bash',
                    '-c',
                    'targets_list > /workspace/{0}'.format(
                        targets_list_filename),
                ],
            },
            # zip binaries
            {
                'name':
                    image,
                'args': [
                    'bash', '-c',
                    'cd {out} && zip -r {zip_file} *'.format(out=out,
                                                             zip_file=zip_file)
                ],
            },
            # upload srcmap
            {
                'name': 'gcr.io/{0}/uploader'.format(base_images_project),
                'args': [
                    '/workspace/srcmap.json',
                    srcmap_url,
                ],
            },
            # upload binaries
            {
                'name': 'gcr.io/{0}/uploader'.format(base_images_project),
                'args': [
                    os.path.join(out, zip_file),
                    upload_url,
                ],
            },
            # upload targets list
            {
                'name':
                    'gcr.io/{0}/uploader'.format(base_images_project),
                'args': [
                    '/workspace/{0}'.format(targets_list_filename),
                    targets_list_url,
                ],
            },
            # upload the latest.version file
            build_lib.http_upload_step(zip_file, latest_version_url,
                                       LATEST_VERSION_CONTENT_TYPE),
            # cleanup
            {
                'name': image,
                'args': [
                    'bash',
                    '-c',
                    'rm -r ' + out,
                ],
            },
        ])

  return build_steps
359
360
def dataflow_post_build_steps(project_name, env, base_images_project):
  """Returns the corpus download steps followed by a DFT collection step.

  Returns None when build_lib.download_corpora_steps() yields no steps.
  """
  post_build_steps = build_lib.download_corpora_steps(project_name)
  if not post_build_steps:
    return None

  # Build a new list so that the caller's |env| is left unmodified.
  dft_env = env + [
      'COLLECT_DFT_TIMEOUT=2h',
      'DFT_FILE_SIZE_LIMIT=65535',
      'DFT_MIN_TIMEOUT=2.0',
      'DFT_TIMEOUT_RANGE=6.0',
  ]
  unzip_and_collect = (
      'for f in /corpus/*.zip; do unzip -q $f -d ${f%%.*}; done && '
      'collect_dft || (echo "DFT collection failed." && false)')
  collect_step = {
      'name': 'gcr.io/{0}/base-runner'.format(base_images_project),
      'env': dft_env,
      'args': ['bash', '-c', unzip_and_collect],
      'volumes': [{
          'name': 'corpus',
          'path': '/corpus'
      }],
  }

  post_build_steps.append(collect_step)
  return post_build_steps
388
389
def get_logs_url(build_id, image_project='oss-fuzz'):
  """Build the logs viewer URL for |build_id| within |image_project|."""
  return ('https://console.developers.google.com/logs/viewer?'
          'resource=build%2Fbuild_id%2F{0}&project={1}').format(
              build_id, image_project)
395
396
397# pylint: disable=no-member
def run_build(build_steps, project_name, tag):
  """Submit |build_steps| as a Cloud Build build and print its id.

  Build options are read as YAML from the GCB_OPTIONS environment variable
  when it is set. The logs URL is printed to stderr and the build id to
  stdout.
  """
  if 'GCB_OPTIONS' in os.environ:
    gcb_options = yaml.safe_load(os.environ['GCB_OPTIONS'])
  else:
    gcb_options = {}

  request_body = {
      'steps': build_steps,
      'timeout': str(build_lib.BUILD_TIMEOUT) + 's',
      'options': gcb_options,
      'logsBucket': GCB_LOGS_BUCKET,
      'tags': [project_name + '-' + tag],
      'queueTtl': str(QUEUE_TTL_SECONDS) + 's',
  }

  service = build('cloudbuild',
                  'v1',
                  credentials=GoogleCredentials.get_application_default(),
                  cache_discovery=False)
  create_request = service.projects().builds().create(projectId='oss-fuzz',
                                                      body=request_body)
  response = create_request.execute()
  build_id = response['metadata']['build']['id']

  logs_url = get_logs_url(build_id)
  print('Logs:', logs_url, file=sys.stderr)
  print(build_id)
424
425
def main():
  """Build the project whose directory is given on the command line.

  Reads <project_dir>/Dockerfile and <project_dir>/project.yaml, generates the
  build steps and submits them with run_build(). Nothing is submitted when no
  steps are generated (get_build_steps() returns an empty list for a disabled
  project).
  """
  if len(sys.argv) != 2:
    usage()

  image_project = 'oss-fuzz'
  base_images_project = 'oss-fuzz-base'
  # Strip trailing separators so that basename() yields the directory name.
  project_dir = sys.argv[1].rstrip(os.path.sep)
  dockerfile_path = os.path.join(project_dir, 'Dockerfile')
  project_yaml_path = os.path.join(project_dir, 'project.yaml')
  project_name = os.path.basename(project_dir)

  with open(dockerfile_path) as dockerfile:
    dockerfile_lines = dockerfile.readlines()

  with open(project_yaml_path) as project_yaml_file:
    steps = get_build_steps(project_name, project_yaml_file, dockerfile_lines,
                            image_project, base_images_project)

  if not steps:
    # There is nothing to build, so do not submit an empty build request.
    logging.info('No build steps for project "%s"; skipping build.',
                 project_name)
    return

  run_build(steps, project_name, FUZZING_BUILD_TAG)
446
447
448if __name__ == '__main__':
449  main()
450