1#!/usr/bin/env python3
2# Copyright 2020 Google LLC.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#        http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16################################################################################
17"""Check code for common issues before submitting."""
18
19import argparse
20import os
21import subprocess
22import sys
23import unittest
24import yaml
25
# Absolute path of the directory one level above the directory that contains
# this script. main() changes into it and run_build_tests() resolves the
# 'infra/build' test directory against it.
_SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
27
28
def _is_project_file(actual_path, expected_filename):
  """Returns True if |actual_path| is an existing projects/<name>/ entry
  named |expected_filename|.

  Args:
    actual_path: Path to test.
    expected_filename: Base name the path must have (e.g. 'build.sh').

  Returns:
    True only when the base name matches, the directory two levels up is
    called 'projects', and the path exists on disk. False otherwise.
  """
  containing_dir, filename = os.path.split(actual_path)
  grandparent_name = os.path.basename(os.path.dirname(containing_dir))
  # The cheap name comparisons come first so the filesystem is only consulted
  # for paths that already look like projects/<name>/<expected_filename>.
  return (filename == expected_filename and grandparent_name == 'projects' and
          os.path.exists(actual_path))
40
41
42# TODO: Check for -fsanitize=fuzzer in files as well.
43
44
def _check_one_lib_fuzzing_engine(build_sh_file):
  """Returns False if |build_sh_file| contains -lFuzzingEngine.
  This is deprecated behavior. $LIB_FUZZING_ENGINE should be used instead
  so that -fsanitize=fuzzer is used.

  Args:
    build_sh_file: Path to inspect. Anything that is not an existing
      projects/<name>/build.sh is accepted without being read.

  Returns:
    False (after printing an error naming the first offending line) when the
    flag appears outside a '#' comment, True otherwise.
  """
  if not _is_project_file(build_sh_file, 'build.sh'):
    return True

  with open(build_sh_file) as build_sh:
    # Count from 1 so the reported number matches what editors and grep -n
    # show; enumerate()'s default of 0 pointed one line too early.
    for line_num, line in enumerate(build_sh, start=1):
      # Only text before the first '#' is treated as code.
      uncommented_code = line.split('#')[0]
      if '-lFuzzingEngine' in uncommented_code:
        print(
            'Error: build.sh contains deprecated "-lFuzzingEngine" on line: '
            '{0}. Please use "$LIB_FUZZING_ENGINE" instead.'.format(line_num))
        return False
  return True
62
63
def check_lib_fuzzing_engine(paths):
  """Run _check_one_lib_fuzzing_engine on each path in |paths|.

  Returns:
    True if every individual check returned True (also when |paths| is empty).
  """
  # Collect every result before combining them so that a failure on one path
  # does not stop the remaining paths from being checked and reported.
  results = []
  for path in paths:
    results.append(_check_one_lib_fuzzing_engine(path))
  return all(results)
68
69
class ProjectYamlChecker:
  """Checks for a project.yaml file.

  The parsed YAML is kept in |self.data|. Every check reports problems through
  error(), which prints the message and sets |self.success| to False, so all
  problems found in one file are printed in a single run.
  """

  # Sections in a project.yaml and the constant values that they are allowed
  # to have.
  SECTIONS_AND_CONSTANTS = {
      'sanitizers': {'address', 'none', 'memory', 'undefined', 'dataflow'},
      'architectures': {'i386', 'x86_64'},
      'fuzzing_engines': {'afl', 'libfuzzer', 'honggfuzz', 'dataflow', 'none'},
  }

  # Note: this list must be updated when we allow new sections.
  VALID_SECTION_NAMES = [
      'architectures',
      'auto_ccs',
      'blackbox',
      'builds_per_day',
      'coverage_extra_args',
      'disabled',
      'fuzzing_engines',
      'help_url',
      'homepage',
      'language',
      'labels',  # For internal use only, hard to lint as it uses fuzzer names.
      'main_repo',
      'primary_contact',
      'run_tests',
      'sanitizers',
      'selective_unpack',
      'vendor_ccs',
      'view_restrictions',
  ]

  LANGUAGES_SUPPORTED = [
      'c',
      'c++',
      'go',
      'jvm',
      'python',
      'rust',
  ]

  # Note that some projects like boost only have auto-ccs. However, forgetting
  # primary contact is probably a mistake.
  REQUIRED_SECTIONS = ['primary_contact', 'main_repo']

  def __init__(self, filename):
    """Parse |filename| with yaml.safe_load and start in the passing state."""
    self.filename = filename
    with open(filename) as file_handle:
      self.data = yaml.safe_load(file_handle)

    self.success = True

  def do_checks(self):
    """Do all project.yaml checks. Return True if they pass.

    A file whose top level is not a mapping (for example an empty file, which
    loads as None) is reported as an error. A project marked as disabled
    passes without further checks.
    """
    # Every check below indexes |self.data| by section name, so anything other
    # than a mapping is rejected here instead of crashing further down.
    if not isinstance(self.data, dict):
      self.error('project.yaml must be a mapping of section names to values.')
      return False

    if self.is_disabled():
      return True

    checks = [
        self.check_project_yaml_constants,
        self.check_required_sections,
        self.check_valid_section_names,
        self.check_valid_emails,
        self.check_valid_language,
    ]
    # Run every check, even after a failure, so all errors are printed at once.
    for check_function in checks:
      check_function()
    return self.success

  def is_disabled(self):
    """Is this project disabled."""
    if not isinstance(self.data, dict):
      return False
    return self.data.get('disabled', False)

  def error(self, message):
    """Print an error message and set self.success to False."""
    self.success = False
    print('Error in {filename}: {message}'.format(filename=self.filename,
                                                  message=message))

  def check_project_yaml_constants(self):
    """Check that certain sections only have certain constant values."""
    for section, allowed_constants in self.SECTIONS_AND_CONSTANTS.items():
      if section not in self.data:
        continue
      actual_constants = self.data[section]
      # A section left empty loads as None and a bare scalar is not a list of
      # constants; iterating either would crash or walk a string by character.
      if actual_constants is None or isinstance(actual_constants,
                                                (str, int, float)):
        self.error('{section} section must be a list of values.'.format(
            section=section))
        continue
      for constant in actual_constants:
        if isinstance(constant, str):
          if constant not in allowed_constants:
            # Sorted so the message is the same on every run.
            self.error(('{constant} (in {section} section) is not a valid '
                        'constant ({allowed_constants}).').format(
                            constant=constant,
                            section=section,
                            allowed_constants=', '.join(
                                sorted(allowed_constants))))
        elif isinstance(constant, dict):
          # The only alternative value allowed is the experimental flag, i.e.
          # `constant == {'memory': {'experimental': True}}`. Do not check the
          # experimental flag, but assert that the sanitizer is a valid one.
          # Requiring exactly one key also rejects an empty mapping, which
          # previously raised IndexError.
          if (len(constant) != 1 or
              next(iter(constant)) not in allowed_constants):
            self.error('Not allowed value in the project.yaml: ' +
                       str(constant))
        else:
          self.error('Not allowed value in the project.yaml: ' + str(constant))

  def check_valid_section_names(self):
    """Check that only valid sections are included."""
    for name in self.data:
      if name not in self.VALID_SECTION_NAMES:
        self.error('{name} is not a valid section name ({valid_names})'.format(
            name=name, valid_names=self.VALID_SECTION_NAMES))

  def check_required_sections(self):
    """Check that all required sections are present."""
    for section in self.REQUIRED_SECTIONS:
      if section not in self.data:
        self.error(section + ' section is missing.')

  def check_valid_emails(self):
    """Check that emails are valid looking.

    Looks at primary_contact and auto_ccs. An address is accepted when it is a
    string containing both '@' and '.'.
    """
    # Get email addresses.
    email_addresses = []
    primary_contact = self.data.get('primary_contact')
    if primary_contact:
      email_addresses.append(primary_contact)
    auto_ccs = self.data.get('auto_ccs')
    if isinstance(auto_ccs, str):
      # A single address written as a scalar: extend() would otherwise add it
      # one character at a time and report every character as invalid.
      auto_ccs = [auto_ccs]
    if auto_ccs:
      email_addresses.extend(auto_ccs)

    # Check that email addresses seem normal.
    for email_address in email_addresses:
      if (not isinstance(email_address, str) or '@' not in email_address or
          '.' not in email_address):
        self.error(str(email_address) + ' is an invalid email address.')

  def check_valid_language(self):
    """Check that the language is specified and valid."""
    language = self.data.get('language')
    if not language:
      self.error('Missing "language" attribute in project.yaml.')
    elif language not in self.LANGUAGES_SUPPORTED:
      self.error(
          '"language: {language}" is not supported ({supported}).'.format(
              language=language, supported=self.LANGUAGES_SUPPORTED))
212
213
def _check_one_project_yaml(project_yaml_filename):
  """Run the project.yaml checks on |project_yaml_filename|.

  Returns:
    The result of ProjectYamlChecker.do_checks() for an existing
    projects/<name>/project.yaml, and True for any other path.
  """
  if _is_project_file(project_yaml_filename, 'project.yaml'):
    return ProjectYamlChecker(project_yaml_filename).do_checks()

  # Not a project.yaml inside projects/, so there is nothing to validate.
  return True
221
222
def check_project_yaml(paths):
  """Run _check_one_project_yaml on each path in |paths|.

  Returns:
    True if every individual check returned True (also when |paths| is empty).
  """
  # Gather all results first so that every file is checked and reported even
  # when an earlier one fails.
  results = []
  for path in paths:
    results.append(_check_one_project_yaml(path))
  return all(results)
227
228
def do_checks(changed_files):
  """Run all presubmit checks on |changed_files|. Return False if any fails."""
  # Every check is run before the results are combined, here and in the other
  # places that use all(), so that a failure does not hide later ones. Showing
  # all errors at once means fewer check-fix-check cycles for the user.
  outcomes = []
  for check in (check_license, yapf, lint, check_project_yaml,
                check_lib_fuzzing_engine):
    outcomes.append(check(changed_files))
  return all(outcomes)
239
240
# check_license() inspects a file when its base name is listed in
# _CHECK_LICENSE_FILENAMES or its extension is listed in
# _CHECK_LICENSE_EXTENSIONS; every other file is skipped.
_CHECK_LICENSE_FILENAMES = ['Dockerfile']
_CHECK_LICENSE_EXTENSIONS = [
    '.bash',
    '.c',
    '.cc',
    '.cpp',
    '.css',
    '.h',
    '.htm',
    '.html',
    '.js',
    '.proto',
    '.py',
    '.sh',
]

# Text that must appear somewhere in an inspected file for check_license() to
# accept it.
_LICENSE_STRING = 'http://www.apache.org/licenses/LICENSE-2.0'
258
259
def check_license(paths):
  """Validate license header.

  Only files whose base name is in _CHECK_LICENSE_FILENAMES or whose extension
  is in _CHECK_LICENSE_EXTENSIONS are inspected. An inspected file passes when
  it contains _LICENSE_STRING anywhere.

  Args:
    paths: Paths of the files to consider. May be empty or None.

  Returns:
    True if no inspected file is missing the license string. A message is
    printed for each file that is missing it.
  """
  if not paths:
    return True

  # The license string is plain ASCII, so it can be searched for in the raw
  # bytes. Reading as text used the locale's encoding and raised
  # UnicodeDecodeError on files containing bytes that encoding cannot decode.
  license_bytes = _LICENSE_STRING.encode()

  success = True
  for path in paths:
    filename = os.path.basename(path)
    extension = os.path.splitext(path)[1]
    if (filename not in _CHECK_LICENSE_FILENAMES and
        extension not in _CHECK_LICENSE_EXTENSIONS):
      continue

    with open(path, 'rb') as file_handle:
      if license_bytes not in file_handle.read():
        print('Missing license header in file %s.' % str(path))
        success = False

  return success
279
280
def bool_to_returncode(success):
  """Translate |success| into a process exit code and print the outcome.

  Returns:
    0 after printing 'Success.' when |success| is truthy, otherwise 1 after
    printing 'Failed.'.
  """
  if not success:
    print('Failed.')
    return 1

  print('Success.')
  return 0
289
290
def is_nonfuzzer_python(path):
  """Returns True if |path| has a .py extension and does not contain
  '/projects/'."""
  _, extension = os.path.splitext(path)
  if extension != '.py':
    return False
  # Python files under a projects/ directory are excluded.
  return '/projects/' not in path
294
295
def lint(_=None):
  """Run pylint on the 'infra' directory. Return False if it fails linting.

  The single argument is ignored; it lets lint() be called the same way as
  the checks that take a list of paths.
  """
  pylint_command = ['python3', '-m', 'pylint', '-j', '0', 'infra']
  # check=False: a non-zero exit is reported through the return value rather
  # than as an exception.
  completed = subprocess.run(pylint_command, check=False)
  return completed.returncode == 0
302
303
def yapf(paths, validate=True):
  """Run yapf on the files in |paths| accepted by is_nonfuzzer_python.

  Args:
    paths: Candidate file paths.
    validate: If true, run yapf with '-d'. Otherwise run it with '-i'.

  Returns:
    True if there was nothing to process or yapf exited with status 0, False
    otherwise.
  """
  python_files = []
  for path in paths:
    if is_nonfuzzer_python(path):
      python_files.append(path)
  if not python_files:
    return True

  if validate:
    mode_flag = '-d'
  else:
    mode_flag = '-i'
  command = ['yapf', mode_flag, '-p'] + python_files

  completed = subprocess.run(command, check=False)
  return completed.returncode == 0
318
319
def get_changed_files():
  """Return a list of absolute paths of files changed in this git branch.

  Covers both the commits made since the merge base of FETCH_HEAD and
  origin/HEAD and any uncommitted changes. Names that are not files on disk
  (for example files deleted on the branch) are left out.

  Returns:
    A sorted list of absolute paths.

  Raises:
    subprocess.CalledProcessError: If a git command exits with a non-zero
      status.
  """
  # git prints diff names relative to the top of the working tree, not to the
  # current directory. Run git from _SRC_ROOT and resolve names against it so
  # the result is the same when the script is started from a subdirectory.
  # NOTE(review): assumes _SRC_ROOT is the top of the git working tree.
  branch_commit_hash = subprocess.check_output(
      ['git', 'merge-base', 'FETCH_HEAD', 'origin/HEAD'],
      cwd=_SRC_ROOT).strip().decode()

  diff_commands = [
      # Return list of modified files in the commits on this branch.
      ['git', 'diff', '--name-only', branch_commit_hash + '..'],
      # Return list of modified files from uncommitted changes.
      ['git', 'diff', '--name-only']
  ]

  changed_files = set()
  for command in diff_commands:
    output = subprocess.check_output(command, cwd=_SRC_ROOT).decode()
    for file_path in output.splitlines():
      if not os.path.isfile(os.path.join(_SRC_ROOT, file_path)):
        continue
      changed_files.add(file_path)

  # Sorted so the printed list and the returned order are stable across runs.
  changed_files = sorted(changed_files)
  print('Changed files: {changed_files}'.format(
      changed_files=' '.join(changed_files)))
  return [os.path.abspath(os.path.join(_SRC_ROOT, f)) for f in changed_files]
342
343
def run_build_tests():
  """Run the unit tests under infra/build in this process.

  They are run separately from the other tests because they can't be run in
  parallel.

  Returns:
    True if the run recorded no failures and no errors.
  """
  build_tests_dir = os.path.join(_SRC_ROOT, 'infra', 'build')
  discovered = unittest.TestLoader().discover(build_tests_dir,
                                              pattern='*_test.py')
  suite = unittest.TestSuite([discovered])
  print('Running build tests.')
  result = unittest.TextTestRunner().run(suite)
  return not (result.failures or result.errors)
354
355
def run_nonbuild_tests(parallel):
  """Run all tests except the build tests, using pytest.

  The build tests are left out because they use an emulator that prevents
  them from being run in parallel.

  Args:
    parallel: If true, pass '-n auto' to pytest.

  Returns:
    True if pytest exited with status 0.
  """
  # Every directory that holds a tracked file is passed explicitly because
  # otherwise pytest won't run tests that are not in valid modules (e.g.
  # "base-images").
  relevant_dirs = {os.path.dirname(file_path) for file_path in get_all_files()}

  # ignore-glob is used because ignore doesn't seem to work properly with the
  # way directories are passed to pytest.
  command = [
      'pytest',
      # Test errors with error: "ModuleNotFoundError: No module named 'apt'.
      '--ignore-glob=infra/base-images/base-sanitizer-libs-builder/*',
      '--ignore-glob=infra/build/*',
  ]
  if parallel:
    command += ['-n', 'auto']
  command.extend(relevant_dirs)

  print('Running non-build tests.')
  completed = subprocess.run(command, check=False)
  return completed.returncode == 0
381
382
def run_tests(_=None, parallel=False):
  """Run all unit tests: the non-build tests first, then the build tests.

  The first argument is ignored. |parallel| is forwarded to
  run_nonbuild_tests().

  Returns:
    True only if both groups succeeded. Both groups always run.
  """
  # The list is fully built before it is combined, so the build tests run
  # even when the non-build tests have already failed.
  outcomes = [run_nonbuild_tests(parallel), run_build_tests()]
  return all(outcomes)
388
389
def get_all_files():
  """Returns a list of absolute paths of the files reported by `git ls-files`.

  Entries that are not regular files on disk are left out.
  """
  listing = subprocess.check_output(['git', 'ls-files']).decode()
  absolute_paths = []
  for path in listing.splitlines():
    if os.path.isfile(path):
      absolute_paths.append(os.path.abspath(path))
  return absolute_paths
395
396
def main():
  """Check changes on a branch for common issues before submitting.

  Parses the command line, collects the files to look at, changes into
  _SRC_ROOT and runs either the one requested command or, when no command is
  given, all checks (but no tests).

  Returns:
    0 if what was run succeeded, 1 otherwise.
  """
  parser = argparse.ArgumentParser(description='Presubmit script for oss-fuzz.')
  parser.add_argument('command',
                      choices=['format', 'lint', 'license', 'infra-tests'],
                      nargs='?')
  parser.add_argument('-a',
                      '--all-files',
                      action='store_true',
                      help='Run presubmit check(s) on all files',
                      default=False)
  parser.add_argument('-p',
                      '--parallel',
                      action='store_true',
                      help='Run tests in parallel.',
                      default=False)
  args = parser.parse_args()

  # The file list is gathered before the working directory changes.
  relevant_files = get_all_files() if args.all_files else get_changed_files()

  os.chdir(_SRC_ROOT)

  command = args.command
  if command == 'format':
    # validate=False: rewrite the files instead of only reporting a diff.
    success = yapf(relevant_files, False)
  elif command == 'lint':
    success = lint()
  elif command == 'license':
    success = check_license(relevant_files)
  elif command == 'infra-tests':
    success = run_tests(relevant_files, parallel=args.parallel)
  else:
    # No specific command: do all the checks (but no tests).
    success = do_checks(relevant_files)

  return bool_to_returncode(success)
444
445
# Run main() only when executed as a script and use its return value as the
# process exit status.
if __name__ == '__main__':
  sys.exit(main())
448