1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3# Copyright 2020 The Chromium OS Authors. All rights reserved.
4# Use of this source code is governed by a BSD-style license that can be
5# found in the LICENSE file.
6
7"""Fetches and submits the artifacts from Chrome OS toolchain's crash bucket.
8"""
9
10# pylint: disable=cros-logging-import
11
12import argparse
13import glob
14import json
15import logging
16import os
17import os.path
18import shutil
19import subprocess
20import sys
21
22import chroot
23
24
def get_artifacts(pattern):
  """Lists the entries that `gsutil.py ls` prints for `pattern`.

  Args:
    pattern: Path or wildcard expression handed to `gsutil.py ls` unchanged.

  Returns:
    A sorted list of the non-blank output lines, each stripped of surrounding
    whitespace.

  Raises:
    subprocess.CalledProcessError: If `gsutil.py` exits with a non-zero status.
  """
  # stderr is merged into the captured output, so it ends up both in
  # CalledProcessError.output on failure and in the lines parsed below.
  output = subprocess.check_output(['gsutil.py', 'ls', pattern],
                                   stderr=subprocess.STDOUT,
                                   encoding='utf-8')
  # A blank line is never an artifact; dropping it keeps '' out of the result,
  # which callers would otherwise try to parse as a URL.
  stripped = (line.strip() for line in output.splitlines())
  return sorted(line for line in stripped if line)
30
31
def get_crash_reproducers(working_dir):
  """Pairs each C/C++ source directly inside `working_dir` with its script.

  A source is a file whose extension is `c`, `cc` or `cpp`. Its script is the
  same path with the extension replaced by `sh`. A source whose script does not
  exist is reported with a warning and left out of the result.

  Args:
    working_dir: Directory to scan; subdirectories are not searched.

  Returns:
    A list of `(source_path, script_path)` tuples, sorted by path.
  """
  # Escape the directory so glob metacharacters in its name ('[', '*', '?')
  # are matched literally rather than interpreted as a pattern.
  candidates = glob.glob('%s/*.c*' % glob.escape(working_dir))
  results = []
  # Sorting makes the result independent of the directory listing order.
  for src in sorted(candidates):
    stem, _, extension = src.rpartition('.')
    # '*.c*' also matches names such as 'foo.cxx'; keep the known ones only.
    if extension not in ('c', 'cc', 'cpp'):
      continue
    script = stem + '.sh'
    if os.path.exists(script):
      results.append((src, script))
    else:
      logging.warning('could not find the matching script of %s', src)
  return results
44
45
def submit_crash_to_forcey(forcey: str, temporary_directory: str,
                           buildbucket_id: str, url: str) -> None:
  """Downloads one crash archive, unpacks it and submits its reproducers.

  The archive is copied to `<temporary_directory>/<buildbucket_id>/` and
  extracted there. Every (source, script) pair that `get_crash_reproducers`
  finds in that directory is then passed to `forcey reduce`.

  Args:
    forcey: Executable that is invoked with the `reduce` subcommand.
    temporary_directory: Directory under which the per-build directory lives.
    buildbucket_id: Name of the per-build directory.
    url: Location of the archive, handed to `gsutil.py cp`.

  Raises:
    subprocess.CalledProcessError: If any invoked command exits non-zero.
  """
  dest_dir = os.path.join(temporary_directory, buildbucket_id)
  dest_file = os.path.join(dest_dir, os.path.basename(url))
  # `tar` below runs with dest_dir as its working directory, so the directory
  # must exist no matter what the download step does; create it explicitly.
  os.makedirs(dest_dir, exist_ok=True)
  logging.info('Downloading and submitting %r...', url)
  subprocess.check_output(['gsutil.py', 'cp', url, dest_file],
                          stderr=subprocess.STDOUT)
  # -J: the archive is xz-compressed; it is unpacked next to the download.
  subprocess.check_output(['tar', '-xJf', dest_file], cwd=dest_dir)
  for src, script in get_crash_reproducers(dest_dir):
    # The note records which archive and which source the submission is from.
    subprocess.check_output([
        forcey, 'reduce', '-wait=false', '-note',
        '%s:%s' % (url, src), '-sh_file', script, '-src_file', src
    ])
59
60
def main(argv):
  """Submits every crash archive that has not been submitted before.

  The set of already-submitted builds is read from, and written back to, a
  JSON state file that maps a buildbucket id to the URL submitted for it.

  Args:
    argv: Command-line arguments, without the program name.
  """
  chroot.VerifyOutsideChroot()
  logging.basicConfig(
      level=logging.INFO,
      format='%(asctime)s: %(levelname)s: %(filename)s:%(lineno)d: %(message)s',
  )

  script_dir = os.path.dirname(os.path.abspath(__file__))
  default_state_file = os.path.join(script_dir, 'chromeos-state.json')

  parser = argparse.ArgumentParser(description=__doc__)
  parser.add_argument(
      '--4c', dest='forcey', required=True, help='Path to a 4c client binary')
  parser.add_argument(
      '--state_file',
      default=default_state_file,
      help='The path to the state file.')
  parser.add_argument(
      '--nocleanup',
      action='store_false',
      dest='cleanup',
      help='Keep temporary files created after the script finishes.')
  opts = parser.parse_args(argv)

  state_file = os.path.abspath(opts.state_file)
  os.makedirs(os.path.dirname(state_file), exist_ok=True)
  temporary_directory = '/tmp/bisect_clang_crashes'
  os.makedirs(temporary_directory, exist_ok=True)

  crash_pattern = ('gs://chromeos-toolchain-artifacts/clang-crash-diagnoses'
                   '/**/*clang_crash_diagnoses.tar.xz')
  urls = get_artifacts(crash_pattern)
  logging.info('%d crash URLs found', len(urls))

  # Maps buildbucket id -> URL that was submitted for that build.
  visited = {}
  if os.path.exists(state_file):
    # Only entries for builds that are still listed are carried over.
    listed_ids = set(listed_url.split('/')[-2] for listed_url in urls)
    with open(state_file, encoding='utf-8') as f:
      saved = json.load(f)
    visited = {
        build_id: saved_url
        for build_id, saved_url in saved.items()
        if build_id in listed_ids
    }
    logging.info('Successfully loaded %d previously-submitted crashes',
                 len(visited))

  # Stays False unless the loop below runs to completion, so the `finally`
  # clause can tell a clean finish from any exception (including Ctrl-C).
  finished = False
  try:
    for url in urls:
      # The buildbucket id is the second-to-last component of the URL.
      buildbucket_id = url.split('/')[-2]
      # Only builds that have not been processed yet are submitted.
      if buildbucket_id not in visited:
        submit_crash_to_forcey(
            forcey=opts.forcey,
            temporary_directory=temporary_directory,
            buildbucket_id=buildbucket_id,
            url=url,
        )
        visited[buildbucket_id] = url
    finished = True
  finally:
    if finished:
      logging.info('Persisting state...')
    else:
      # This is best-effort. If the machine powers off or similar, we'll just
      # resubmit the same crashes, which is suboptimal, but otherwise
      # acceptable.
      logging.error('Something went wrong; attempting to save our work...')

    # Write to a sibling file first, then rename it over the state file.
    tmp_state_file = state_file + '.tmp'
    with open(tmp_state_file, 'w', encoding='utf-8') as f:
      json.dump(visited, f, indent=2)
    os.rename(tmp_state_file, state_file)

    logging.info('State successfully persisted')

  # Reached only when no exception escaped the block above.
  if not opts.cleanup:
    return
  shutil.rmtree(temporary_directory)
136
137
# Run main() only when this file is executed as a script. main() does not
# return a value, so a run that finishes without raising exits with status 0.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))
140