#!/usr/bin/env python
# Copyright 2017 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Find header files missing in GN.

This script gets all the header files from ninja_deps, which is generated from
the true dependencies computed by the compiler, and reports any that do not
exist in GN.
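
Typical usage: build all targets in the output directory first (e.g.
"ninja -C out/Release"), then run this script with --out-dir pointing at that
directory. Pass --json to also write the results to a file.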
10"""
11
12import argparse
13import json
14import os
15import re
16import shutil
17import subprocess
18import sys
19import tempfile
20from multiprocessing import Process, Queue
21
22SRC_DIR = os.path.abspath(
23    os.path.join(os.path.abspath(os.path.dirname(__file__)), os.path.pardir))
24DEPOT_TOOLS_DIR = os.path.join(SRC_DIR, 'third_party', 'depot_tools')
25
26
27def GetHeadersFromNinja(out_dir, skip_obj, q):
28  """Return all the header files from ninja_deps"""
29
30  def NinjaSource():
31    cmd = [os.path.join(DEPOT_TOOLS_DIR, 'ninja'), '-C', out_dir, '-t', 'deps']
32    # A negative bufsize means to use the system default, which usually
33    # means fully buffered.
34    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, bufsize=-1)
35    for line in iter(popen.stdout.readline, ''):
36      yield line.rstrip()
37
38    popen.stdout.close()
39    return_code = popen.wait()
40    if return_code:
41      raise subprocess.CalledProcessError(return_code, cmd)
42
43  ans, err = set(), None
44  try:
45    ans = ParseNinjaDepsOutput(NinjaSource(), out_dir, skip_obj)
46  except Exception as e:
47    err = str(e)
48  q.put((ans, err))


def ParseNinjaDepsOutput(ninja_out, out_dir, skip_obj):
  """Parse ninja output and get the header files"""
  all_headers = {}

  # Ninja always uses "/", even on Windows.
  prefix = '../../'

  is_valid = False
  obj_file = ''
  for line in ninja_out:
    if line.startswith('    '):
      if not is_valid:
        continue
      if line.endswith('.h') or line.endswith('.hh'):
        f = line.strip()
        if f.startswith(prefix):
          f = f[6:]  # Remove the '../../' prefix
          # build/ only contains build-specific files like build_config.h
          # and buildflag.h, and system header files, so they should be
          # skipped.
          if f.startswith(out_dir) or f.startswith('out'):
            continue
          if not f.startswith('build'):
            all_headers.setdefault(f, [])
            if not skip_obj:
              all_headers[f].append(obj_file)
    else:
      is_valid = line.endswith('(VALID)')
      obj_file = line.split(':')[0]

  return all_headers


def GetHeadersFromGN(out_dir, q):
  """Return all the header files from GN"""

  tmp = None
  ans, err = set(), None
  try:
    # Argument |dir| is needed to make sure it's on the same drive on Windows.
    # dir='' means dir='.', but doesn't introduce an unneeded prefix.
    tmp = tempfile.mkdtemp(dir='')
    shutil.copy2(os.path.join(out_dir, 'args.gn'),
                 os.path.join(tmp, 'args.gn'))
    # Do "gn gen" in a temp dir to prevent dirtying |out_dir|.
    gn_exe = 'gn.bat' if sys.platform == 'win32' else 'gn'
    subprocess.check_call([
        os.path.join(DEPOT_TOOLS_DIR, gn_exe), 'gen', tmp, '--ide=json', '-q'])
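    # "gn gen --ide=json" writes project.json into the generated directory.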
    gn_json = json.load(open(os.path.join(tmp, 'project.json')))
    ans = ParseGNProjectJSON(gn_json, out_dir, tmp)
  except Exception as e:
    err = str(e)
  finally:
    if tmp:
      shutil.rmtree(tmp)
  q.put((ans, err))


def ParseGNProjectJSON(gn, out_dir, tmp_out):
  """Parse GN output and get the header files"""
  all_headers = set()

  for _target, properties in gn['targets'].iteritems():
    sources = properties.get('sources', [])
    public = properties.get('public', [])
    # Exclude '"public": "*"'.
    if type(public) is list:
      sources += public
    for f in sources:
      if f.endswith('.h') or f.endswith('.hh'):
        if f.startswith('//'):
          f = f[2:]  # Strip the '//' prefix.
          if f.startswith(tmp_out):
            f = out_dir + f[len(tmp_out):]
          all_headers.add(f)

  return all_headers


def GetDepsPrefixes(q):
  """Return all the folders controlled by the DEPS file"""
  prefixes, err = set(), None
  try:
    gclient_exe = 'gclient.bat' if sys.platform == 'win32' else 'gclient'
    gclient_out = subprocess.check_output([
        os.path.join(DEPOT_TOOLS_DIR, gclient_exe),
        'recurse', '--no-progress', '-j1',
        'python', '-c', 'import os;print os.environ["GCLIENT_DEP_PATH"]'],
        universal_newlines=True)
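    # "gclient recurse" runs the command once per DEPS-managed directory, with
    # GCLIENT_DEP_PATH set to that directory's path relative to the checkout
    # root; strip the leading "src/" so the prefixes match the header paths
    # collected above.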
    for i in gclient_out.split('\n'):
      if i.startswith('src/'):
        i = i[4:]
        prefixes.add(i)
  except Exception as e:
    err = str(e)
  q.put((prefixes, err))


def IsBuildClean(out_dir):
  cmd = [os.path.join(DEPOT_TOOLS_DIR, 'ninja'), '-C', out_dir, '-n']
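  # "ninja -n" is a dry run; a fully built directory prints "no work to do."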
  try:
    out = subprocess.check_output(cmd)
    return 'no work to do.' in out
  except Exception as e:
    print e
    return False


def ParseWhiteList(whitelist):
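  """Parse the whitelist: '#' starts a comment and blank lines are ignored."""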
  out = set()
  for line in whitelist.split('\n'):
    line = re.sub(r'#.*', '', line).strip()
    if line:
      out.add(line)
  return out


def FilterOutDepsedRepo(files, deps):
  return {f for f in files if not any(f.startswith(d) for d in deps)}


def GetNonExistingFiles(lst):
  out = set()
  for f in lst:
    if not os.path.isfile(f):
      out.add(f)
  return out


def main():

  def DumpJson(data):
    if args.json:
      with open(args.json, 'w') as f:
        json.dump(data, f)

  def PrintError(msg):
    DumpJson([])
    parser.error(msg)

  parser = argparse.ArgumentParser(description='''
      NOTE: Use ninja to build all targets in OUT_DIR before running
      this script.''')
  parser.add_argument('--out-dir', metavar='OUT_DIR', default='out/Release',
                      help='output directory of the build')
  parser.add_argument('--json',
                      help='JSON output filename for missing headers')
  parser.add_argument('--whitelist', help='file containing whitelist')
  parser.add_argument('--skip-dirty-check', action='store_true',
                      help='skip checking whether the build is dirty')
  parser.add_argument('--verbose', action='store_true',
                      help='print more diagnostic info')

  args, _extras = parser.parse_known_args()

  if not os.path.isdir(args.out_dir):
    parser.error('OUT_DIR "%s" does not exist.' % args.out_dir)

  if not args.skip_dirty_check and not IsBuildClean(args.out_dir):
    dirty_msg = 'OUT_DIR looks dirty. You need to build all there.'
    if args.json:
      # Assume we are running on the bots: print a note and skip the check
      # instead of failing. This is acceptable because the "analyze" step can
      # be wrong due to under-specified header files. See crbug.com/725877
      print dirty_msg
      DumpJson([])
      return 0
    else:
      # Assume running interactively.
      parser.error(dirty_msg)

  d_q = Queue()
  d_p = Process(target=GetHeadersFromNinja, args=(args.out_dir, True, d_q,))
  d_p.start()

  gn_q = Queue()
  gn_p = Process(target=GetHeadersFromGN, args=(args.out_dir, gn_q,))
  gn_p.start()

  deps_q = Queue()
  deps_p = Process(target=GetDepsPrefixes, args=(deps_q,))
  deps_p.start()

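  # Drain each queue before join(): a child process does not exit until the
  # data it put on its queue has been consumed, so joining first could block.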
  d, d_err = d_q.get()
  gn, gn_err = gn_q.get()
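  # "missing": headers the compiler actually used but GN does not list.
  # "nonexisting": headers GN lists that do not exist on disk.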
  missing = set(d.keys()) - gn
  nonexisting = GetNonExistingFiles(gn)

  deps, deps_err = deps_q.get()
  missing = FilterOutDepsedRepo(missing, deps)
  nonexisting = FilterOutDepsedRepo(nonexisting, deps)

  d_p.join()
  gn_p.join()
  deps_p.join()

  if d_err:
    PrintError(d_err)
  if gn_err:
    PrintError(gn_err)
  if deps_err:
    PrintError(deps_err)
  if len(GetNonExistingFiles(d)) > 0:
    print 'Non-existing files in ninja deps:', GetNonExistingFiles(d)
    PrintError('Found non-existing files in ninja deps. You should ' +
               'build all in OUT_DIR.')
  if len(d) == 0:
    PrintError('OUT_DIR looks empty. You should build all there.')
  if any((('/gen/' in i) for i in nonexisting)):
    PrintError('OUT_DIR looks wrong. You should build all there.')

  if args.whitelist:
    whitelist = ParseWhiteList(open(args.whitelist).read())
    missing -= whitelist
    nonexisting -= whitelist

  missing = sorted(missing)
  nonexisting = sorted(nonexisting)

  DumpJson(sorted(missing + nonexisting))

  if len(missing) == 0 and len(nonexisting) == 0:
    return 0

  if len(missing) > 0:
    print '\nThe following files should be included in gn files:'
    for i in missing:
      print i

  if len(nonexisting) > 0:
    print '\nThe following non-existing files should be removed from gn files:'
    for i in nonexisting:
      print i

  if args.verbose:
    # Only fetch the detailed per-object dependency info here, since it is
    # slower.
    GetHeadersFromNinja(args.out_dir, False, d_q)
    d, d_err = d_q.get()
    print '\nDetailed dependency info:'
    for f in missing:
      print f
      for cc in d[f]:
        print '  ', cc

    print '\nMissing headers sorted by number of affected object files:'
    count = {k: len(v) for (k, v) in d.iteritems()}
    for f in sorted(count, key=count.get, reverse=True):
      if f in missing:
        print count[f], f

  return 1


if __name__ == '__main__':
  sys.exit(main())