1#!/usr/bin/env python
2
3"""Helps to keep BUILD.gn files in sync with the corresponding CMakeLists.txt.
4
5For each BUILD.gn file in the tree, checks if the list of cpp files in
6it is identical to the list of cpp files in the corresponding CMakeLists.txt
7file, and prints the difference if not.
8
Also checks that each CMakeLists.txt file below a unittests/ folder that
defines a binary has a corresponding BUILD.gn file.
11
12If --write is passed, tries to write modified .gn files and adds one git
13commit for each cmake commit this merges. If an error is reported, the state
14of HEAD is unspecified; run `git reset --hard origin/master` if this happens.
15"""
16
17from __future__ import print_function
18
19from collections import defaultdict
20import os
21import re
22import subprocess
23import sys
24
25
def patch_gn_file(gn_file, add, remove):
    """Edit the source list in `gn_file` in place, then run `gn format` on it.

    Each entry of `add` is inserted as a quoted, comma-terminated item right
    after the opening bracket of the file's one non-empty `sources = [` list.
    Each entry of `remove` is deleted wherever it occurs in the file as a
    quoted, comma-terminated item.

    Raises ValueError when entries have to be added but the file has no
    non-empty source list, has more than one source list, or contains a
    `# NOSORT` marker ahead of the list.
    """
    with open(gn_file) as gn_in:
        text = gn_in.read()

    if add:
        list_start = 'sources = ['
        pos = text.find(list_start)
        # Empty `sources = []` lists are not valid insertion points; skip them.
        while text.startswith('sources = []', pos):
            pos = text.find(list_start, pos + 1)
        if pos == -1:
            raise ValueError(gn_file + ': No source list')
        if list_start in text[pos + 1:]:
            raise ValueError(gn_file + ': Multiple source lists')
        if '# NOSORT' in text[:pos]:
            raise ValueError(gn_file + ': Found # NOSORT, needs manual merge')
        insert_at = pos + len(list_start)
        # Inserting the entries one by one at the same offset leaves them in
        # reverse order; build that same text in a single step.
        new_items = ''.join('"%s",' % name for name in reversed(list(add)))
        text = text[:insert_at] + new_items + text[insert_at:]

    for name in remove:
        text = text.replace('"%s",' % name, '')

    with open(gn_file, 'w') as gn_out:
        gn_out.write(text)

    # Run `gn format`.
    gn_script = os.path.join(os.path.dirname(__file__), '..', 'gn.py')
    subprocess.check_call([sys.executable, gn_script, 'format', '-q', gn_file])
51
52
def sync_source_lists(write):
    """Compare the source lists of BUILD.gn files with their CMakeLists.txt.

    BUILD.gn files are discovered with `git ls-files`; only those below
    llvm/utils/gn/secondary/ that have a CMakeLists.txt at the corresponding
    path are checked. Paths are resolved relative to the current working
    directory, so this is expected to run from the root of the checkout.

    Differences are grouped by the CMake revision that `git log` reports for
    each added or removed file name.

    Args:
        write: If true, patch every out-of-sync BUILD.gn file, `git add` it
            and create one git commit per revision. If false, only print the
            required additions and removals.

    Returns:
        True if differences were found and `write` is false; False otherwise.

    Raises:
        subprocess.CalledProcessError: If a git invocation (or, in write
            mode, `gn format`) fails.
        ValueError: In write mode, if patch_gn_file() cannot patch a file.
    """
    # Use shell=True on Windows in case git is a bat file.
    use_shell = os.name == 'nt'

    def git(args):
        subprocess.check_call(['git'] + args, shell=use_shell)

    def git_out(args):
        # universal_newlines=True makes check_output() return text instead of
        # bytes on Python 3, so the paths and revisions can be compared and
        # formatted together with ordinary str values.
        return subprocess.check_output(['git'] + args, shell=use_shell,
                                       universal_newlines=True)

    def read_file(path):
        with open(path) as f:
            return f.read()

    def get_sources(source_re, text):
        return set(m.group(1) for m in source_re.finditer(text))

    gn_files = git_out(['ls-files', '*BUILD.gn']).splitlines()

    # Matches e.g. |   "foo.cpp",|, captures |foo.cpp| in group 1.
    gn_cpp_re = re.compile(r'^\s*"([^$"]+\.(?:cpp|c|h|S))",$', re.MULTILINE)
    # Matches e.g. |   bar_sources = [ "foo.cpp" ]|, captures |foo.cpp| in
    # group 1.
    gn_cpp_re2 = re.compile(
        r'^\s*(?:.*_)?sources \+?= \[ "([^$"]+\.(?:cpp|c|h|S))" ]$',
        re.MULTILINE)
    # Matches e.g. |   foo.cpp|, captures |foo.cpp| in group 1.
    cmake_cpp_re = re.compile(r'^\s*([A-Za-z_0-9./-]+\.(?:cpp|c|h|S))$',
                              re.MULTILINE)

    # Maps revision -> gn file -> 'add' / 'remove' -> list of file names.
    changes_by_rev = defaultdict(lambda: defaultdict(lambda: defaultdict(list)))

    def find_gitrev(touched_line, in_file):
        # Ask git for the abbreviated hash of the most recent commit that
        # added or removed an occurrence of `touched_line` in `in_file`.
        #
        # re.escape() escapes e.g. '-', which works in practice but has
        # undefined behavior according to the POSIX extended regex spec.
        posix_re_escape = lambda s: re.sub(r'([.[{()\\*+?|^$])', r'\\\1', s)
        cmd = ['log', '--format=%h', '-1', '--pickaxe-regex',
               r'-S\b%s\b' % posix_re_escape(touched_line), in_file]
        return git_out(cmd).rstrip()

    # The CMakeLists.txt for llvm/utils/gn/secondary/foo/BUILD.gn is
    # at foo/CMakeLists.txt.
    strip_prefix = 'llvm/utils/gn/secondary/'

    # Collect changes to gn files, grouped by revision.
    for gn_file in gn_files:
        if not gn_file.startswith(strip_prefix):
            continue
        cmake_file = os.path.join(
                os.path.dirname(gn_file[len(strip_prefix):]), 'CMakeLists.txt')
        if not os.path.exists(cmake_file):
            continue

        gn_text = read_file(gn_file)
        gn_cpp = (get_sources(gn_cpp_re, gn_text) |
                  get_sources(gn_cpp_re2, gn_text))
        cmake_cpp = get_sources(cmake_cpp_re, read_file(cmake_file))

        if gn_cpp == cmake_cpp:
            continue

        pending = (('add', cmake_cpp - gn_cpp),
                   ('remove', gn_cpp - cmake_cpp))
        for key, files in pending:
            for f in sorted(files):
                rev = find_gitrev(f, cmake_file)
                changes_by_rev[rev][gn_file][key].append(f)

    # Output necessary changes grouped by revision.
    for rev in sorted(changes_by_rev):
        print('[gn build] Port {0} -- https://reviews.llvm.org/rG{0}'
            .format(rev))
        for gn_file, data in sorted(changes_by_rev[rev].items()):
            add = data.get('add', [])
            remove = data.get('remove', [])
            if write:
                patch_gn_file(gn_file, add, remove)
                git(['add', gn_file])
            else:
                print('  ' + gn_file)
                if add:
                    print('   add:\n' + '\n'.join('    "%s",' % a for a in add))
                if remove:
                    print('   remove:\n    ' + '\n    '.join(remove))
                print()
        if write:
            git(['commit', '-m', '[gn build] Port %s' % rev])
        else:
            print()

    # Only a read-only run with pending changes counts as "out of sync".
    return bool(changes_by_rev) and not write
131
132
def sync_unittests():
    """Report unittest CMakeLists.txt files that have no matching BUILD.gn.

    Walks the `unittests` directory of each checked project, relative to the
    current working directory. For every CMakeLists.txt containing a line
    that starts with an `add_..._unittest` call, a BUILD.gn is expected at
    the same relative path below llvm/utils/gn/secondary. Each missing file
    is printed.

    Returns:
        True if at least one BUILD.gn file is missing, False otherwise.
    """
    # Matches e.g. |add_llvm_unittest_with_input_files|.
    unittest_re = re.compile(r'^add_\S+_unittest', re.MULTILINE)

    checked = ['clang', 'clang-tools-extra', 'lld', 'llvm']
    changed = False
    for c in checked:
        for root, _, _ in os.walk(os.path.join(c, 'unittests')):
            cmake_file = os.path.join(root, 'CMakeLists.txt')
            if not os.path.exists(cmake_file):
                continue
            # Read through a context manager so the handle is closed promptly
            # instead of being left to the garbage collector.
            with open(cmake_file) as f:
                cmake_contents = f.read()
            if not unittest_re.search(cmake_contents):
                continue  # Skip CMake files that just add subdirectories.
            gn_file = os.path.join('llvm/utils/gn/secondary', root, 'BUILD.gn')
            if not os.path.exists(gn_file):
                changed = True
                print('missing GN file %s for unittest CMake file %s' %
                      (gn_file, cmake_file))
    return changed
152
153
def main():
    """Run both sync checks and exit with status 1 if either found a problem.

    Write mode is enabled only when the first command-line argument is
    exactly `--write`.
    """
    write = sys.argv[1:2] == ['--write']
    sources_differ = sync_source_lists(write)
    unittests_missing = sync_unittests()
    if sources_differ or unittests_missing:
        sys.exit(1)


# Only run the checks when executed as a script, not when imported.
if __name__ == '__main__':
    main()
163