1#!/usr/bin/env python3
2"""
3This script:
4- Builds clang with user-defined flags
5- Uses that clang to build an instrumented clang, which can be used to collect
6  PGO samples
7- Builds a user-defined set of sources (default: clang) to act as a
8  "benchmark" to generate a PGO profile
9- Builds clang once more with the PGO profile generated above
10
11This is a total of four clean builds of clang (by default). This may take a
12while. :)
13
This script duplicates https://llvm.org/docs/AdvancedBuilds.html#multi-stage-pgo
15Eventually, it will be updated to instead call the cmake cache mentioned there.
16"""
17
18import argparse
19import collections
20import multiprocessing
21import os
22import shlex
23import shutil
24import subprocess
25import sys
26
27### User configuration
28
29
30# If you want to use a different 'benchmark' than building clang, make this
31# function do what you want. out_dir is the build directory for clang, so all
32# of the clang binaries will live under "${out_dir}/bin/". Using clang in
33# ${out_dir} will magically have the profiles go to the right place.
34#
35# You may assume that out_dir is a freshly-built directory that you can reach
36# in to build more things, if you'd like.
def _run_benchmark(env, out_dir, include_debug_info):
    """Exercise the instrumented clang in `out_dir` to generate profile data.

    Runs the `check-llvm` and `check-clang` targets inside `out_dir`, then
    configures and builds `all` in the 'instrumentation_run' output
    subdirectory using the compilers found under `out_dir`. When
    `include_debug_info` is true, that second build is configured with
    CMAKE_BUILD_TYPE=RelWithDebInfo.
    """
    # The test suites are a cheap way to widen coverage: `check-llvm` pokes at
    # the non-x86 backends (if they're configured), and `check-clang` feeds us
    # more C. It also walks diagnostic paths a fair amount, but since we built
    # all of LLVM/etc, the `if (stuff_is_broken) { diag() ... }` branches
    # should still end up heavily weighted towards not-taken.
    _build_things_in(env, out_dir, what=['check-llvm', 'check-clang'])

    # Tablegen runs are coverage too, so rebuild them rather than borrowing
    # the ones in out_dir (which might not even be there).
    bootstrap = _get_cmake_invocation_for_bootstrap_from(
        env, out_dir, skip_tablegens=False)
    if include_debug_info:
        bootstrap.add_flag('CMAKE_BUILD_TYPE', 'RelWithDebInfo')

    benchmark_dir = env.output_subdir('instrumentation_run')
    _run_fresh_cmake(env, bootstrap, benchmark_dir)

    # More data is better data: build everything.
    _build_things_in(env, benchmark_dir, what=['all'])
61
62### Script
63
64
class CmakeInvocation:
    """Collects the `-D` definitions for a single cmake command line.

    Compiler and linker flag variables are list-valued and only ever grow;
    every other variable holds a single string that may be replaced unless
    the caller asks for overwrite protection.
    """

    _cflags = ['CMAKE_C_FLAGS', 'CMAKE_CXX_FLAGS']
    _ldflags = [
        'CMAKE_EXE_LINKER_FLAGS',
        'CMAKE_MODULE_LINKER_FLAGS',
        'CMAKE_SHARED_LINKER_FLAGS',
    ]

    def __init__(self, cmake, maker, cmake_dir):
        """Start an invocation of `cmake -G maker cmake_dir` with no flags."""
        self._prefix = [cmake, '-G', maker, cmake_dir]

        # Map of str -> (list|str). The list-y variables are preloaded so that
        # later additions to them accumulate instead of replacing.
        self._flags = {
            name: []
            for name in CmakeInvocation._cflags + CmakeInvocation._ldflags
        }

    def add_new_flag(self, key, value):
        """Like add_flag, but refuses to replace an existing string value."""
        self.add_flag(key, value, allow_overwrites=False)

    def add_flag(self, key, value, allow_overwrites=True):
        """Set `key` to `value`, or append `value` if `key` is list-valued.

        Raises:
            ValueError: if `key` already holds a non-list value and
                `allow_overwrites` is false.
        """
        if isinstance(self._flags.get(key), list):
            self._flags[key].append(value)
            return

        if key in self._flags and not allow_overwrites:
            raise ValueError('Invalid overwrite of %s requested' % key)

        self._flags[key] = value

    def _extend_each(self, names, flags):
        """Append every element of `flags` to each list-valued variable."""
        # A bare string would be spread out character by character, i.e.
        # '-O2' would turn into ['-', 'O', '2']. Nobody wants that.
        assert not isinstance(flags, str)
        for name in names:
            self._flags[name].extend(flags)

    def add_cflags(self, flags):
        """Append `flags` (a non-str iterable) to the C and C++ flags."""
        self._extend_each(CmakeInvocation._cflags, flags)

    def add_ldflags(self, flags):
        """Append `flags` (a non-str iterable) to all linker flag variables."""
        self._extend_each(CmakeInvocation._ldflags, flags)

    def to_args(self):
        """Return the argv list: the prefix, then -D args sorted by key."""
        args = list(self._prefix)
        for key in sorted(self._flags):
            value = self._flags[key]
            if isinstance(value, list):
                # Preloaded list variables nobody added to are left out.
                if not value:
                    continue
                value = ' '.join(value)

            # An empty value is emitted as a bare `-DKEY`.
            if value == '':
                args.append('-D' + key)
            else:
                args.append('-D' + key + '=' + value)
        return args
125
126
class Env:
    """Holds the user's build settings and runs commands on their behalf."""

    def __init__(self, llvm_dir, use_make, output_dir, default_cmake_args,
                 dry_run):
        """Record the settings; `default_cmake_args` is shallow-copied."""
        self.llvm_dir = llvm_dir
        self.use_make = use_make
        self.output_dir = output_dir
        self.default_cmake_args = default_cmake_args.copy()
        self.dry_run = dry_run

    def get_default_cmake_args_kv(self):
        """Return the (key, value) view of the default cmake arguments."""
        return self.default_cmake_args.items()

    def get_cmake_maker(self):
        """Return the cmake generator name matching the chosen build tool."""
        return 'Unix Makefiles' if self.use_make else 'Ninja'

    def get_make_command(self):
        """Return the argv prefix used to build targets."""
        if not self.use_make:
            return ['ninja']
        return ['make', '-j' + str(multiprocessing.cpu_count())]

    def output_subdir(self, name):
        """Return the path of `name` inside the output directory."""
        return os.path.join(self.output_dir, name)

    def has_llvm_subproject(self, name):
        """Report whether `name` exists as a directory beside `llvm_dir`.

        Raises:
            ValueError: if `name` is neither 'compiler-rt' nor 'clang'.
        """
        if name not in ('compiler-rt', 'clang'):
            raise ValueError('Unknown subproject: %s' % name)

        return os.path.isdir(os.path.join(self.llvm_dir, '../' + name))

    # Note that we don't allow capturing stdout/stderr. This works quite nicely
    # with dry_run.
    def run_command(self,
                    cmd,
                    cwd=None,
                    check=False,
                    silent_unless_error=False):
        """Announce `cmd`, then run it unless this is a dry run.

        With `silent_unless_error`, the child's stdout and stderr are
        captured together and printed only if it exits with a nonzero
        status. With `check`, a nonzero exit status raises
        subprocess.CalledProcessError. Always returns None.
        """
        location = shlex.quote(cwd or os.getcwd())
        print('Running `%s` in %s' % (cmd, location))

        if self.dry_run:
            return

        out_target, err_target = None, None
        if silent_unless_error:
            out_target, err_target = subprocess.PIPE, subprocess.STDOUT

        # Don't use subprocess.run because it's >= py3.5 only, and it's not too
        # much extra effort to get what it gives us anyway.
        process = subprocess.Popen(
            cmd,
            stdin=subprocess.DEVNULL,
            stdout=out_target,
            stderr=err_target,
            cwd=cwd)
        captured, _ = process.communicate()
        return_code = process.wait(timeout=0)

        if return_code:
            if silent_unless_error:
                print(captured.decode('utf-8', 'ignore'))

            if check:
                raise subprocess.CalledProcessError(
                    returncode=return_code,
                    cmd=cmd,
                    output=captured,
                    stderr=None)
198
199
def _get_default_cmake_invocation(env):
    """Build a CmakeInvocation for env's LLVM tree with the default args.

    Each default argument is added with overwrite protection, so a clash on a
    single-valued variable raises ValueError.
    """
    invocation = CmakeInvocation(
        cmake='cmake', maker=env.get_cmake_maker(), cmake_dir=env.llvm_dir)
    for name, setting in env.get_default_cmake_args_kv():
        invocation.add_flag(name, setting, allow_overwrites=False)
    return invocation
206
207
def _get_cmake_invocation_for_bootstrap_from(env, out_dir,
                                             skip_tablegens=True):
    """Build a cmake invocation that compiles with the clang in `out_dir`.

    The C and C++ compilers are set to `out_dir`/bin/clang and clang++. When
    `skip_tablegens` is true, the llvm-tblgen and clang-tblgen binaries from
    `out_dir` are reused where available instead of being rebuilt.
    """
    bin_dir = os.path.join(out_dir, 'bin')
    clang = os.path.join(bin_dir, 'clang')

    cmake = _get_default_cmake_invocation(env)
    cmake.add_new_flag('CMAKE_C_COMPILER', clang)
    cmake.add_new_flag('CMAKE_CXX_COMPILER', clang + '++')

    # We often get no value out of building new tblgens; the previous build
    # should have them. It's still correct to build them, just slower.
    if skip_tablegens:
        reusable = (
            ('LLVM_TABLEGEN', 'llvm-tblgen'),
            ('CLANG_TABLEGEN', 'clang-tblgen'),
        )
        for key, binary in reusable:
            candidate = os.path.join(bin_dir, binary)
            # The user may have handed us their own stage1 directory (which is
            # generally where everything is sourced from), so only point cmake
            # at binaries that actually exist. Dry runs just hope for the
            # best.
            if env.dry_run or os.path.exists(candidate):
                cmake.add_new_flag(key, candidate)

    return cmake
231
232
def _build_things_in(env, target_dir, what):
    """Build the targets listed in `what` inside `target_dir`.

    The build command comes from `env.get_make_command()`; a failing build
    propagates whatever `env.run_command` raises for `check=True`.
    """
    make_prefix = env.get_make_command()
    env.run_command(make_prefix + what, cwd=target_dir, check=True)
236
237
def _run_fresh_cmake(env, cmake, target_dir):
    """Run `cmake` in a freshly created `target_dir`.

    Unless this is a dry run, any existing `target_dir` is deleted and an
    empty one is created in its place. cmake's output is only shown if it
    fails.
    """

    def recreate(path):
        try:
            shutil.rmtree(path)
        except FileNotFoundError:
            # Nothing there yet; that's just as fresh.
            pass
        os.makedirs(path, mode=0o755)

    if not env.dry_run:
        recreate(target_dir)

    env.run_command(
        cmake.to_args(),
        cwd=target_dir,
        check=True,
        silent_unless_error=True)
250
251
def _build_stage1_clang(env):
    """Configure and build the stage1 toolchain; return its build directory.

    Builds the `clang`, `llvm-profdata` and `profile` targets with the default
    cmake arguments in the 'stage1' output subdirectory.
    """
    stage1_dir = env.output_subdir('stage1')
    _run_fresh_cmake(env, _get_default_cmake_invocation(env), stage1_dir)

    stage1_targets = ['clang', 'llvm-profdata', 'profile']
    _build_things_in(env, stage1_dir, what=stage1_targets)
    return stage1_dir
258
259
def _generate_instrumented_clang_profile(env, stage1_dir, profile_dir,
                                         output_file):
    """Merge the raw profiles in `profile_dir` into `output_file`.

    Uses the llvm-profdata binary from `stage1_dir`. On a dry run the
    directory isn't read; a literal '*.profraw' placeholder stands in for
    the inputs.
    """
    if env.dry_run:
        raw_names = ['*.profraw']
    else:
        raw_names = [
            name for name in os.listdir(profile_dir)
            if name.endswith('.profraw')
        ]

    merge_cmd = [
        os.path.join(stage1_dir, 'bin', 'llvm-profdata'),
        'merge',
        '-output=' + output_file,
    ]
    merge_cmd.extend(os.path.join(profile_dir, name) for name in raw_names)
    env.run_command(merge_cmd, check=True)
272
273
def _build_instrumented_clang(env, stage1_dir):
    """Build an IR-instrumented clang and lld using the stage1 compiler.

    `stage1_dir` must be an absolute path. Returns a pair of the
    'instrumented' build directory and its 'profiles' subdirectory.
    """
    assert os.path.isabs(stage1_dir)

    instrumented_dir = os.path.join(env.output_dir, 'instrumented')
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)

    extra_settings = (
        ('LLVM_BUILD_INSTRUMENTED', 'IR'),
        # libcxx's configure step messes with our link order: we'll link
        # libclang_rt.profile after libgcc, and the former requires atexit
        # from the latter. So, configure checks fail.
        #
        # Since we don't need libcxx or compiler-rt anyway, just disable them.
        ('LLVM_BUILD_RUNTIME', 'No'),
    )
    for key, value in extra_settings:
        cmake.add_new_flag(key, value)

    _run_fresh_cmake(env, cmake, instrumented_dir)
    _build_things_in(env, instrumented_dir, what=['clang', 'lld'])

    return instrumented_dir, os.path.join(instrumented_dir, 'profiles')
293
294
def _build_optimized_clang(env, stage1_dir, profdata_file):
    """Build clang with the PGO profile applied; return the build directory.

    The build is configured in the 'optimized' directory under
    `env.output_dir`, compiled with the stage1 compiler.

    Raises:
        ValueError: if `profdata_file` doesn't exist (not checked on dry
            runs).
    """
    if not (env.dry_run or os.path.exists(profdata_file)):
        raise ValueError(
            "Looks like the profdata file at %s doesn't exist" % profdata_file)

    optimized_dir = os.path.join(env.output_dir, 'optimized')
    cmake = _get_cmake_invocation_for_bootstrap_from(env, stage1_dir)

    profdata_path = os.path.abspath(profdata_file)
    cmake.add_new_flag('LLVM_PROFDATA_FILE', profdata_path)

    # We'll get complaints about hash mismatches in `main` in tools/etc. Ignore
    # it.
    cmake.add_cflags(['-Wno-backend-plugin'])

    _run_fresh_cmake(env, cmake, optimized_dir)
    _build_things_in(env, optimized_dir, what=['clang'])
    return optimized_dir
310
311
# Command-line choices that steer the overall run.
Args = collections.namedtuple(
    'Args',
    'do_optimized_build include_debug_info profile_location stage1_dir')
318
319
def _parse_args():
    """Parse the command line into the build environment and run options.

    Returns:
        A pair `(env, args)`. `env` is an `Env` holding the LLVM directory,
        output directory, build tool choice, default cmake arguments and
        dry-run setting. `args` is an `Args` tuple with the remaining
        choices.

    Raises:
        ValueError: if a --cmake-extra-arg value starts with '-' but isn't a
            '-D' argument.
    """
    parser = argparse.ArgumentParser(
        description='Builds LLVM and Clang with instrumentation, collects '
        'instrumentation profiles for them, and (optionally) builds things '
        'with these PGO profiles. By default, it\'s assumed that you\'re '
        'running this from your LLVM root, and all build artifacts will be '
        'saved to $PWD/out.')
    parser.add_argument(
        '--cmake-extra-arg',
        action='append',
        default=[],
        help='an extra arg to pass to all cmake invocations. Note that this '
        'is interpreted as a -D argument, e.g. --cmake-extra-arg FOO=BAR will '
        'be passed as -DFOO=BAR. This may be specified multiple times.')
    parser.add_argument(
        '--dry-run',
        action='store_true',
        help='print commands instead of running them')
    parser.add_argument(
        '--llvm-dir',
        default='.',
        help='directory containing an LLVM checkout (default: $PWD)')
    parser.add_argument(
        '--no-optimized-build',
        action='store_true',
        help='disable the final, PGO-optimized build')
    parser.add_argument(
        '--out-dir',
        help='directory to write artifacts to (default: $llvm_dir/out)')
    parser.add_argument(
        '--profile-output',
        help='where to output the profile (default is $out/pgo_profile.prof)')
    parser.add_argument(
        '--stage1-dir',
        help='instead of having an initial build of everything, use the given '
        'directory. It is expected that this directory will have clang, '
        'llvm-profdata, and the appropriate libclang_rt.profile already built')
    parser.add_argument(
        '--use-debug-info-in-benchmark',
        action='store_true',
        help='use RelWithDebInfo instead of a regular build in the benchmark. '
        'This increases benchmark execution time and disk space requirements, '
        'but gives more coverage over debuginfo bits in LLVM and clang.')
    parser.add_argument(
        '--use-make',
        action='store_true',
        default=shutil.which('ninja') is None,
        help='use Makefiles instead of ninja')

    args = parser.parse_args()

    llvm_dir = os.path.abspath(args.llvm_dir)
    if args.out_dir is None:
        output_dir = os.path.join(llvm_dir, 'out')
    else:
        output_dir = os.path.abspath(args.out_dir)

    # The instrumented build asserts that the stage1 directory is absolute,
    # so resolve a user-supplied one here, just like the other directories.
    if args.stage1_dir is None:
        stage1_dir = None
    else:
        stage1_dir = os.path.abspath(args.stage1_dir)

    # User-supplied arguments may replace these defaults.
    extra_args = {'CMAKE_BUILD_TYPE': 'Release',
                  'LLVM_ENABLE_PROJECTS': 'clang;compiler-rt;lld'}
    for arg in args.cmake_extra_arg:
        if arg.startswith('-D'):
            arg = arg[2:]
        elif arg.startswith('-'):
            raise ValueError('Unknown not- -D arg encountered; you may need '
                             'to tweak the source...')
        # `FOO` with no `=` becomes FOO with an empty value.
        key, _, val = arg.partition('=')
        extra_args[key] = val

    env = Env(
        default_cmake_args=extra_args,
        dry_run=args.dry_run,
        llvm_dir=llvm_dir,
        output_dir=output_dir,
        use_make=args.use_make,
    )

    if args.profile_output is not None:
        profile_location = args.profile_output
    else:
        profile_location = os.path.join(env.output_dir, 'pgo_profile.prof')

    result_args = Args(
        do_optimized_build=not args.no_optimized_build,
        include_debug_info=args.use_debug_info_in_benchmark,
        profile_location=profile_location,
        stage1_dir=stage1_dir,
    )

    return env, result_args
413
414
def _looks_like_llvm_dir(directory):
    """Arbitrary set of heuristics to determine if `directory` is an llvm dir.

    Errs on the side of false-positives. A path that doesn't exist, or that
    isn't a directory, is reported as not-an-llvm-dir instead of raising, so
    that callers can give the user a proper error message.
    """
    try:
        contents = set(os.listdir(directory))
    except (FileNotFoundError, NotADirectoryError):
        return False

    expected_contents = [
        'CODE_OWNERS.TXT',
        'cmake',
        'docs',
        'include',
        'utils',
    ]

    if not all(c in contents for c in expected_contents):
        return False

    # `include` is known to exist at this point, but it could be a file.
    try:
        include_listing = os.listdir(os.path.join(directory, 'include'))
    except NotADirectoryError:
        return False

    return 'llvm' in include_listing
438
439
def _die(*args, **kwargs):
    """Print to stderr (print-style arguments), then exit with status 1.

    Any `file` keyword passed by the caller is replaced with sys.stderr.
    """
    print(*args, **dict(kwargs, file=sys.stderr))
    sys.exit(1)
444
445
def _main():
    """Run the whole pipeline: stage1, instrumented build, benchmark, PGO.

    Exits with status 1 (via `_die`) if the LLVM directory or the required
    clang / compiler-rt checkouts can't be found. The stage1 build is skipped
    when a stage1 directory was given on the command line, and the final
    optimized build is skipped when it was disabled there.
    """
    env, args = _parse_args()

    if not _looks_like_llvm_dir(env.llvm_dir):
        _die('Looks like %s isn\'t an LLVM directory; please see --help' %
             env.llvm_dir)

    # Subprojects are looked up as siblings of the llvm directory, so report
    # the location that was actually checked.
    for project in ('clang', 'compiler-rt'):
        if not env.has_llvm_subproject(project):
            expected_at = os.path.normpath(
                os.path.join(env.llvm_dir, '..', project))
            _die('Need a %s checkout at %s' % (project, expected_at))

    def status(*message):
        # Progress goes to stderr; stdout is reserved for the final results.
        print(*message, file=sys.stderr)

    if args.stage1_dir is None:
        status('*** Building stage1 clang...')
        stage1_out = _build_stage1_clang(env)
    else:
        stage1_out = args.stage1_dir

    status('*** Building instrumented clang...')
    instrumented_out, profile_dir = _build_instrumented_clang(env, stage1_out)
    status('*** Running profdata benchmarks...')
    _run_benchmark(env, instrumented_out, args.include_debug_info)
    status('*** Generating profile...')
    _generate_instrumented_clang_profile(env, stage1_out, profile_dir,
                                         args.profile_location)

    print('Final profile:', args.profile_location)
    if args.do_optimized_build:
        status('*** Building PGO-optimized binaries...')
        optimized_out = _build_optimized_clang(env, stage1_out,
                                               args.profile_location)
        print('Final build directory:', optimized_out)
480
481
482if __name__ == '__main__':
483    _main()
484