1#!/usr/bin/env python2
2
3import argparse
4import os
5import pipes
6import re
7import sys
8
9from utils import FindBaseNaCl, GetObjcopyCmd, get_sfi_string, shellcmd
10
def NewerThanOrNotThere(old_path, new_path):
    """Tell whether new_path is missing or out of date relative to old_path.

    Returns True as soon as one of the two paths is found not to exist
    (old_path is checked first).  Otherwise returns True only when
    old_path's modification time is strictly later than new_path's.
    """
    for candidate in (old_path, new_path):
        if not os.path.exists(candidate):
            return True
    old_mtime = os.path.getmtime(old_path)
    new_mtime = os.path.getmtime(new_path)
    return old_mtime > new_mtime
19
def BuildRegex(patterns, syms):
    """Build a regular expression string for inclusion or exclusion.

    Creates a regex string from an array of patterns and an array
    of symbol names.  Each element in the patterns array is either a
    regex, or a range of entries in the symbol name array, e.g. '2:9'.

    Args:
        patterns: Sequence of pattern strings.  A pattern whose first
            character is a digit or ':' is read as a Python-style range
            ('n', 'a:b', 'a:' or ':b') into syms; anything else is used
            verbatim as a regex.
        syms: List of symbol names that ranges index into.  Symbols
            selected by a range are regex-escaped.

    Returns:
        A regex string in which every alternative is wrapped in '^...$'
        and alternatives are joined with '|'.  Returns '^$' when patterns
        is empty.

    Exits with status 1 (SystemExit), after printing a message, when a
    range has more than one ':' or a bound that is not an integer.
    """
    def invalid_range(bad_pattern):
        # Parenthesized single-argument print behaves identically under
        # Python 2's print statement and Python 3's print function.
        print('Invalid range syntax: {p}'.format(p=bad_pattern))
        sys.exit(1)

    pattern_list = []
    for pattern in patterns:
        # Slice instead of indexing so an empty pattern is handled as an
        # (empty) regex rather than raising IndexError.
        first_char = pattern[:1]
        if first_char.isdigit() or first_char == ':':
            # Legitimate symbols or regexes shouldn't start with a
            # digit or a ':', so interpret the pattern as a range.
            interval = pattern.split(':')
            if len(interval) > 2:
                invalid_range(pattern)
            try:
                if len(interval) == 1:
                    # Treat singleton 'n' as 'n:n+1'.
                    lower = int(interval[0])
                    upper = lower + 1
                else:
                    # Handle 'a:b', 'a:', and ':b' with suitable defaults.
                    lower = int(interval[0]) if interval[0] else 0
                    upper = int(interval[1]) if interval[1] else len(syms)
            except ValueError:
                # A bound such as '1x' is not a number; report it the same
                # way as any other malformed range.
                invalid_range(pattern)
            # Joining with '$|^' plus the outer '^'/'$' added below anchors
            # every selected symbol individually.
            pattern = '$|^'.join([re.escape(p) for p in syms[lower:upper]])
        pattern_list.append('^' + pattern + '$')
    return '|'.join(pattern_list) if pattern_list else '^$'
47
def MatchSymbol(sym, re_include, re_exclude, default_match):
    """Decide whether a symbol is selected by the include/exclude regexes.

    The exclude regex takes priority: a symbol it matches yields False
    without the include regex being consulted.  Otherwise a match against
    the include regex yields True, and a symbol matching neither yields
    default_match unchanged.
    """
    if not re_exclude.match(sym):
        # Not explicitly excluded, so includes (then the default) decide.
        return True if re_include.match(sym) else default_match
    return False
62
def AddOptionalArgs(argparser):
    """Register this script's optional command-line flags on argparser.

    The flags are added in the order listed in the table below, which is
    also the order in which argparse shows them in --help output.
    """
    # Each entry is (option strings, add_argument keyword arguments).
    option_table = [
        (('--force',),
         dict(dest='force', type=int, choices=[0, 1], default=1,
              help=('Force all re-translations of the pexe.'
                    ' Default %(default)s.'))),
        (('--include', '-i'),
         dict(dest='include', action='append', default=[],
              help='Subzero symbols to include (regex or line range)')),
        (('--exclude', '-e'),
         dict(dest='exclude', action='append', default=[],
              help='Subzero symbols to exclude (regex or line range)')),
        (('--output', '-o'),
         dict(dest='output', action='store', default='a.out',
              help='Output executable. Default %(default)s.')),
        (('-O',),
         dict(dest='optlevel', default='2',
              choices=['m1', '-1', '0', '1', '2'],
              help=('Optimization level (m1 and -1 are equivalent).'
                    ' Default %(default)s.'))),
        (('--filetype',),
         dict(dest='filetype', default='iasm',
              choices=['obj', 'asm', 'iasm'],
              help='Output file type.  Default %(default)s.')),
        (('--sandbox',),
         dict(dest='sandbox', action='store_true',
              help='Enable sandboxing in the translator')),
        (('--nonsfi',),
         dict(dest='nonsfi', action='store_true',
              help='Enable Non-SFI in the translator')),
        (('--enable-block-profile',),
         dict(dest='enable_block_profile', action='store_true',
              help='Enable basic block profiling.')),
        (('--target',),
         dict(dest='target', default='x8632',
              choices=['arm32', 'x8632', 'x8664'],
              help='Generate code for specified target.')),
        (('--verbose', '-v'),
         dict(dest='verbose', action='store_true',
              help='Display some extra debugging output')),
        (('--sz',),
         dict(dest='sz_args', action='append', default=[],
              help='Extra arguments for Subzero')),
        (('--llc',),
         dict(dest='llc_args', action='append', default=[],
              help='Extra arguments for llc')),
        (('--no-sz',),
         dict(dest='nosz', action='store_true',
              help='Run only post-Subzero build steps')),
        (('--fsanitize-address',),
         dict(dest='asan', action='store_true',
              help='Instrument with AddressSanitizer')),
    ]
    for flags, settings in option_table:
        argparser.add_argument(*flags, **settings)
109
def LinkSandbox(objs, exe, target, verbose=True):
    """Link object files into a sandboxed executable with le32-nacl-ld.gold.

    Args:
        objs: List of object file paths, placed on the link line between
            crtbegin.o and the szrt_sb_<target>.o runtime object.
        exe: Output executable path (the '-o' argument).
        target: 'x8632', 'x8664' or 'arm32'; selects the translator
            library directory and the runtime object.
        verbose: Forwarded to shellcmd() as its echo argument.
    """
    assert target in ('x8632', 'x8664', 'arm32'), \
        '-sandbox is not available for %s' % target
    base = FindBaseNaCl()
    toolchain = '{root}/toolchain/linux_x86/pnacl_newlib_raw'.format(root=base)
    arch_subdir = {'arm32': 'arm', 'x8632': 'x86-32', 'x8664': 'x86-64'}[target]
    lib_dir = '{tc}/translator/{sub}/lib'.format(tc=toolchain, sub=arch_subdir)

    def in_lib(name):
        # Path of a file inside the per-target translator lib directory.
        return '{linklib}/{name}'.format(linklib=lib_dir, name=name)

    runtime_obj = ('{root}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_sb_{target}.o').format(root=base, target=target)

    # Unlike the Non-SFI link, '-z noexecstack', '-pie' and
    # '--defsym=_begin=0' are deliberately not passed here.
    leading = [
        toolchain + '/bin/le32-nacl-ld.gold',
        '-nostdlib',
        '--no-fix-cortex-a8',
        '--eh-frame-hdr',
        '-z', 'text',
        '--build-id',
        '--entry=__pnacl_start',
        '-static',
        in_lib('crtbegin.o'),
    ]
    trailing = [
        runtime_obj,
        in_lib('libpnacl_irt_shim_dummy.a'),
        '--start-group',
        in_lib('libgcc.a'),
        in_lib('libcrt_platform.a'),
        '--end-group',
        in_lib('crtend.o'),
        '--undefined=_start',
        '--defsym=__Sz_AbsoluteZero=0',
        '-o', exe,
    ]
    shellcmd(leading + objs + trailing, echo=verbose)
148
def LinkNonsfi(objs, exe, target, verbose=True):
    """Link object files into a Non-SFI executable with le32-nacl-ld.gold.

    Args:
        objs: List of object file paths, placed on the link line between
            crtbegin.o and the szrt_nonsfi_<target>.o runtime object.
        exe: Output executable path (the '-o' argument).
        target: 'arm32' or 'x8632'; any other value raises KeyError because
            no Non-SFI library directory is listed for it.
        verbose: Forwarded to shellcmd() as its echo argument.
    """
    base = FindBaseNaCl()
    toolchain = '{root}/toolchain/linux_x86/pnacl_newlib_raw'.format(root=base)
    arch_subdir = {'arm32': 'arm-nonsfi', 'x8632': 'x86-32-nonsfi'}[target]
    lib_dir = '{tc}/translator/{sub}/lib'.format(tc=toolchain, sub=arch_subdir)

    def in_lib(name):
        # Path of a file inside the per-target translator lib directory.
        return '{linklib}/{name}'.format(linklib=lib_dir, name=name)

    runtime_obj = ('{root}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_nonsfi_{target}.o').format(root=base, target=target)

    leading = [
        toolchain + '/bin/le32-nacl-ld.gold',
        '-nostdlib',
        '--no-fix-cortex-a8',
        '--eh-frame-hdr',
        '-z', 'text',
        '-z', 'noexecstack',
        '--build-id',
        '--entry=__pnacl_start',
        '-pie',
        in_lib('crtbegin.o'),
    ]
    trailing = [
        runtime_obj,
        in_lib('libpnacl_irt_shim_dummy.a'),
        '--start-group',
        in_lib('libgcc.a'),
        in_lib('libcrt_platform.a'),
        '--end-group',
        in_lib('crtend.o'),
        '--undefined=_start',
        '--defsym=__Sz_AbsoluteZero=0',
        '--defsym=_begin=0',
        '-o', exe,
    ]
    shellcmd(leading + objs + trailing, echo=verbose)
184
def LinkNative(objs, exe, target, verbose=True):
    """Link object files into a native (unsandboxed) executable.

    Args:
        objs: List of object file paths placed right after the
            target-specific linker flags.
        exe: Output executable path (the '-o' argument).
        target: 'arm32', 'x8632' or 'x8664'.
        verbose: Forwarded to shellcmd() as its echo argument.
    """
    base = FindBaseNaCl()
    clang = ('{root}/../third_party/llvm-build/Release+Asserts/bin/clang'
             ).format(root=base)
    # NOTE(review): 'mips32' has a linker entry but no entry in the flag or
    # lib_dir tables below, so it currently ends in a KeyError.
    driver = {
        'arm32': '/usr/bin/arm-linux-gnueabihf-g++',
        'mips32': '/usr/bin/mipsel-linux-gnu-g++',
        'x8632': clang,
        'x8664': clang,
    }[target]
    target_flags = {
        'arm32': ['-mcpu=cortex-a9'],
        'x8632': ['-m32'],
        'x8664': ['-mx32'],
    }[target]
    irt_subdir = {
        'arm32': 'arm-linux',
        'x8632': 'x86-32-linux',
        'x8664': 'x86-64-linux',
    }[target]

    # The doubled braces format to a literal '{a,b,c}.o' pattern on the
    # command line; presumably the shell expands it into the three IRT
    # objects -- confirm against shellcmd().
    irt_objs = ('{root}/toolchain/linux_x86/pnacl_newlib_raw/translator/'
                '{lib_dir}/lib/'
                '{{unsandboxed_irt,irt_random,irt_query_list}}.o').format(
                    root=base, lib_dir=irt_subdir)
    runtime_obj = ('{root}/toolchain_build/src/subzero/build/runtime/'
                   'szrt_native_{target}.o').format(root=base, target=target)

    trailing = [
        '-o', exe,
        irt_objs,
        runtime_obj,
        '-lm', '-lpthread', '-lrt',
        '-Wl,--defsym=__Sz_AbsoluteZero=0',
    ]
    shellcmd([driver] + target_flags + objs + trailing, echo=verbose)
221
def main():
    """Create a hybrid translation from Subzero and llc.

    Takes a finalized pexe and builds a native executable as a hybrid of Subzero
    and llc translated bitcode.  Linker tricks are used to determine whether
    Subzero or llc generated symbols are used, on a per-symbol basis.

    By default, for every symbol, its Subzero version is used.  Subzero and llc
    symbols can be selectively enabled/disabled via regular expressions on the
    symbol name, or by ranges of lines in this program's auto-generated symbol
    file.

    For each symbol, the --exclude arguments are first checked (the symbol is
    'rejected' on a match), followed by the --include arguments (the symbol is
    'accepted' on a match).  A symbol matching neither is 'rejected' if any
    --include arguments were given, and 'accepted' otherwise.  The Subzero
    version is used for an 'accepted' symbol, and the llc version is used for a
    'rejected' symbol.

    Each --include and --exclude argument can be a regular expression or a range
    of lines in the symbol file.  Each regular expression is wrapped inside
    '^$', so if you want a substring match on 'foo', use '.*foo.*' instead.
    Ranges use python-style 'first:last' notation, so e.g. use '0:10' or ':10'
    for the first 10 lines of the file, or '1' for the second line of the file.

    If no --include or --exclude arguments are given, the executable is produced
    entirely using Subzero, without using llc or linker tricks.

    When using the --force=0 option, this script uses file modification
    timestamps to determine whether llc and Subzero re-translation are needed.
    It checks timestamps of llc, pnacl-sz, and the pexe against the translated
    object files to determine the minimal work necessary.  The --force=1 option
    (default) suppresses those checks and re-translates everything.

    This script expects various PNaCl and LLVM tools to be found within the
    native_client tree.  When changes are made to these tools, copy them this
    way:
      cd native_client
      toolchain_build/toolchain_build_pnacl.py llvm_x86_64_linux \\
      --install=toolchain/linux_x86/pnacl_newlib_raw
    """
    # The docstring above doubles as the --help description.  It is shown
    # verbatim (RawTextHelpFormatter), so the first line gets the same
    # four-space indent that the remaining docstring lines already carry.
    argparser = argparse.ArgumentParser(
        description='    ' + main.__doc__,
        formatter_class=argparse.RawTextHelpFormatter)
    AddOptionalArgs(argparser)
    argparser.add_argument('pexe', help='Finalized pexe to translate')
    args = argparser.parse_args()
    pexe = args.pexe
    exe = args.output
    ProcessPexe(args, pexe, exe)
271
def ProcessPexe(args, pexe, exe):
    """Translate one pexe into an executable, optionally as an sz/llc hybrid.

    Steps, in order:
      1. In hybrid mode (any --include or --exclude given), run
         pnacl-translate to produce <base>.llc.o and dump its symbol names.
      2. Run pnacl-sz (skipped with --no-sz), assemble its output with
         llvm-mc when --filetype is not 'obj', and in hybrid mode dump the
         symbol names of <base>.sz.o.
      3. In hybrid mode, write the list of selected symbols, produce
         weakened copies of both objects with objcopy, and combine them
         into <base>.o with a relocatable link.
      4. Link the executable with LinkSandbox, LinkNonsfi or LinkNative.

    Steps 1 and 2 are each skipped when --force=0 and the corresponding
    object file exists and is at least as new as both the pexe and the
    translator binary.

    Args:
        args: Parsed argparse namespace carrying the options registered by
            AddOptionalArgs.
        pexe: Path of the finalized pexe to translate.
        exe: Path of the executable to produce.

    Exits with status 1 (SystemExit) when --fsanitize-address is combined
    with --sandbox or --nonsfi.
    """
    [pexe_base, ext] = os.path.splitext(pexe)
    if ext != '.pexe':
        pexe_base = pexe
    # Two spellings of each path are kept: the raw one for Python-side file
    # access (os.path, open) and the pipes.quote()d one for splicing into
    # shell command lines.
    pexe_unescaped = pexe
    pexe_base_unescaped = pexe_base
    pexe_base = pipes.quote(pexe_base)
    pexe = pipes.quote(pexe)

    nacl_root = FindBaseNaCl()
    path_addition = (
        '{root}/toolchain/linux_x86/pnacl_newlib_raw/bin'
        ).format(root=nacl_root)
    obj_llc = pexe_base + '.llc.o'
    obj_llc_unescaped = pexe_base_unescaped + '.llc.o'
    obj_sz = pexe_base + '.sz.o'
    obj_sz_unescaped = pexe_base_unescaped + '.sz.o'
    asm_sz = pexe_base + '.sz.s'
    obj_llc_weak = pexe_base + '.weak.llc.o'
    obj_sz_weak = pexe_base + '.weak.sz.o'
    obj_partial = obj_sz  # overridden for hybrid mode
    sym_llc = pexe_base + '.sym.llc.txt'
    sym_sz = pexe_base + '.sym.sz.txt'
    sym_sz_unescaped = pexe_base_unescaped + '.sym.sz.txt'
    whitelist_sz = pexe_base + '.wl.sz.txt'
    whitelist_sz_unescaped = pexe_base_unescaped + '.wl.sz.txt'
    pnacl_sz = (
        '{root}/toolchain_build/src/subzero/pnacl-sz'
        ).format(root=nacl_root)
    llcbin = '{base}/pnacl-llc'.format(base=path_addition)
    objcopy = '{base}/{objcopy}'.format(base=path_addition,
                                        objcopy=GetObjcopyCmd(args.target))
    opt_level = args.optlevel
    # pnacl-translate has no -Om1/-O-1 level, so those map to -O0 for llc.
    opt_level_map = { 'm1':'0', '-1':'0', '0':'0', '1':'1', '2':'2' }
    hybrid = args.include or args.exclude
    native = not args.sandbox and not args.nonsfi
    if args.asan:
        if args.sandbox or args.nonsfi:
            print('Can only use AddressSanitizer with a native build')
            sys.exit(1)
        if '-fsanitize-address' not in args.sz_args:
            args.sz_args.append('-fsanitize-address')

    # Timestamp checks must see the raw paths: a shell-quoted path that
    # actually needed quoting never exists on disk, which would make the
    # check report "not there" every time.
    if hybrid and (args.force or
                   NewerThanOrNotThere(pexe_unescaped, obj_llc_unescaped) or
                   NewerThanOrNotThere(llcbin, obj_llc_unescaped)):
        arch = {
          'arm32': 'arm' + get_sfi_string(args, 'v7', '-nonsfi', '-nonsfi'),
          'x8632': 'x86-32' + get_sfi_string(args, '', '-nonsfi', '-linux'),
          'x8664': 'x86-64' + get_sfi_string(args, '', '', '-linux')
        }[args.target]

        # Only run pnacl-translate in hybrid mode.
        shellcmd(['{base}/pnacl-translate'.format(base=path_addition),
                  '-split-module=1',
                  '-ffunction-sections',
                  '-fdata-sections',
                  '-c',
                  '-arch',  arch,
                  '-O' + opt_level_map[opt_level],
                  '--pnacl-driver-append-LLC_FLAGS_EXTRA=-externalize',
                  '-o', obj_llc] +
                 (['--pnacl-driver-verbose'] if args.verbose else []) +
                 args.llc_args +
                 [pexe],
                 echo=args.verbose)
        if native:
            # Native builds rename the program's _start to _user_start.
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
                ).format(objcopy=objcopy, obj=obj_llc), echo=args.verbose)
        # Generate llc syms file for consistency, even though it's not used.
        shellcmd((
            'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
            ).format(obj=obj_llc, sym=sym_llc), echo=args.verbose)

    if (args.force or
        NewerThanOrNotThere(pexe_unescaped, obj_sz_unescaped) or
        NewerThanOrNotThere(pnacl_sz, obj_sz_unescaped)):
        if not args.nosz:
            # Run pnacl-sz regardless of hybrid mode.
            shellcmd([pnacl_sz,
                      '-O' + opt_level,
                      '-bitcode-format=pnacl',
                      '-filetype=' + args.filetype,
                      '-o', obj_sz if args.filetype == 'obj' else asm_sz,
                      '-target=' + args.target] +
                     (['-externalize',
                       '-ffunction-sections',
                       '-fdata-sections'] if hybrid else []) +
                     (['-sandbox'] if args.sandbox else []) +
                     (['-nonsfi'] if args.nonsfi else []) +
                     (['-enable-block-profile'] if
                          args.enable_block_profile and not args.sandbox
                          else []) +
                     args.sz_args +
                     [pexe],
                     echo=args.verbose)
        if args.filetype != 'obj':
            # pnacl-sz wrote assembly; turn it into an object with llvm-mc.
            triple = {
              'arm32': 'arm' + get_sfi_string(args, '-nacl', '', ''),
              'x8632': 'i686' + get_sfi_string(args, '-nacl', '', ''),
              'x8664': 'x86_64' +
                        get_sfi_string(args, '-nacl', '-linux-gnux32',
                                       '-linux-gnux32'),
            }[args.target]

            shellcmd((
                '{base}/llvm-mc -triple={triple} -filetype=obj -o {obj} {asm}'
                ).format(base=path_addition, asm=asm_sz, obj=obj_sz,
                         triple=triple),
                     echo=args.verbose)
        if native:
            shellcmd((
                '{objcopy} --redefine-sym _start=_user_start {obj}'
                ).format(objcopy=objcopy, obj=obj_sz), echo=args.verbose)
        if hybrid:
            shellcmd((
                'nm {obj} | sed -n "s/.* [a-zA-Z] //p" > {sym}'
                ).format(obj=obj_sz, sym=sym_sz), echo=args.verbose)

    if hybrid:
        with open(sym_sz_unescaped) as f:
            sz_syms = f.read().splitlines()
        re_include_str = BuildRegex(args.include, sz_syms)
        re_exclude_str = BuildRegex(args.exclude, sz_syms)
        re_include = re.compile(re_include_str)
        re_exclude = re.compile(re_exclude_str)
        # If a symbol doesn't explicitly match re_include or re_exclude,
        # the default MatchSymbol() result is True, unless some --include
        # args are provided.
        default_match = not args.include

        # The whitelist holds the symbols for which MatchSymbol() is True;
        # those are the ones weakened in the llc object below.
        whitelist_has_items = False
        with open(whitelist_sz_unescaped, 'w') as f:
            for sym in sz_syms:
                if MatchSymbol(sym, re_include, re_exclude, default_match):
                    f.write(sym + '\n')
                    whitelist_has_items = True
        # Every symbol of the Subzero object is weakened.
        shellcmd((
            '{objcopy} --weaken {obj} {weak}'
            ).format(objcopy=objcopy, obj=obj_sz, weak=obj_sz_weak),
            echo=args.verbose)
        if whitelist_has_items:
            # objcopy returns an error if the --weaken-symbols file is empty.
            shellcmd((
                '{objcopy} --weaken-symbols={whitelist} {obj} {weak}'
                ).format(objcopy=objcopy,
                         whitelist=whitelist_sz, obj=obj_llc,
                         weak=obj_llc_weak),
                     echo=args.verbose)
        else:
            # Nothing to weaken: just copy the llc object.
            shellcmd((
                '{objcopy} {obj} {weak}'
                ).format(objcopy=objcopy, obj=obj_llc, weak=obj_llc_weak),
                echo=args.verbose)
        obj_partial = pexe_base + '.o'
        ld = {
          'arm32': 'arm-linux-gnueabihf-ld',
          'x8632': 'ld',
          'x8664': 'ld',
        }[args.target]
        emulation = {
          'arm32': 'armelf_linux_eabi',
          'x8632': 'elf_i386',
          'x8664': 'elf32_x86_64' if not args.sandbox else 'elf_x86_64',
        }[args.target]
        # Relocatable link of the two weakened objects, Subzero first.
        shellcmd((
            '{ld} -r -m {emulation} -o {partial} {sz} {llc}'
            ).format(ld=ld, emulation=emulation, partial=obj_partial,
                     sz=obj_sz_weak, llc=obj_llc_weak),
                 echo=args.verbose)
        # Make every symbol local, then re-export only the entry point and
        # the block-profile info symbol.
        shellcmd((
            '{objcopy} -w --localize-symbol="*" {partial}'
            ).format(objcopy=objcopy, partial=obj_partial),
            echo=args.verbose)
        shellcmd((
            '{objcopy} --globalize-symbol={start} ' +
            '--globalize-symbol=__Sz_block_profile_info {partial}'
            ).format(objcopy=objcopy, partial=obj_partial,
                     start=get_sfi_string(args, '_start', '_start',
                                          '_user_start')),
                 echo=args.verbose)

    # Run the linker regardless of hybrid mode.
    if args.sandbox:
        LinkSandbox([obj_partial], exe, args.target, args.verbose)
    elif args.nonsfi:
        LinkNonsfi([obj_partial], exe, args.target, args.verbose)
    else:
        objs = [obj_partial]
        if args.asan:
            objs.append(
                ('{root}/toolchain_build/src/subzero/build/runtime/' +
                 'szrt_asan_{target}.o').format(root=nacl_root,
                                                target=args.target))
        LinkNative(objs, exe, args.target, args.verbose)

    # Put the extra verbose printing at the end.
    if args.verbose and hybrid:
        print('include={regex}'.format(regex=re_include_str))
        print('exclude={regex}'.format(regex=re_exclude_str))
        print('default_match={dm}'.format(dm=default_match))
        print('Number of Subzero syms = {num}'.format(num=len(sz_syms)))
473
# Run the translation driver only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
476