1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (C) 2019 The Android Open Source Project
5#
6# Licensed under the Apache License, Version 2.0 (the "License");
7# you may not use this file except in compliance with the License.
8# You may obtain a copy of the License at
9#
10#      http://www.apache.org/licenses/LICENSE-2.0
11#
12# Unless required by applicable law or agreed to in writing, software
13# distributed under the License is distributed on an "AS IS" BASIS,
14# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15# See the License for the specific language governing permissions and
16# limitations under the License.
17"""Helper tool to generate cross-compiled syscall and constant tables to JSON.
18
19This script takes the LLVM IR of libconstants.gen.c and libsyscalls.gen.c and
20generates the `constants.json` file with that. LLVM IR files are moderately
21architecture-neutral (at least for this case).
22"""
23
24import argparse
25import collections
26import json
27import re
28import sys
29
# Matches the definition of a NUL-terminated string constant. Group 1 is the
# symbol (for example @.str.5); group 2 is the text between `c"` and the
# trailing `\00`, which may contain neither double quotes nor backslashes.
_STRING_CONSTANT_RE = re.compile(
    r'(@[a-zA-Z0-9.]+) = .*c"([^"\\]+)\\00".*')

# Matches a single `%struct.constant_entry { ... }` or
# `%struct.syscall_entry { ... }` initializer; group 1 is whatever sits
# between the braces.
_TABLE_ENTRY_RE = re.compile(
    r'%struct.(?:constant|syscall)_entry\s*{\s*([^}]+)\s*}')

# The contents of a table entry look something like
#
#  i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str.5, i32 0, i32 0), i32 5
#
# for arm-v7a. Only two pieces are captured: the name operand (either `null`
# or an @-symbol such as @.str.5) and the very last number of the entry.
_TABLE_ENTRY_CONTENTS = re.compile(
    r'.*?(null|@[a-zA-Z0-9.]+).* (-?\d+)')

# Value returned by parse_llvm_ir(): the name of the table that was found and
# the entries extracted from it.
ParseResults = collections.namedtuple('ParseResults',
                                      'table_name table_entries')

# Shown after the argument help by the command-line parser.
HELP_EPILOG = (
    'Generate LLVM IR: '
    'clang -S -emit-llvm libconstants.gen.c libsyscalls.gen.c\n')
46
47
def parse_llvm_ir(ir):
    """Extracts the syscall or constant table from one LLVM IR file.

    Args:
        ir: An iterable yielding the lines of the LLVM IR (for example an
            open text file).

    Returns:
        A ParseResults tuple. `table_name` is 'syscalls' or 'constants'
        according to the last table line seen, or '' when no table line was
        found. `table_entries` is an ordered mapping from each entry's name
        to its integer value.

    Raises:
        ValueError: If a table entry does not have the expected shape.
        KeyError: If a table entry refers to a string constant that was not
            seen earlier in the input.
    """
    strings_by_symbol = collections.OrderedDict()
    entries = collections.OrderedDict()
    name_of_table = ''

    for ir_line in ir:
        # String constants are recorded first so that table entries, which
        # only reference them by symbol, can be resolved to their text.
        matched = _STRING_CONSTANT_RE.match(ir_line)
        if matched:
            symbol, text = matched.groups()
            strings_by_symbol[symbol] = text
            continue

        is_syscall_table = '@syscall_table' in ir_line
        if not is_syscall_table and '@constant_table' not in ir_line:
            continue
        name_of_table = 'syscalls' if is_syscall_table else 'constants'

        for raw_entry in _TABLE_ENTRY_RE.findall(ir_line):
            parsed = _TABLE_ENTRY_CONTENTS.match(raw_entry)
            if parsed is None:
                raise ValueError('Failed to parse table entry %r' % raw_entry)
            symbol, number = parsed.group(1), parsed.group(2)
            if symbol == 'null':
                # A null name is the end-of-table marker; ignore the rest of
                # the entries on this line.
                break
            entries[strings_by_symbol[symbol]] = int(number)

    return ParseResults(name_of_table, entries)
76
77
# Maps the numeric MINIJAIL_ARCH_NR value to the architecture name stored in
# the JSON output. A hardcoded mapping is needed because the constants table
# can only carry numeric values.
_ARCH_NAMES = {
    0xC000003E: 'x86_64',
    0x40000003: 'x86',
    0xC00000B7: 'arm64',
    0x40000028: 'arm',
}


def main(argv=None):
    """Main entrypoint.

    Parses every LLVM IR file given on the command line, merges the tables
    into a single dictionary, adds the top-level `arch_nr`, `bits` and
    `arch_name` fields and writes the result as JSON to `--output`.

    Args:
        argv: The command-line arguments without the program name. Defaults
            to `sys.argv[1:]` when None.

    Returns:
        0 on success.

    Raises:
        KeyError: If no constants table was parsed, or if it lacks
            MINIJAIL_ARCH_NR or MINIJAIL_ARCH_BITS.
        ValueError: If the architecture number is not one of the known ones.
        Any exception raised by parse_llvm_ir() is propagated unchanged.
    """

    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description=__doc__, epilog=HELP_EPILOG)
    parser.add_argument('--output',
                        help='The path of the generated constants.json file.',
                        type=argparse.FileType('w'),
                        required=True)
    parser.add_argument(
        'llvm_ir_files',
        help='An LLVM IR file with one of the {constants,syscall} table.',
        metavar='llvm_ir_file',
        nargs='+',
        type=argparse.FileType('r'))
    opts = parser.parse_args(argv)

    # argparse.FileType opens every file while parsing the arguments and
    # never closes them, so they are closed explicitly below. Closing the
    # output also flushes it, which surfaces late write errors instead of
    # leaving them to interpreter shutdown.
    opened_files = list(opts.llvm_ir_files) + [opts.output]
    try:
        constants_json = {}
        for ir in opts.llvm_ir_files:
            parse_results = parse_llvm_ir(ir)
            constants_json[parse_results.table_name] = (
                parse_results.table_entries)

        # Populate the top-level fields.
        constants = constants_json['constants']
        arch_nr = constants['MINIJAIL_ARCH_NR']
        constants_json['arch_nr'] = arch_nr
        constants_json['bits'] = constants['MINIJAIL_ARCH_BITS']

        arch_name = _ARCH_NAMES.get(arch_nr)
        if arch_name is None:
            raise ValueError('Unknown architecture: 0x%08X' % arch_nr)
        constants_json['arch_name'] = arch_name

        json.dump(constants_json, opts.output, indent='  ')
    finally:
        for handle in opened_files:
            # FileType maps '-' to the standard streams; those are not ours
            # to close.
            if handle is not sys.stdin and handle is not sys.stdout:
                handle.close()
    return 0
122
123
if __name__ == '__main__':
    # Executed as a script: forward the command-line arguments (without the
    # program name) to main() and use its return value as the exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
126