1# Copyright (C) 2018 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#            http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16"""Utility for ICU4C code generation"""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
22import logging
23import os
24import site
25import sys
26import textwrap
27from collections import deque
28
29import jinja2
30
# Directory containing this script; templates and allowlist files are resolved
# relative to it.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))
# Four directory levels above THIS_DIR; android_path() joins paths onto it.
ANDROID_TOP = os.path.realpath(os.path.join(THIS_DIR, '../../../..'))

# Shared Jinja environment loading templates from THIS_DIR/jinja_templates.
JINJA_ENV = jinja2.Environment(loader=jinja2.FileSystemLoader(
    os.path.join(THIS_DIR, 'jinja_templates')))
# Whitespace control for block tags: trim_blocks removes the first newline
# after a block tag, lstrip_blocks strips whitespace before a block tag.
JINJA_ENV.trim_blocks = True
JINJA_ENV.lstrip_blocks = True
38
def generate_shim(functions, includes, suffix, template_file):
    """Render the shim library source from a Jinja template.

    Args:
        functions: Functions made available to the template as 'functions'.
        includes: Header names made available to the template as
            'icu_headers'.
        suffix: Value made available to the template as 'suffix'.
        template_file: Name of the template, looked up through JINJA_ENV.

    Returns:
        The rendered template text.
    """
    template = JINJA_ENV.get_template(template_file)
    context = dict(functions=functions, icu_headers=includes, suffix=suffix)
    return template.render(context)
47
def generate_symbol_txt(shim_functions, extra_function_names, template_file):
    """Render the symbol txt file from a Jinja template.

    Args:
        shim_functions: Functions made available to the template as
            'shim_functions'.
        extra_function_names: Names made available to the template as
            'extra_function_names'.
        template_file: Name of the template, looked up through JINJA_ENV.

    Returns:
        The rendered template text.
    """
    template = JINJA_ENV.get_template(template_file)
    return template.render(
        # Each shim_function is given a suffix.
        shim_functions=shim_functions,
        # Each extra function name is included as given.
        extra_function_names=extra_function_names,
    )
57
def get_allowlisted_apis(allowlist_file):
    """Return the set of API names listed in allowlist_file.

    The file is looked up relative to THIS_DIR. Every line is stripped of
    surrounding whitespace; empty lines and lines starting with '#' are
    skipped.
    """
    with open(os.path.join(THIS_DIR, allowlist_file), 'r') as allowlist:
        entries = (raw_line.strip() for raw_line in allowlist)
        return {entry for entry in entries if entry and not entry.startswith("#")}
67
def android_path(*args):
    """Join the given path components onto ANDROID_TOP and return the result."""
    components = (ANDROID_TOP,) + args
    return os.path.join(*components)
71
72
# TODO: Include clang bindings in prebuilt package. http://b/119270767
# Register the in-tree clang Python bindings directory as a site directory.
# This must run before the import below, which is why that import is not at
# the top of the file.
site.addsitedir(android_path('external/clang/bindings/python'))
import clang.cindex  # pylint: disable=import-error,wrong-import-position

# TODO: Do not hardcode clang version. http://b/119270767
# Revision suffix of the prebuilt clang directory (see CLANG_PATH).
CLANG_REVISION = 'r383902c'
# Suffix of the libclang.so file name loaded by DeclaredFunctionsParser.
CLANG_LIB_VERSION = '11git'
# Version directory under lib64/clang/ whose include/ directory is added to the
# include path in DeclaredFunctionsParser.get_cflags().
CLANG_HEADER_VERSION = '11.0.3'
CLANG_PATH = android_path('prebuilts/clang/host/linux-x86/clang-%s' % CLANG_REVISION)
82
83
class Function:
    """A visible function found in an ICU header.

    Attributes:
        name: The function name.
        result_type: Spelling of the return type.
        params: List of (param_type, param_name) pairs.
        is_variadic: True if the function takes a variable argument list.
        va_list_insert_position: 0-based index at which 'args' is inserted
            into the argument list of a variadic function; -1 until
            set_variadic_callee() is called.
        callee: Name of the function to call; equals name unless replaced by
            set_variadic_callee().
        last_param: Name of the last named parameter. Only set when the
            function is variadic.
        handle: 'handle_' followed by the module name.
        return_void: True if result_type is exactly 'void'.
    """

    def __init__(self, name, result_type, params, is_variadic, module):
        """
        Args:
            name: The function name.
            result_type: Spelling of the return type.
            params: List of (param_type, param_name) pairs.
            is_variadic: Whether the function ends with '...'.
            module: Module name, used to build the handle attribute.
        """
        self.name = name
        self.result_type = result_type
        self.params = params
        self.is_variadic = is_variadic
        self.va_list_insert_position = -1

        # callee will be used in dlsym and may be identical to others for
        # functions with variable argument lists.
        self.callee = self.name
        if self.is_variadic:
            self.last_param = self.params[-1][1]
        self.handle = 'handle_' + module
        self.return_void = self.result_type == 'void'

    @property
    def param_str(self):
        """Returns a string usable as a parameter list in a function decl."""
        params = []
        for param_type, param_name in self.params:
            bracket = param_type.find('[')
            if bracket >= 0:
                # `int foo[42]` will be a param_type of `int [42]` and a
                # param_name of `foo`. We need to put these back in the right
                # order.
                param_name += param_type[bracket:]
                param_type = param_type[:bracket]
            params.append('{} {}'.format(param_type, param_name))
        if self.is_variadic:
            params.append('...')
        return ', '.join(params)

    @property
    def arg_str(self):
        """Returns a string usable as an argument list in a function call.

        For a variadic function the va_list argument, named 'args', is
        inserted at va_list_insert_position.

        Raises:
            ValueError: If the function is variadic and no insert position has
                been set through set_variadic_callee().
        """
        args = [param_name for _, param_name in self.params]
        if self.is_variadic:
            if self.va_list_insert_position < 0:
                # Built as a single-line literal: the previous triple-quoted
                # form leaked source indentation into the middle of the
                # message.
                raise ValueError(
                    '{}({}) is variadic, but has no valid inserted position'.format(
                        self.name, self.param_str))
            args.insert(self.va_list_insert_position, 'args')
        return ', '.join(args)

    def set_variadic_callee(self, callee, inserted_position):
        """Set variadic callee with callee name and inserted position.

        Does nothing when the function is not variadic.
        """
        if self.is_variadic:
            self.callee = callee
            self.va_list_insert_position = inserted_position
140
141
def logger():
    """Return the logger registered under this module's name."""
    module_logger = logging.getLogger(__name__)
    return module_logger
145
146
class DeclaredFunctionsParser:
    """Parser to get declared functions from ICU4C headers.

    parse() walks the headers in the 'unicode' directory of the ICU 'common'
    and 'i18n' modules with libclang and fills in the results exposed by the
    header_includes, header_paths_to_copy, declared_functions and
    header_to_function_names properties.
    """

    def __init__(self, decl_filters, allowlisted_decl_filter):
        """
        Args:
            decl_filters: A list of filters for declared functions. A function
                is kept only if every filter returns a true value for it.
            allowlisted_decl_filter: A list of allowlisting filters for declared
                functions. If any of them accepts a function, the function is
                kept without consulting decl_filters.
        """
        self.decl_filters = decl_filters
        self.allowlisted_decl_filters = allowlisted_decl_filter
        self.va_functions_mapping = {}
        self.ignored_include_dependency = {}

        # properties to store the parsing result
        self.all_headers = []
        self.all_header_paths_to_copy = set()
        self.all_declared_functions = []
        self.seen_functions = set()
        self.all_header_to_function_names = {}

        # Configures libclang to load in our environment
        # Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, etc.  Note
        # that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
        # clang.cindex.Config.set_library_path(os.path.join(CLANG_PATH, 'lib64'))
        clang.cindex.Config.set_library_file(
            os.path.join(CLANG_PATH, 'lib64', 'libclang.so.%s' % CLANG_LIB_VERSION))

    def set_va_functions_mapping(self, mapping):
        """Set mapping from a variable argument function to an implementation.

        Functions w/ variable argument lists (...) need special care to call
        their corresponding v- versions that accept a va_list argument. Note that
        although '...' will always appear as the last parameter, its v- version
        may put the va_list arg in a different place. Hence we provide an index
        to indicate the position.
        e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
        'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
        inserted at index 3 (counting from 0) of the argument list."""
        self.va_functions_mapping = mapping

    def set_ignored_include_dependency(self, mapping):
        """
        A sample mapping is { "ulocdata.h" : [ "uloc.h", "ures.h" ] }.
        The include dependencies will explicitly be ignored when producing header_paths_to_copy.
        """
        self.ignored_include_dependency = mapping

    @property
    def header_includes(self):
        """Return all headers declaring the functions returned by declared_functions.

        Paths are shortened to the 'unicode/xyz.h' form. If all functions in
        the header are filtered, the header is not included in here."""
        return [DeclaredFunctionsParser.short_header_path(header) for header in self.all_headers]

    @property
    def header_paths_to_copy(self):
        """Return all headers needed to be copied"""
        return self.all_header_paths_to_copy

    @property
    def declared_functions(self):
        """Return all declared functions after filtering"""
        return self.all_declared_functions

    @property
    def header_to_function_names(self):
        """Return the mapping from the header file name to a list of function names in the file"""
        return self.all_header_to_function_names

    @staticmethod
    def get_cflags():
        """Returns the cflags that should be used for parsing."""
        clang_flags = [
            '-x',
            'c',
            '-std=c99',
            '-DU_DISABLE_RENAMING=1',
            '-DU_SHOW_CPLUSPLUS_API=0',
            '-DU_HIDE_DRAFT_API',
            '-DU_HIDE_DEPRECATED_API',
            '-DU_HIDE_INTERNAL_API',
            '-DANDROID_LINK_SHARED_ICU4C',
        ]

        include_dirs = [
            # TODO: Do not hardcode clang version. http://b/119270767
            os.path.join(CLANG_PATH, 'lib64/clang/', CLANG_HEADER_VERSION, 'include/'),
            android_path('bionic/libc/include'),
            android_path('external/icu/android_icu4c/include'),
            android_path('external/icu/icu4c/source/common'),
            android_path('external/icu/icu4c/source/i18n'),
        ]

        clang_flags.extend('-I' + include_dir for include_dir in include_dirs)
        return clang_flags

    @staticmethod
    def get_all_cpp_headers():
        """Return all C++ header names in external/icu/tools/icu4c_srcgen/cxxfiles.txt.

        Blank lines and lines starting with '#' are skipped."""
        cpp_headers = []
        with open(android_path('external/icu/tools/icu4c_srcgen/cxxfiles.txt'), 'r') as file:
            for line in file:
                line = line.strip()
                if line and not line.startswith("#"):
                    cpp_headers.append(line)
        return cpp_headers

    def parse(self):
        """Parse the headers and collect the declared functions after filtering
        and the headers containing the functions."""
        index = clang.cindex.Index.create()
        # Both values are the same for every header, so compute them once
        # instead of once per header file.
        cflags = DeclaredFunctionsParser.get_cflags()
        cpp_headers = set(DeclaredFunctionsParser.get_all_cpp_headers())

        icu_modules = (
            'common',
            'i18n',
        )
        header_dependencies = {}
        for module in icu_modules:
            path = android_path('external/icu/icu4c/source', module, 'unicode')
            # os.listdir() returns entries in arbitrary order; sort so headers
            # are always visited in the same order.
            files = [os.path.join(path, f)
                     for f in sorted(os.listdir(path)) if f.endswith('.h')]

            for file_path in files:
                base_header_name = os.path.basename(file_path)
                # Ignore C++ headers.
                if base_header_name in cpp_headers:
                    continue

                tunit = index.parse(file_path, cflags)
                DeclaredFunctionsParser.handle_diagnostics(tunit)
                header_dependencies[file_path] = [file_inclusion.include.name for file_inclusion
                                                  in tunit.get_includes()]
                visible_functions = self.get_visible_functions(
                    tunit.cursor, module, file_path)
                self.all_header_to_function_names[base_header_name] = \
                    [f.name for f in visible_functions]
                for function in visible_functions:
                    self.seen_functions.add(function.name)
                    self.all_declared_functions.append(function)
                if visible_functions:
                    self.all_headers.append(file_path)

        # Sort to produce a deterministic output
        self.all_declared_functions = sorted(self.all_declared_functions, key=lambda f: f.name)
        self.all_headers = sorted(self.all_headers)

        self._collect_header_paths_to_copy(header_dependencies)

    def _collect_header_paths_to_copy(self, header_dependencies):
        """Add all_headers and the ICU4C headers they include, directly or
        indirectly, to all_header_paths_to_copy.

        header_dependencies is a map from icu4c header file path to a list of included headers.
        The key must be a ICU4C header, but the value could contain non-ICU4C headers, e.g.
        {
          ".../icu4c/source/common/unicode/utype.h": [
             ".../icu4c/source/common/unicode/uversion.h",
             ".../bionic/libc/include/ctype.h",
           ],
           ...
        }
        """
        file_queue = deque(self.all_headers)
        self.all_header_paths_to_copy.update(self.all_headers)
        file_processed = set()
        while file_queue:
            current = file_queue.popleft()
            if current in file_processed:
                continue
            file_processed.add(current)
            # Dependencies of this header that are explicitly ignored.
            ignored = self.ignored_include_dependency.get(os.path.basename(current), ())
            for header in header_dependencies[current]:
                if os.path.basename(header) in ignored:
                    continue
                if header in header_dependencies:  # Do not care non-icu4c headers
                    self.all_header_paths_to_copy.add(header)
                    file_queue.append(header)

    @staticmethod
    def handle_diagnostics(tunit):
        """Logs compiler diagnostics. Exits if errors occurred."""
        errors = 0
        for diag in tunit.diagnostics:
            if diag.severity == clang.cindex.Diagnostic.Fatal:
                level = logging.CRITICAL
                errors += 1
            elif diag.severity == clang.cindex.Diagnostic.Error:
                level = logging.ERROR
                errors += 1
            elif diag.severity == clang.cindex.Diagnostic.Warning:
                level = logging.WARNING
            elif diag.severity == clang.cindex.Diagnostic.Note:
                level = logging.INFO
            else:
                # Any other severity (e.g. Ignored): without this branch
                # `level` would be unbound or carried over from the previous
                # diagnostic.
                level = logging.DEBUG
            logger().log(
                level, '%s:%s:%s %s', diag.location.file, diag.location.line,
                diag.location.column, diag.spelling)
        if errors:
            sys.exit('Errors occurred during parsing. Exiting.')

    def get_visible_functions(self, cursor, module, file_name):
        """Returns a list of all visible functions in a header file."""
        return [self.from_cursor(child, module)
                for child in cursor.get_children()
                if self.should_process_decl(child, file_name)]

    def should_process_decl(self, decl, file_name):
        """Returns True if this function needs to be processed.

        A decl is processed when it is a function declaration located in
        file_name, has not been seen in a previously parsed header, has
        default visibility, and is either accepted by one of the allowlisting
        filters or by every filter in decl_filters."""
        if decl.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            return False
        decl_file = decl.location.file
        # A location without a file cannot belong to file_name.
        if decl_file is None or decl_file.name != file_name:
            return False
        if decl.spelling in self.seen_functions:
            return False
        if not DeclaredFunctionsParser.is_function_visible(decl):
            return False
        for allowlisted_decl_filter in self.allowlisted_decl_filters:
            if allowlisted_decl_filter(decl):
                return True
        for decl_filter in self.decl_filters:
            if not decl_filter(decl):
                return False
        return True

    @staticmethod
    def is_function_visible(decl):
        """Returns True if the function has default visibility.

        When several visibility attributes are present the last one wins;
        without any the function is not considered visible."""
        visible = False
        vis_attrs = DeclaredFunctionsParser.get_children_by_kind(
            decl, clang.cindex.CursorKind.VISIBILITY_ATTR)
        for child in vis_attrs:
            visible = child.spelling == 'default'
        return visible

    @staticmethod
    def get_children_by_kind(cursor, kind):
        """Returns a generator of cursor's children of a specific kind."""
        for child in cursor.get_children():
            if child.kind == kind:
                yield child

    @staticmethod
    def short_header_path(name):
        """Trim the given file name to 'unicode/xyz.h'.

        The name is returned unchanged if it does not contain 'unicode/'."""
        start = name.rfind('unicode/')
        # rfind() returns -1 when there is no match; slicing from -1 would
        # return just the last character.
        if start < 0:
            return name
        return name[start:]

    def from_cursor(self, cursor, module):
        """Creates a Function object from the decl at the cursor.

        Raises:
            ValueError: If the cursor's type is not a function prototype."""
        if cursor.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO:
            raise ValueError(
                "{}'s type kind is {}, expected TypeKind.FUNCTIONPROTO.\n"
                "{} Line {} Column {}".format(
                    cursor.spelling,
                    cursor.type.kind,
                    cursor.location.file,
                    cursor.location.line,
                    cursor.location.column))

        name = cursor.spelling
        result_type = cursor.result_type.spelling
        is_variadic = cursor.type.is_function_variadic()
        params = [(arg.type.spelling, arg.spelling) for arg in cursor.get_arguments()]
        function = Function(name, result_type, params, is_variadic, module)
        # For variadic function, set the callee and va_list position
        if function.is_variadic and function.name in self.va_functions_mapping:
            va_func = self.va_functions_mapping[function.name]
            function.set_variadic_callee(va_func[0], va_func[1])
        return function
422
423
class StableDeclarationFilter:
    """Filter accepting declarations that are documented as @stable API."""
    def __call__(self, decl):
        """Return True if decl has a raw comment containing '@stable'."""
        comment = decl.raw_comment
        return bool(comment) and '@stable' in comment
433
434
class AllowlistedDeclarationFilter:
    """Filter accepting declarations whose name is in a given collection."""
    def __init__(self, allowlisted_function_names):
        """Store the collection of function names to accept."""
        self.allowlisted_function_names = allowlisted_function_names

    def __call__(self, decl):
        """Return True if the spelling of decl is one of the allowlisted names."""
        function_name = decl.spelling
        return function_name in self.allowlisted_function_names
443
444
class BlocklistedlistedDeclarationFilter:
    """Filter rejecting declarations whose name is in a given collection."""
    def __init__(self, blocklisted_function_names):
        """Store the collection of function names to reject."""
        self.blocklisted_function_names = blocklisted_function_names

    def __call__(self, decl):
        """Return True if the given decl is not blocklisted."""
        is_blocklisted = decl.spelling in self.blocklisted_function_names
        return not is_blocklisted
453
454
455# Functions w/ variable argument lists (...) need special care to call
456# their corresponding v- versions that accept a va_list argument. Note that
457# although '...' will always appear as the last parameter, its v- version
458# may put the va_list arg in a different place. Hence we provide an index
459# to indicate the position.
460#
# e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
# 'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
# inserted at index 3 (counting from 0) of the argument list.
464
# Maps the name of a variadic function to a (callee, position) pair: the name
# of its va_list-taking counterpart and the 0-based index at which the va_list
# (named args) is inserted into the argument list of the call.
KNOWN_VA_FUNCTIONS = {
    'u_formatMessage': ('u_vformatMessage', 5),
    'u_parseMessage': ('u_vparseMessage', 5),
    'u_formatMessageWithError': ('u_vformatMessageWithError', 6),
    'u_parseMessageWithError': ('u_vparseMessageWithError', 5),
    'umsg_format': ('umsg_vformat', 3),
    'umsg_parse': ('umsg_vparse', 4),
    'utrace_format': ('utrace_vformat', 4),
}
476
# The following functions are not @stable.
# NOTE(review): presumably passed to AllowlistedDeclarationFilter by users of
# this module so these names bypass the @stable filter -- confirm with callers.
ALLOWLISTED_FUNCTION_NAMES = (
    # Not intended to be called directly, but are used by @stable macros.
    'utf8_nextCharSafeBody',
    'utf8_appendCharSafeBody',
    'utf8_prevCharSafeBody',
    'utf8_back1SafeBody',
)
485