1# Copyright (C) 2018 The Android Open Source Project
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7#            http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
14#
15
16"""Utility for ICU4C code generation"""
17
18from __future__ import absolute_import
19from __future__ import division
20from __future__ import print_function
21
22import logging
23import os
24import site
25import sys
26import textwrap
27from collections import deque
28
# Directory holding this script, with any symlinks resolved.
THIS_DIR = os.path.dirname(os.path.realpath(__file__))
# Top of the Android tree: four directory levels above THIS_DIR.
ANDROID_TOP = os.path.realpath(
    os.path.join(THIS_DIR, os.pardir, os.pardir, os.pardir, os.pardir))
31
32
def android_path(*args):
    """Joins the given path components onto ANDROID_TOP.

    Args:
        *args: Path components relative to the top of the Android tree.

    Returns:
        The joined path, rooted at ANDROID_TOP.
    """
    path_components = (ANDROID_TOP,) + args
    return os.path.join(*path_components)
36
37
# The clang Python bindings are loaded from the Android tree: the bindings
# directory is registered as a site directory first so that the import on the
# next line can resolve. The order of these two statements matters.
# TODO: Include clang bindings in prebuilt package. http://b/119270767
site.addsitedir(android_path('external/clang/bindings/python'))
import clang.cindex  # pylint: disable=import-error,wrong-import-position

# Identifiers of the prebuilt clang toolchain. CLANG_REVISION selects the
# toolchain directory (CLANG_PATH); the two version strings are used elsewhere
# in this file to build the libclang.so file name and the clang header
# include directory.
# TODO: Do not hardcode clang version. http://b/119270767
CLANG_REVISION = 'r346389b'
CLANG_LIB_VERSION = '8svn'
CLANG_HEADER_VERSION = '8.0.6'
CLANG_PATH = android_path('prebuilts/clang/host/linux-x86/clang-%s' % CLANG_REVISION)
47
48
class Function(object):
    """A visible function found in an ICU header.

    Attributes:
        name: Name of the function.
        result_type: Spelling of the return type.
        params: List of (param_type, param_name) string pairs.
        is_variadic: True if the function takes a variable argument list.
        va_list_insert_position: 0-based index at which the va_list argument
            (named `args`) is inserted by arg_str, or -1 if not set.
        callee: Name of the function that is actually called. Same as name
            unless changed by set_variadic_callee().
        last_param: Name of the last named parameter. Only set for variadic
            functions.
        handle: 'handle_' followed by the module name.
        return_void: True if result_type is exactly 'void'.
    """

    def __init__(self, name, result_type, params, is_variadic, module):
        """
        Args:
            name: Name of the function.
            result_type: Spelling of the return type.
            params: List of (param_type, param_name) string pairs.
            is_variadic: True if the function ends with '...'.
            module: Name of the module declaring the function; used to build
                the handle attribute.
        """
        self.name = name
        self.result_type = result_type
        self.params = params
        self.is_variadic = is_variadic
        self.va_list_insert_position = -1

        # callee will be used in dlsym and may be identical to others for
        # functions with variable argument lists.
        self.callee = self.name
        if self.is_variadic:
            # Name of the last named parameter before the '...'.
            self.last_param = self.params[-1][1]
        self.handle = 'handle_' + module
        self.return_void = self.result_type == 'void'

    @property
    def param_str(self):
        """Returns a string usable as a parameter list in a function decl."""
        params = []
        for param_type, param_name in self.params:
            bracket = param_type.find('[')
            if bracket >= 0:
                # `int foo[42]` will be a param_type of `int [42]` and a
                # param_name of `foo`. We need to put these back in the right
                # order. The element type is right-stripped so that the
                # result is `int foo[42]` and not `int  foo[42]`.
                param_name += param_type[bracket:]
                param_type = param_type[:bracket].rstrip()
            params.append('{} {}'.format(param_type, param_name))
        if self.is_variadic:
            params.append('...')
        return ', '.join(params)

    @property
    def arg_str(self):
        """Returns a string usable as an argument list in a function call.

        For a variadic function the va_list argument `args` is inserted at
        va_list_insert_position (a 0-based index into the argument list).

        Raises:
            ValueError: If the function is variadic but no insert position
                has been set with set_variadic_callee().
        """
        args = [param_name for _, param_name in self.params]
        if self.is_variadic:
            if self.va_list_insert_position < 0:
                # Built as a single-line literal: a backslash continuation
                # inside a triple-quoted string would embed the source
                # indentation in the middle of the message.
                raise ValueError(
                    '{}({}) is variadic, but has no valid inserted position'.format(
                        self.name, self.param_str))
            args.insert(self.va_list_insert_position, 'args')
        return ', '.join(args)

    def set_variadic_callee(self, callee, inserted_position):
        """Set variadic callee with callee name and inserted position.

        Does nothing if the function is not variadic.

        Args:
            callee: Name of the function to call instead of this one.
            inserted_position: 0-based index at which the va_list argument is
                inserted into the argument list.
        """
        if self.is_variadic:
            self.callee = callee
            self.va_list_insert_position = inserted_position
105
106
def logger():
    """Looks up and returns the logger named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
110
111
class DeclaredFunctionsParser(object):
    """Parser to get declared functions from ICU4C headers.

    parse() fills in the results, which are then available through the
    declared_functions, header_includes and header_paths_to_copy properties.
    """

    def __init__(self, decl_filters, whitelisted_decl_filter):
        """Stores the filters and tells libclang which library file to load.

        Args:
            decl_filters: A list of filters for declared functions. Each one
                is called with a declaration; a function is kept only if
                every filter returns True.
            whitelisted_decl_filter: A list of whitelisting filters for
                declared functions. If any of them returns True for a
                declaration, the function is kept without consulting
                decl_filters.
        """
        self.decl_filters = decl_filters
        self.whitelisted_decl_filters = whitelisted_decl_filter
        self.va_functions_mapping = {}

        # Properties to store the parsing result.
        self.all_headers = []
        self.all_header_paths_to_copy = set()
        self.all_declared_functions = []
        self.seen_functions = set()

        # Configures libclang to load in our environment. The library file is
        # named explicitly; note that setting LD_LIBRARY_PATH with os.putenv()
        # sometimes doesn't help.
        clang.cindex.Config.set_library_file(
            os.path.join(CLANG_PATH, 'lib64', 'libclang.so.%s' % CLANG_LIB_VERSION))

    def set_va_functions_mapping(self, mapping):
        """Set mapping from a variable argument function to an implementation.

        Functions w/ variable argument lists (...) need special care to call
        their corresponding v- versions that accept a va_list argument. Note
        that although '...' will always appear as the last parameter, its v-
        version may put the va_list arg in a different place. Hence we provide
        a 0-based index to indicate the position.
        e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function
        of 'umsg_format', it will call 'umsg_vformat' instead, with the va_list
        arg inserted at index 3 of the argument list (i.e. as the 4th
        argument).

        Args:
            mapping: Dict from function name to a (callee name, va_list index)
                pair.
        """
        self.va_functions_mapping = mapping

    @property
    def header_includes(self):
        """Return all headers declaring the functions returned in declared_functions.

        The paths are shortened to the 'unicode/xyz.h' form. If all functions
        in the header are filtered, the header is not included in here."""
        return [self.short_header_path(header) for header in self.all_headers]

    @property
    def header_paths_to_copy(self):
        """Return the set of paths of all headers needed to be copied."""
        return self.all_header_paths_to_copy

    @property
    def declared_functions(self):
        """Return all declared functions after filtering, sorted by name."""
        return self.all_declared_functions

    def get_cflags(self):
        """Returns the cflags that should be used for parsing.

        The list consists of the language/preprocessor options followed by one
        -I option per include directory.
        """
        clang_flags = [
            '-x',
            'c',
            '-std=c99',
            '-DU_DISABLE_RENAMING=1',
            '-DU_SHOW_CPLUSPLUS_API=0',
            '-DU_HIDE_DRAFT_API',
            '-DU_HIDE_DEPRECATED_API',
            '-DU_HIDE_INTERNAL_API',
            '-DANDROID_LINK_SHARED_ICU4C',
        ]

        include_dirs = [
            # TODO: Do not hardcode clang version. http://b/119270767
            os.path.join(CLANG_PATH, 'lib64/clang/', CLANG_HEADER_VERSION, 'include/'),
            android_path('bionic/libc/include'),
            android_path('external/icu/android_icu4c/include'),
            android_path('external/icu/icu4c/source/common'),
            android_path('external/icu/icu4c/source/i18n'),
        ]

        clang_flags.extend('-I' + include_dir for include_dir in include_dirs)
        return clang_flags

    def get_all_cpp_headers(self):
        """Return all C++ header names in icu4c/source/test/hdrtst/cxxfiles.txt.

        Blank lines and lines starting with '#' are skipped.
        """
        cpp_headers = []
        with open(android_path('external/icu/icu4c/source/test/hdrtst/cxxfiles.txt'),
                  'r') as f:
            for line in f:
                line = line.strip()
                # An empty string is never a header name, so do not collect it.
                if line and not line.startswith('#'):
                    cpp_headers.append(line)
        return cpp_headers

    def parse(self):
        """Parse the headers and collect the declared functions after filtering
        and the headers containing the functions.

        The results are stored on the instance: all_declared_functions and
        all_headers (both sorted) and all_header_paths_to_copy.
        """
        index = clang.cindex.Index.create()

        # Neither of these depends on the header being parsed, so compute them
        # once instead of re-reading cxxfiles.txt and rebuilding the flags for
        # every header.
        cflags = self.get_cflags()
        cpp_headers = frozenset(self.get_all_cpp_headers())

        icu_modules = (
            'common',
            'i18n',
        )
        header_dependencies = {}
        for module in icu_modules:
            path = android_path('external/icu/icu4c/source', module, 'unicode')
            # os.listdir() returns entries in arbitrary order; sort so that the
            # parsing order (and therefore the diagnostics order) is
            # deterministic.
            file_names = sorted(f for f in os.listdir(path) if f.endswith('.h'))

            for file_name in file_names:
                # Ignore C++ headers.
                if file_name in cpp_headers:
                    continue

                file_path = os.path.join(path, file_name)
                tunit = index.parse(file_path, cflags)
                self.handle_diagnostics(tunit)
                header_dependencies[file_path] = [
                    file_inclusion.include.name
                    for file_inclusion in tunit.get_includes()]
                visible_functions = self.get_visible_functions(
                    tunit.cursor, module, file_path)
                self.all_declared_functions.extend(visible_functions)
                if visible_functions:
                    self.all_headers.append(file_path)

        # Sort to produce a deterministic output
        self.all_declared_functions = sorted(self.all_declared_functions, key=lambda f: f.name)
        self.all_headers = sorted(self.all_headers)

        self._collect_header_paths_to_copy(header_dependencies)

    def _collect_header_paths_to_copy(self, header_dependencies):
        """Adds all_headers and the ICU4C headers they include, directly or
        indirectly, to all_header_paths_to_copy.

        Args:
            header_dependencies: A map from icu4c header file path to a list of
                included headers. The key must be a ICU4C header, but the value
                could contain non-ICU4C headers, e.g.
                {
                  ".../icu4c/source/common/unicode/utype.h": [
                     ".../icu4c/source/common/unicode/uversion.h",
                     ".../bionic/libc/include/ctype.h",
                   ],
                   ...
                }
        """
        file_queue = deque(self.all_headers)
        self.all_header_paths_to_copy.update(self.all_headers)
        file_processed = set()
        while file_queue:
            current = file_queue.popleft()
            if current in file_processed:
                continue
            file_processed.add(current)
            for header in header_dependencies[current]:
                if header in header_dependencies:  # Do not care non-icu4c headers
                    self.all_header_paths_to_copy.add(header)
                    file_queue.append(header)

    def handle_diagnostics(self, tunit):
        """Logs compiler diagnostics. Exits if errors occurred.

        Fatal and error diagnostics are counted as errors. Diagnostics with any
        other severity than fatal/error/warning/note are logged at DEBUG level.
        """
        severity_to_level = {
            clang.cindex.Diagnostic.Fatal: logging.CRITICAL,
            clang.cindex.Diagnostic.Error: logging.ERROR,
            clang.cindex.Diagnostic.Warning: logging.WARNING,
            clang.cindex.Diagnostic.Note: logging.INFO,
        }
        error_severities = (clang.cindex.Diagnostic.Fatal, clang.cindex.Diagnostic.Error)

        errors = 0
        for diag in tunit.diagnostics:
            # A default is required here: without it a severity outside the
            # four above would leave the level undefined (or stale from the
            # previous diagnostic).
            level = severity_to_level.get(diag.severity, logging.DEBUG)
            if diag.severity in error_severities:
                errors += 1
            logger().log(
                level, '%s:%s:%s %s', diag.location.file, diag.location.line,
                diag.location.column, diag.spelling)
        if errors:
            sys.exit('Errors occurred during parsing. Exiting.')

    def get_visible_functions(self, cursor, module, file_name):
        """Returns a list of all visible functions in a header file.

        Every returned function is also recorded in seen_functions, so a
        function that is declared more than once is only returned once.

        Args:
            cursor: Cursor whose children are examined.
            module: Name of the module the header belongs to.
            file_name: Path of the header; declarations located in other files
                are skipped.
        """
        functions = []
        for child in cursor.get_children():
            if self.should_process_decl(child, file_name):
                function = self.from_cursor(child, module)
                # Record the name right away so that a second declaration of
                # the same function later in this header is filtered out by
                # should_process_decl().
                self.seen_functions.add(function.name)
                functions.append(function)
        return functions

    def should_process_decl(self, decl, file_name):
        """Returns True if this function needs to be processed.

        A declaration is processed if it is a function declaration located in
        file_name, has not been seen before, has default visibility, and is
        either accepted by one of the whitelisting filters or by all of
        decl_filters.
        """
        if decl.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            return False
        # The location may have no file attached; such a declaration cannot
        # belong to file_name.
        decl_file = decl.location.file
        if decl_file is None or decl_file.name != file_name:
            return False
        if decl.spelling in self.seen_functions:
            return False
        if not self.is_function_visible(decl):
            return False
        for whitelisted_decl_filter in self.whitelisted_decl_filters:
            if whitelisted_decl_filter(decl):
                return True
        for decl_filter in self.decl_filters:
            if not decl_filter(decl):
                return False
        return True

    def is_function_visible(self, decl):
        """Returns True if the function has default visibility.

        If the declaration carries several visibility attributes, the last one
        decides. Without any visibility attribute the result is False.
        """
        visible = False
        vis_attrs = self.get_children_by_kind(
            decl, clang.cindex.CursorKind.VISIBILITY_ATTR)
        for child in vis_attrs:
            visible = child.spelling == 'default'
        return visible

    def get_children_by_kind(self, cursor, kind):
        """Returns a generator of cursor's children of a specific kind."""
        for child in cursor.get_children():
            if child.kind == kind:
                yield child

    def short_header_path(self, name):
        """Trim the given file name to 'unicode/xyz.h'.

        If name does not contain 'unicode/', it is returned unchanged.
        """
        index = name.rfind('unicode/')
        if index < 0:
            # rfind() returns -1 when there is no match; slicing from -1 would
            # return only the last character of the name.
            return name
        return name[index:]

    def from_cursor(self, cursor, module):
        """Creates a Function object from the decl at the cursor.

        For a variadic function that has an entry in va_functions_mapping, the
        callee and the va_list position of the Function are set from it.

        Raises:
            ValueError: If the type of the cursor is not a function prototype.
        """
        if cursor.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO:
            raise ValueError(
                "{}'s type kind is {}, expected TypeKind.FUNCTIONPROTO.\n"
                "{} Line {} Column {}".format(
                    cursor.spelling,
                    cursor.type.kind,
                    cursor.location.file,
                    cursor.location.line,
                    cursor.location.column))

        name = cursor.spelling
        result_type = cursor.result_type.spelling
        is_variadic = cursor.type.is_function_variadic()
        params = [(arg.type.spelling, arg.spelling) for arg in cursor.get_arguments()]
        function = Function(name, result_type, params, is_variadic, module)
        # For variadic function, set the callee and va_list position
        if function.is_variadic and function.name in self.va_functions_mapping:
            va_entry = self.va_functions_mapping[function.name]
            function.set_variadic_callee(va_entry[0], va_entry[1])
        return function
359
360
class StableDeclarationFilter(object):
    """Filter that accepts only declarations documented as @stable API."""

    def __call__(self, decl):
        """Returns True if decl has a raw comment that contains '@stable'.

        A declaration without a comment (None or empty) is rejected.
        """
        comment = decl.raw_comment
        return bool(comment) and '@stable' in comment
370
371
class WhitelistedDeclarationFilter(object):
    """Filter that accepts declarations whose name is whitelisted."""

    def __init__(self, whitelisted_function_names):
        """
        Args:
            whitelisted_function_names: Collection of the function names to
                accept.
        """
        self.whitelisted_function_names = whitelisted_function_names

    def __call__(self, decl):
        """Returns True if the spelling of decl is one of the whitelisted names."""
        function_name = decl.spelling
        return function_name in self.whitelisted_function_names
380
381
class BlacklistedlistedDeclarationFilter(object):
    """A filter for blacklisting function declarations.

    The class name contains a historical typo; it is kept so that existing
    callers continue to work. BlacklistedDeclarationFilter is an alias with the
    intended spelling.
    """
    def __init__(self, blacklisted_function_names):
        """
        Args:
            blacklisted_function_names: Collection of the function names to
                reject.
        """
        self.blacklisted_function_names = blacklisted_function_names

    def __call__(self, decl):
        """Returns True if the given decl is not blacklisted"""
        return decl.spelling not in self.blacklisted_function_names


# Correctly spelled alias of the class above.
BlacklistedDeclarationFilter = BlacklistedlistedDeclarationFilter
390
391
# Functions with variable argument lists (...) need special care: their
# wrappers call the corresponding v- version that accepts a va_list argument.
# Although '...' always appears as the last parameter, the v- version may take
# the va_list arg in a different place, so each entry maps a function name to
# a (callee name, position) pair.
#
# The position is the 0-based index at which the va_list (named args) is
# inserted into the argument list of the original function.
# e.g. 'umsg_format': ('umsg_vformat', 3) means the wrapper function of
# 'umsg_format' calls 'umsg_vformat' instead, with args inserted at index 3,
# i.e. as the 4th argument.
KNOWN_VA_FUNCTIONS = {
    # Message formatting / parsing with an explicit pattern.
    'u_formatMessage': ('u_vformatMessage', 5),
    'u_parseMessage': ('u_vparseMessage', 5),
    'u_formatMessageWithError': ('u_vformatMessageWithError', 6),
    'u_parseMessageWithError': ('u_vparseMessageWithError', 5),
    # umsg_* functions.
    'umsg_format': ('umsg_vformat', 3),
    'umsg_parse': ('umsg_vparse', 4),
    # utrace_* functions.
    'utrace_format': ('utrace_vformat', 4),
}
413
# Names of functions that are accepted although they are not @stable.
WHITELISTED_FUNCTION_NAMES = (
    # These are not intended to be called directly, but @stable macros use
    # them.
    'utf8_nextCharSafeBody',
    'utf8_appendCharSafeBody',
    'utf8_prevCharSafeBody',
    'utf8_back1SafeBody',
)
422