# Copyright (C) 2018 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Utility for ICU4C code generation"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import os
import site
import sys
import textwrap
from collections import deque

import jinja2

THIS_DIR = os.path.dirname(os.path.realpath(__file__))
ANDROID_TOP = os.path.realpath(os.path.join(THIS_DIR, '../../../..'))

# Templates are loaded from the 'jinja_templates' directory next to this file.
JINJA_ENV = jinja2.Environment(loader=jinja2.FileSystemLoader(
    os.path.join(THIS_DIR, 'jinja_templates')))
JINJA_ENV.trim_blocks = True
JINJA_ENV.lstrip_blocks = True


def generate_shim(functions, includes, suffix, template_file):
    """Generates the library source file from the given functions.

    Args:
        functions: Exposed to the template as 'functions'.
        includes: Exposed to the template as 'icu_headers'.
        suffix: Exposed to the template as 'suffix'.
        template_file: Name of the template to load through JINJA_ENV.

    Returns:
        The rendered template as a string.
    """
    data = {
        'functions': functions,
        'icu_headers': includes,
        'suffix': suffix,
    }
    return JINJA_ENV.get_template(template_file).render(data)


def generate_symbol_txt(shim_functions, extra_function_names, template_file):
    """Generates the symbol txt file from the given functions.

    Args:
        shim_functions: Exposed to the template as 'shim_functions'.
        extra_function_names: Exposed to the template as
            'extra_function_names'.
        template_file: Name of the template to load through JINJA_ENV.

    Returns:
        The rendered template as a string.
    """
    data = {
        # Each shim_function is given a suffix.
        'shim_functions': shim_functions,
        # Each extra function name is included as given.
        'extra_function_names': extra_function_names,
    }
    return JINJA_ENV.get_template(template_file).render(data)


def get_allowlisted_apis(allowlist_file):
    """Returns the set of allowlisted API names listed in allowlist_file.

    The file is resolved relative to THIS_DIR. Blank lines and lines starting
    with '#' are skipped; every other line is stripped and added to the set.
    """
    allowlisted_apis = set()
    with open(os.path.join(THIS_DIR, allowlist_file), 'r') as file:
        for line in file:
            line = line.strip()
            if line and not line.startswith("#"):
                allowlisted_apis.add(line)
    return allowlisted_apis


def android_path(*args):
    """Returns the absolute path to a directory within the Android tree."""
    return os.path.join(ANDROID_TOP, *args)


# TODO: Include clang bindings in prebuilt package. http://b/119270767
site.addsitedir(android_path('external/clang/bindings/python'))
import clang.cindex  # pylint: disable=import-error,wrong-import-position

# TODO: Do not hardcode clang version. http://b/119270767
CLANG_REVISION = 'r383902c'
CLANG_LIB_VERSION = '11git'
CLANG_HEADER_VERSION = '11.0.3'
CLANG_PATH = android_path('prebuilts/clang/host/linux-x86/clang-%s' % CLANG_REVISION)


class Function:
    """A visible function found in an ICU header."""

    def __init__(self, name, result_type, params, is_variadic, module):
        """
        Args:
            name: The function name.
            result_type: Spelling of the return type.
            params: A list of (param_type, param_name) tuples.
            is_variadic: True if the declaration ends with '...'.
            module: Module name; used to build the 'handle_<module>' name.
        """
        self.name = name
        self.result_type = result_type
        self.params = params
        self.is_variadic = is_variadic
        # A negative value means no insert position has been set yet.
        self.va_list_insert_position = -1

        # callee will be used in dlsym and may be identical to others for
        # functions with variable argument lists.
        self.callee = self.name
        if self.is_variadic:
            # Name of the last named parameter. Only set for variadic functions.
            self.last_param = self.params[-1][1]
        self.handle = 'handle_' + module
        self.return_void = self.result_type == 'void'

    @property
    def param_str(self):
        """Returns a string usable as a parameter list in a function decl."""
        params = []
        for param_type, param_name in self.params:
            bracket = param_type.find('[')
            if bracket >= 0:
                # `int foo[42]` will be a param_type of `int [42]` and a
                # param_name of `foo`. We need to put these back in the right
                # order.
                param_name += param_type[bracket:]
                param_type = param_type[:bracket]
            params.append('{} {}'.format(param_type, param_name))
        if self.is_variadic:
            params.append('...')
        return ', '.join(params)

    @property
    def arg_str(self):
        """Returns a string usable as an argument list in a function call.

        For a variadic function, 'args' (the va_list) is inserted at
        va_list_insert_position.

        Raises:
            ValueError: The function is variadic but no insert position has
                been set via set_variadic_callee().
        """
        args = [param_name for _, param_name in self.params]
        if self.is_variadic:
            if self.va_list_insert_position < 0:
                # Built as one plain string so that no source indentation
                # leaks into the middle of the message.
                raise ValueError(
                    '{}({}) is variadic, but has no valid inserted position'.format(
                        self.name, self.param_str))
            args.insert(self.va_list_insert_position, 'args')
        return ', '.join(args)

    def set_variadic_callee(self, callee, inserted_position):
        """Sets the callee name and the va_list insert position.

        This is a no-op when the function is not variadic.
        """
        if self.is_variadic:
            self.callee = callee
            self.va_list_insert_position = inserted_position


def logger():
    """Returns the module level logger."""
    return logging.getLogger(__name__)


class DeclaredFunctionsParser:
    """Parser to get declared functions from ICU4C headers. """

    def __init__(self, decl_filters, allowlisted_decl_filter):
        """
        Args:
            decl_filters: A list of filters for declared functions.
            allowlisted_decl_filter: A list of allowlisting filters for declared functions.
            If the function is allowlisted here, the function will not be filtered by the
            filters added in decl_filters
        """
        self.decl_filters = decl_filters
        self.allowlisted_decl_filters = allowlisted_decl_filter
        self.va_functions_mapping = {}
        self.ignored_include_dependency = {}

        # properties to store the parsing result
        self.all_headers = []
        self.all_header_paths_to_copy = set()
        self.all_declared_functions = []
        self.seen_functions = set()
        self.all_header_to_function_names = {}

        # Configures libclang to load in our environment
        # Set up LD_LIBRARY_PATH to include libclang.so, libLLVM.so, etc.  Note
        # that setting LD_LIBRARY_PATH with os.putenv() sometimes doesn't help.
        # clang.cindex.Config.set_library_path(os.path.join(CLANG_PATH, 'lib64'))
        clang.cindex.Config.set_library_file(
            os.path.join(CLANG_PATH, 'lib64', 'libclang.so.%s' % CLANG_LIB_VERSION))

    def set_va_functions_mapping(self, mapping):
        """Set mapping from a variable argument function to an implementation.

        Functions w/ variable argument lists (...) need special care to call
        their corresponding v- versions that accept a va_list argument. Note that
        although '...' will always appear as the last parameter, its v- version
        may put the va_list arg in a different place. Hence we provide an index
        to indicate the position.
        e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
        'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
        inserted at index 3 (0-based) of the argument list."""
        self.va_functions_mapping = mapping

    def set_ignored_include_dependency(self, mapping):
        """
        A sample mapping is { "ulocdata.h" : [ "uloc.h", "ures.h" ] }.
        The include dependencies will explicitly be ignored when producing header_paths_to_copy.
        """
        self.ignored_include_dependency = mapping

    @property
    def header_includes(self):
        """Return all headers declaring the functions returned in declared_functions.

        If all functions in the header are filtered, the header is not included in here."""
        return [DeclaredFunctionsParser.short_header_path(header) for header in self.all_headers]

    @property
    def header_paths_to_copy(self):
        """Return all headers needed to be copied"""
        return self.all_header_paths_to_copy

    @property
    def declared_functions(self):
        """Return all declared functions after filtering"""
        return self.all_declared_functions

    @property
    def header_to_function_names(self):
        """Return the mapping from the header file name to a list of function names in the file"""
        return self.all_header_to_function_names

    @staticmethod
    def get_cflags():
        """Returns the cflags that should be used for parsing."""
        clang_flags = [
            '-x',
            'c',
            '-std=c99',
            '-DU_DISABLE_RENAMING=1',
            '-DU_SHOW_CPLUSPLUS_API=0',
            '-DU_HIDE_DRAFT_API',
            '-DU_HIDE_DEPRECATED_API',
            '-DU_HIDE_INTERNAL_API',
            '-DANDROID_LINK_SHARED_ICU4C',
        ]

        include_dirs = [
            # TODO: Do not hardcode clang version. http://b/119270767
            os.path.join(CLANG_PATH, 'lib64/clang/', CLANG_HEADER_VERSION, 'include/'),
            android_path('bionic/libc/include'),
            android_path('external/icu/android_icu4c/include'),
            android_path('external/icu/icu4c/source/common'),
            android_path('external/icu/icu4c/source/i18n'),
        ]

        for include_dir in include_dirs:
            clang_flags.append('-I' + include_dir)
        return clang_flags

    @staticmethod
    def get_all_cpp_headers():
        """Return all C++ header names in external/icu/tools/icu4c_srcgen/cxxfiles.txt

        Blank lines and lines starting with '#' are skipped."""
        cpp_headers = []
        with open(android_path('external/icu/tools/icu4c_srcgen/cxxfiles.txt'), 'r') as file:
            for line in file:
                line = line.strip()
                # Skip blank lines too, so that '' never ends up in the list.
                if line and not line.startswith("#"):
                    cpp_headers.append(line)
        return cpp_headers

    def parse(self):
        """Parse the headers and collect the declared functions after filtering
        and the headers containing the functions."""
        index = clang.cindex.Index.create()
        # Both values are the same for every header, so compute them once
        # instead of rebuilding the flags and re-reading cxxfiles.txt per file.
        cflags = DeclaredFunctionsParser.get_cflags()
        cpp_headers = frozenset(DeclaredFunctionsParser.get_all_cpp_headers())

        icu_modules = (
            'common',
            'i18n',
        )
        # header_dependencies is a map from icu4c header file path to a list of included headers.
        header_dependencies = {}
        for module in icu_modules:
            path = android_path('external/icu/icu4c/source', module, 'unicode')
            files = [os.path.join(path, f)
                     for f in os.listdir(path) if f.endswith('.h')]

            for file_path in files:
                base_header_name = os.path.basename(file_path)
                # Ignore C++ headers.
                if base_header_name in cpp_headers:
                    continue

                tunit = index.parse(file_path, cflags)
                DeclaredFunctionsParser.handle_diagnostics(tunit)
                header_dependencies[file_path] = [file_inclusion.include.name for file_inclusion
                                                  in tunit.get_includes()]
                visible_functions = self.get_visible_functions(
                    tunit.cursor, module, file_path)
                self.all_header_to_function_names[base_header_name] = \
                    [f.name for f in visible_functions]
                for function in visible_functions:
                    self.seen_functions.add(function.name)
                    self.all_declared_functions.append(function)
                if visible_functions:
                    self.all_headers.append(file_path)

        # Sort to produce a deterministic output
        self.all_declared_functions = sorted(self.all_declared_functions, key=lambda f: f.name)
        self.all_headers = sorted(self.all_headers)

        self._collect_header_paths_to_copy(header_dependencies)

    def _collect_header_paths_to_copy(self, header_dependencies):
        """Adds all_headers and their transitive ICU4C includes to all_header_paths_to_copy.

        Args:
            header_dependencies: A map from icu4c header file path to a list of
                included headers. The key must be a ICU4C header, but the value
                could contain non-ICU4C headers, e.g.
                {
                   ".../icu4c/source/common/unicode/utype.h": [
                      ".../icu4c/source/common/unicode/uversion.h",
                      ".../bionic/libc/include/ctype.h",
                   ],
                   ...
                }
        """
        file_queue = deque()
        file_processed = set()
        for header in self.all_headers:
            file_queue.appendleft(header)
            self.all_header_paths_to_copy.add(header)
        while file_queue:
            current = file_queue.pop()
            if current in file_processed:
                continue
            file_processed.add(current)
            # Includes listed here for the current header are deliberately not followed.
            ignored = self.ignored_include_dependency.get(os.path.basename(current), ())
            for header in header_dependencies[current]:
                # Skip this header if this dependency is explicitly ignored
                if os.path.basename(header) in ignored:
                    continue
                if header in header_dependencies:  # Non-icu4c headers are not copied
                    self.all_header_paths_to_copy.add(header)
                    file_queue.appendleft(header)

    @staticmethod
    def handle_diagnostics(tunit):
        """Logs compiler diagnostics. Exits if errors occurred.

        Fatal and Error diagnostics are counted as errors; if any occurred,
        sys.exit() is called after all diagnostics have been logged.
        """
        severity_to_level = {
            clang.cindex.Diagnostic.Fatal: logging.CRITICAL,
            clang.cindex.Diagnostic.Error: logging.ERROR,
            clang.cindex.Diagnostic.Warning: logging.WARNING,
            clang.cindex.Diagnostic.Note: logging.INFO,
        }
        error_severities = (clang.cindex.Diagnostic.Fatal, clang.cindex.Diagnostic.Error)

        errors = 0
        for diag in tunit.diagnostics:
            # Any other severity (e.g. Ignored) is logged at DEBUG, so that
            # 'level' is always defined and never carried over from the
            # previous diagnostic.
            level = severity_to_level.get(diag.severity, logging.DEBUG)
            if diag.severity in error_severities:
                errors += 1
            logger().log(
                level, '%s:%s:%s %s', diag.location.file, diag.location.line,
                diag.location.column, diag.spelling)
        if errors:
            sys.exit('Errors occurred during parsing. Exiting.')

    def get_visible_functions(self, cursor, module, file_name):
        """Returns a list of all visible functions in a header file."""
        return [self.from_cursor(child, module)
                for child in cursor.get_children()
                if self.should_process_decl(child, file_name)]

    def should_process_decl(self, decl, file_name):
        """Returns True if this function needs to be processed.

        A decl is processed when it is a function declaration located in
        file_name, has not been seen before, has default visibility, and either
        matches any allowlisting filter or passes every filter in decl_filters.
        """
        if decl.kind != clang.cindex.CursorKind.FUNCTION_DECL:
            return False
        decl_file = decl.location.file
        # A decl without a source file cannot belong to file_name.
        if decl_file is None or decl_file.name != file_name:
            return False
        if decl.spelling in self.seen_functions:
            return False
        if not DeclaredFunctionsParser.is_function_visible(decl):
            return False
        # Allowlisted functions bypass decl_filters entirely.
        for allowlisted_decl_filter in self.allowlisted_decl_filters:
            if allowlisted_decl_filter(decl):
                return True
        for decl_filter in self.decl_filters:
            if not decl_filter(decl):
                return False
        return True

    @staticmethod
    def is_function_visible(decl):
        """Returns True if the function has default visibility.

        If the decl has several visibility attributes, the last one wins; a
        decl with no visibility attribute is not visible.
        """
        visible = False
        vis_attrs = DeclaredFunctionsParser.get_children_by_kind(
            decl, clang.cindex.CursorKind.VISIBILITY_ATTR)
        for child in vis_attrs:
            visible = child.spelling == 'default'
        return visible

    @staticmethod
    def get_children_by_kind(cursor, kind):
        """Returns a generator of cursor's children of a specific kind."""
        for child in cursor.get_children():
            if child.kind == kind:
                yield child

    @staticmethod
    def short_header_path(name):
        """Trim the given file name to 'unicode/xyz.h'."""
        return name[name.rfind('unicode/'):]

    def from_cursor(self, cursor, module):
        """Creates a Function object from the decl at the cursor.

        Raises:
            ValueError: The cursor's type kind is not TypeKind.FUNCTIONPROTO.
        """
        if cursor.type.kind != clang.cindex.TypeKind.FUNCTIONPROTO:
            raise ValueError(textwrap.dedent("""\
                {}'s type kind is {}, expected TypeKind.FUNCTIONPROTO.
                {} Line {} Column {}""".format(
                    cursor.spelling,
                    cursor.type.kind,
                    cursor.location.file,
                    cursor.location.line,
                    cursor.location.column)))

        name = cursor.spelling
        result_type = cursor.result_type.spelling
        is_variadic = cursor.type.is_function_variadic()
        params = [(arg.type.spelling, arg.spelling) for arg in cursor.get_arguments()]
        function = Function(name, result_type, params, is_variadic, module)
        # For variadic function, set the callee and va_list position
        if function.is_variadic and function.name in self.va_functions_mapping:
            va_func = self.va_functions_mapping[function.name]
            function.set_variadic_callee(va_func[0], va_func[1])
        return function


class StableDeclarationFilter:
    """Return true if it's @stable API"""
    def __call__(self, decl):
        """Returns True if the given decl has a doxygen stable tag."""
        return bool(decl.raw_comment) and '@stable' in decl.raw_comment


class AllowlistedDeclarationFilter:
    """A filter for allowlisting function declarations."""
    def __init__(self, allowlisted_function_names):
        self.allowlisted_function_names = allowlisted_function_names

    def __call__(self, decl):
        """Returns True if the given decl is allowlisted"""
        return decl.spelling in self.allowlisted_function_names


class BlocklistedlistedDeclarationFilter:
    """A filter for blocklisting function declarations."""
    def __init__(self, blocklisted_function_names):
        self.blocklisted_function_names = blocklisted_function_names

    def __call__(self, decl):
        """Returns True if the given decl is not blocklisted"""
        return decl.spelling not in self.blocklisted_function_names


# Functions w/ variable argument lists (...) need special care to call
# their corresponding v- versions that accept a va_list argument. Note that
# although '...' will always appear as the last parameter, its v- version
# may put the va_list arg in a different place. Hence we provide an index
# to indicate the position.
#
# e.g. 'umsg_format': ('umsg_vformat', 3) means in the wrapper function of
# 'umsg_format', it will call 'umsg_vformat' instead, with the va_list arg
# inserted at index 3 (0-based) of the argument list.

# We need to insert the va_list (named args) at the position
# indicated by the KNOWN_VA_FUNCTIONS map.
KNOWN_VA_FUNCTIONS = {
    'u_formatMessage': ('u_vformatMessage', 5),
    'u_parseMessage': ('u_vparseMessage', 5),
    'u_formatMessageWithError': ('u_vformatMessageWithError', 6),
    'u_parseMessageWithError': ('u_vparseMessageWithError', 5),
    'umsg_format': ('umsg_vformat', 3),
    'umsg_parse': ('umsg_vparse', 4),
    'utrace_format': ('utrace_vformat', 4),
}

# The following functions are not @stable
ALLOWLISTED_FUNCTION_NAMES = (
    # Not intended to be called directly, but are used by @stable macros.
    'utf8_nextCharSafeBody',
    'utf8_appendCharSafeBody',
    'utf8_prevCharSafeBody',
    'utf8_back1SafeBody',
)