#
# Copyright (C) 2016 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
"""Parser for Android's version script information."""
from dataclasses import dataclass
import logging
import re
from typing import (
    Dict,
    Iterable,
    List,
    Mapping,
    NewType,
    Optional,
    TextIO,
    Tuple,
)


ApiMap = Mapping[str, int]
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


ALL_ARCHITECTURES = (
    Arch('arm'),
    Arch('arm64'),
    Arch('x86'),
    Arch('x86_64'),
)


# Arbitrary magic number. We use the same one in api-level.h for this purpose.
FUTURE_API_LEVEL = 10000


def logger() -> logging.Logger:
    """Return the main logger for this module."""
    return logging.getLogger(__name__)


@dataclass
class Symbol:
    """A symbol definition from a symbol file."""

    name: str
    tags: List[Tag]


@dataclass
class Version:
    """A version block of a symbol file."""

    name: str
    base: Optional[str]
    tags: List[Tag]
    symbols: List[Symbol]


def get_tags(line: str) -> List[Tag]:
    """Returns a list of all tags on this line.

    Tags are the whitespace-separated words following the first '#' on the
    line. A line without a '#' has no tags.
    """
    _, _, all_tags = line.strip().partition('#')
    return [Tag(e) for e in re.split(r'\s+', all_tags) if e.strip()]


def is_api_level_tag(tag: Tag) -> bool:
    """Returns true if this tag has an API level that may need decoding."""
    return tag.startswith(('introduced=', 'introduced-', 'versioned='))


def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Decodes the API level argument into the API level number.

    For the average case, this just decodes the integer value from the string,
    but for unreleased APIs we need to translate from the API codename (like
    "O") to the future API level for that codename.

    Raises:
        KeyError: The value is neither an integer, "current", nor a key of
                  api_map.
    """
    try:
        return int(api)
    except ValueError:
        pass

    if api == "current":
        return FUTURE_API_LEVEL

    return api_map[api]


def decode_api_level_tags(tags: Iterable[Tag], api_map: ApiMap) -> List[Tag]:
    """Decodes API level code names in a list of tags.

    Tags that do not carry an API level are passed through unchanged.

    Raises:
        ParseError: An unknown version name was found in a tag.
        ValueError: An API level tag has no '=' separated value.
    """
    # Materialize once and iterate the copy so that one-shot iterables are
    # handled correctly.
    decoded_tags = list(tags)
    for idx, tag in enumerate(decoded_tags):
        if not is_api_level_tag(tag):
            continue
        name, value = split_tag(tag)

        try:
            decoded = str(decode_api_level(value, api_map))
        except KeyError as err:
            raise ParseError(f'Unknown version name in tag: {tag}') from err
        decoded_tags[idx] = Tag('='.join([name, decoded]))
    return decoded_tags


def split_tag(tag: Tag) -> Tuple[str, str]:
    """Returns a key/value tuple of the tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Tuple of (key, value) of the tag. Both components are strings.
    """
    if '=' not in tag:
        raise ValueError('Not a key/value tag: ' + tag)
    key, _, value = tag.partition('=')
    return key, value


def get_tag_value(tag: Tag) -> str:
    """Returns the value of a key/value tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    return split_tag(tag)[1]


def version_is_private(version: str) -> bool:
    """Returns True if the version name should be treated as private."""
    return version.endswith(('_PRIVATE', '_PLATFORM'))


def _omitted_by_tags(tags: List[Tag], arch: Arch, api: int, llndk: bool,
                     apex: bool) -> bool:
    """Returns True if the tags exclude an item for the given configuration.

    Shared by should_omit_version and should_omit_symbol.
    """
    is_llndk = 'llndk' in tags
    is_apex = 'apex' in tags
    if is_llndk or is_apex:
        # Items tagged llndk and/or apex are only kept when at least one of
        # the matching modes was requested.
        if not ((is_llndk and llndk) or (is_apex and apex)):
            return True
    if not symbol_in_arch(tags, arch):
        return True
    return not symbol_in_api(tags, arch, api)


def should_omit_version(version: Version, arch: Arch, api: int, llndk: bool,
                        apex: bool) -> bool:
    """Returns True if the version section should be omitted.

    We want to omit any sections that do not have any symbols we'll have in the
    stub library: sections that contain entirely future symbols or only
    symbols for certain architectures.
    """
    if version_is_private(version.name):
        return True
    if 'platform-only' in version.tags:
        return True
    return _omitted_by_tags(version.tags, arch, api, llndk, apex)


def should_omit_symbol(symbol: Symbol, arch: Arch, api: int, llndk: bool,
                       apex: bool) -> bool:
    """Returns True if the symbol should be omitted."""
    return _omitted_by_tags(symbol.tags, arch, api, llndk, apex)


def symbol_in_arch(tags: Iterable[Tag], arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture."""
    has_arch_tags = False
    for tag in tags:
        if tag == arch:
            return True
        if tag in ALL_ARCHITECTURES:
            has_arch_tags = True

    # If there were no arch tags, the symbol is available for all
    # architectures. If there were any arch tags, the symbol is only available
    # for the tagged architectures.
    return not has_arch_tags


def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level.

    The API level in an "introduced" tag must already be numeric (see
    decode_api_level_tags); int() raises ValueError otherwise.
    """
    introduced_tag = None
    arch_specific = False
    for tag in tags:
        # If there is an arch-specific tag, it should override the common one.
        if tag.startswith('introduced=') and not arch_specific:
            introduced_tag = tag
        elif tag.startswith('introduced-' + arch + '='):
            introduced_tag = tag
            arch_specific = True
        elif tag == 'future':
            return api == FUTURE_API_LEVEL

    if introduced_tag is None:
        # We found no "introduced" tags, so the symbol has always been
        # available.
        return True

    return api >= int(get_tag_value(introduced_tag))


def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    for tag in tags:
        if tag.startswith('versioned='):
            return api >= int(get_tag_value(tag))
    # If there is no "versioned" tag, the tag has been versioned for as long as
    # it was introduced.
    return True


class ParseError(RuntimeError):
    """An error that occurred while parsing a symbol file."""


class MultiplyDefinedSymbolError(RuntimeError):
    """A symbol name was multiply defined."""
    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        # Materialize first: joining a one-shot iterable would otherwise leave
        # an exhausted iterator stored on the exception.
        names = list(multiply_defined_symbols)
        super().__init__(
            'Version script contains multiple definitions for: {}'.format(
                ', '.join(names)))
        self.multiply_defined_symbols = names


class SymbolFileParser:
    """Parses NDK symbol files."""
    def __init__(self, input_file: TextIO, api_map: ApiMap, arch: Arch,
                 api: int, llndk: bool, apex: bool) -> None:
        self.input_file = input_file
        self.api_map = api_map
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex
        # The most recent line returned by next_line(), or None before the
        # first read.
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the symbol file and returns a list of Version objects.

        Raises:
            ParseError: The file contains something other than version blocks
                        at the top level, or a malformed version block.
            MultiplyDefinedSymbolError: A symbol that would be emitted is
                                        defined in more than one version.
        """
        versions = []
        while self.next_line() != '':
            assert self.current_line is not None
            if '{' in self.current_line:
                versions.append(self.parse_version())
            else:
                raise ParseError(
                    f'Unexpected contents at top level: {self.current_line}')

        self.check_no_duplicate_symbols(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error message
        will be a not necessarily obvious "error: redefinition of 'foo'" from
        stub.c, so it's better for us to catch this situation and raise a
        better error.

        Versions and symbols that would be omitted for this parser's
        arch/api/llndk/apex configuration are not considered.
        """
        symbol_names = set()
        multiply_defined_symbols = set()
        for version in versions:
            if should_omit_version(version, self.arch, self.api, self.llndk,
                                   self.apex):
                continue

            for symbol in version.symbols:
                if should_omit_symbol(symbol, self.arch, self.api, self.llndk,
                                      self.apex):
                    continue

                if symbol.name in symbol_names:
                    multiply_defined_symbols.add(symbol.name)
                symbol_names.add(symbol.name)
        if multiply_defined_symbols:
            raise MultiplyDefinedSymbolError(sorted(multiply_defined_symbols))

    def parse_version(self) -> Version:
        """Parses a single version section and returns a Version object.

        Expects current_line to be the line that opens the version block.

        Raises:
            ParseError: The block is unterminated, has an unknown visibility
                        label, or contains a malformed symbol line.
        """
        assert self.current_line is not None
        name = self.current_line.split('{')[0].strip()
        tags = get_tags(self.current_line)
        tags = decode_api_level_tags(tags, self.api_map)
        symbols: List[Symbol] = []
        global_scope = True
        cpp_symbols = False
        while self.next_line() != '':
            if '}' in self.current_line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                base = self.current_line.partition('}')[2]
                base = base.partition('#')[0].strip()
                if not base.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if cpp_symbols:
                    # This brace closes the 'extern "C++"' block, not the
                    # version block.
                    cpp_symbols = False
                else:
                    base = base.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
            elif 'extern "C++" {' in self.current_line:
                cpp_symbols = True
            elif not cpp_symbols and ':' in self.current_line:
                visibility = self.current_line.split(':')[0].strip()
                if visibility == 'local':
                    global_scope = False
                elif visibility == 'global':
                    global_scope = True
                else:
                    raise ParseError('Unknown visiblity label: ' + visibility)
            elif global_scope and not cpp_symbols:
                symbols.append(self.parse_symbol())
            else:
                # We're in a hidden scope or in 'extern "C++"' block. Ignore
                # everything.
                pass
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self) -> Symbol:
        """Parses a single symbol line and returns a Symbol object.

        Raises:
            ParseError: The line has no terminating ';' or contains a '*'.
        """
        assert self.current_line is not None
        if ';' not in self.current_line:
            raise ParseError(
                'Expected ; to terminate symbol: ' + self.current_line)
        if '*' in self.current_line:
            raise ParseError(
                'Wildcard global symbols are not permitted.')
        # Line is now in the format "<symbol-name>; # tags"
        name, _, _ = self.current_line.strip().partition(';')
        # Drop any whitespace between the symbol name and the ';'.
        name = name.rstrip()
        tags = get_tags(self.current_line)
        tags = decode_api_level_tags(tags, self.api_map)
        return Symbol(name, tags)

    def next_line(self) -> str:
        """Returns the next non-empty non-comment line.

        The returned line is also stored in current_line.

        A return value of '' indicates EOF.
        """
        while True:
            line = self.input_file.readline()
            # We want to skip empty lines, but '' indicates EOF.
            if line == '':
                break
            stripped = line.strip()
            if stripped and not stripped.startswith('#'):
                break
        self.current_line = line
        return self.current_line