1#
2# Copyright (C) 2016 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16"""Parser for Android's version script information."""
17from dataclasses import dataclass
18import logging
19import re
20from typing import (
21    Dict,
22    Iterable,
23    List,
24    Mapping,
25    NewType,
26    Optional,
27    TextIO,
28    Tuple,
29)
30
31
# Lookup table from an API codename to its numeric API level.
ApiMap = Mapping[str, int]
# Distinct string types for architecture names and symbol-file tags.
Arch = NewType('Arch', str)
Tag = NewType('Tag', str)


# Architecture names that are recognized as architecture tags.
ALL_ARCHITECTURES = tuple(
    Arch(arch_name) for arch_name in ('arm', 'arm64', 'x86', 'x86_64'))


# Arbitrary magic number. We use the same one in api-level.h for this purpose.
FUTURE_API_LEVEL = 10_000
47
48
def logger() -> logging.Logger:
    """Fetch the logger that is named after this module."""
    module_logger = logging.getLogger(__name__)
    return module_logger
52
53
@dataclass
class Symbol:
    """A symbol definition from a symbol file."""

    # Symbol name: the text that precedes the terminating ';' on its line.
    name: str
    # Tags taken from the trailing '#' comment on the symbol's line.
    tags: List[Tag]
60
61
@dataclass
class Version:
    """A version block of a symbol file."""

    # Name of the block: the text that precedes '{' on its opening line.
    name: str
    # Text between the closing '}' and ';', or None when there is none.
    base: Optional[str]
    # Tags taken from the '#' comment on the block's opening line.
    tags: List[Tag]
    # Symbols listed in the block's global scope, outside 'extern "C++"'.
    symbols: List[Symbol]
70
71
def get_tags(line: str) -> List[Tag]:
    """Extracts the tags from a line.

    Tags are the whitespace-separated words that follow the first '#' on the
    line. A line without a '#' has no tags.
    """
    comment = line.strip().partition('#')[2]
    return [Tag(word) for word in comment.split()]
76
77
def is_api_level_tag(tag: Tag) -> bool:
    """Tells whether the tag carries an API level that may need decoding.

    That is the case for `introduced=`, `introduced-<anything>` and
    `versioned=` tags.
    """
    api_level_prefixes = ('introduced=', 'introduced-', 'versioned=')
    return tag.startswith(api_level_prefixes)
87
88
def decode_api_level(api: str, api_map: ApiMap) -> int:
    """Turns an API level argument into its API level number.

    Most of the time the argument is already a number written as a string and
    is simply converted. Unreleased APIs are named by codename (like "O")
    instead, and those are looked up in `api_map`. The special name "current"
    maps to FUTURE_API_LEVEL.

    Raises:
        KeyError: The argument is neither a number, "current", nor a key of
            `api_map`.
    """
    if api == "current":
        return FUTURE_API_LEVEL

    try:
        level = int(api)
    except ValueError:
        # Not numeric, so it has to be a codename.
        pass
    else:
        return level

    return api_map[api]
105
106
def decode_api_level_tags(tags: Iterable[Tag], api_map: ApiMap) -> List[Tag]:
    """Decodes API level code names in a list of tags.

    Tags that carry an API level (see is_api_level_tag) are rewritten as
    `<key>=<number>`; all other tags are passed through unchanged. The order
    of the tags is preserved.

    Args:
        tags: The tags to decode. Any iterable is accepted, including a
            one-shot iterator, because it is traversed exactly once.
        api_map: Mapping from API codename to API level number.

    Raises:
        ParseError: An unknown version name was found in a tag.
        ValueError: An API level tag has no '=' (raised by split_tag).

    Returns: A new list holding the decoded tags.
    """
    decoded_tags: List[Tag] = []
    # Single pass: iterating `tags` a second time would silently yield
    # nothing for a one-shot iterator and leave the tags undecoded.
    for tag in tags:
        if is_api_level_tag(tag):
            name, value = split_tag(tag)
            try:
                decoded = str(decode_api_level(value, api_map))
            except KeyError as ex:
                raise ParseError(
                    f'Unknown version name in tag: {tag}') from ex
            tag = Tag(f'{name}={decoded}')
        decoded_tags.append(tag)
    return decoded_tags
125
126
def split_tag(tag: Tag) -> Tuple[str, str]:
    """Splits a key/value tag at its first '='.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Tuple of (key, value) of the tag. Both components are strings.
    """
    key, separator, value = tag.partition('=')
    if not separator:
        # partition() found no '=', so there is no value part.
        raise ValueError('Not a key/value tag: ' + tag)
    return key, value
139
140
def get_tag_value(tag: Tag) -> str:
    """Extracts the value part of a key/value tag.

    Raises:
        ValueError: Tag is not a key/value type tag.

    Returns: Value part of tag as a string.
    """
    _, value = split_tag(tag)
    return value
150
151
def version_is_private(version: str) -> bool:
    """Tells whether the version name marks a private version.

    Names ending in `_PRIVATE` or `_PLATFORM` are treated as private.
    """
    private_suffixes = ('_PRIVATE', '_PLATFORM')
    return version.endswith(private_suffixes)
155
156
def should_omit_version(version: Version, arch: Arch, api: int, llndk: bool,
                        apex: bool) -> bool:
    """Returns True if the version section should be omitted.

    A section is left out when none of its symbols would end up in the stub
    library: private sections, `platform-only` sections, sections tagged
    `llndk`/`apex` when neither matching mode was requested, and sections
    whose tags exclude the given architecture or API level.
    """
    version_tags = version.tags
    if version_is_private(version.name) or 'platform-only' in version_tags:
        return True

    tagged_llndk = 'llndk' in version_tags
    tagged_apex = 'apex' in version_tags
    if tagged_llndk or tagged_apex:
        # A section restricted to LLNDK and/or APEX is kept only when at
        # least one of the modes it is tagged for was requested.
        requested = (tagged_llndk and llndk) or (tagged_apex and apex)
        if not requested:
            return True

    available = (symbol_in_arch(version_tags, arch)
                 and symbol_in_api(version_tags, arch, api))
    return not available
182
183
def should_omit_symbol(symbol: Symbol, arch: Arch, api: int, llndk: bool,
                       apex: bool) -> bool:
    """Returns True if the symbol should be omitted.

    A symbol is left out when it is tagged `llndk`/`apex` and neither
    matching mode was requested, or when its tags exclude the given
    architecture or API level.
    """
    symbol_tags = symbol.tags
    tagged_llndk = 'llndk' in symbol_tags
    tagged_apex = 'apex' in symbol_tags
    if tagged_llndk or tagged_apex:
        # A symbol restricted to LLNDK and/or APEX is kept only when at least
        # one of the modes it is tagged for was requested.
        requested = (tagged_llndk and llndk) or (tagged_apex and apex)
        if not requested:
            return True

    available = (symbol_in_arch(symbol_tags, arch)
                 and symbol_in_api(symbol_tags, arch, api))
    return not available
198
199
def symbol_in_arch(tags: Iterable[Tag], arch: Arch) -> bool:
    """Returns true if the symbol is present for the given architecture.

    A symbol tagged with the requested architecture is present. A symbol
    carrying no architecture tag at all is present on every architecture. A
    symbol tagged only with other architectures is absent.
    """
    present_tags = set(tags)
    if arch in present_tags:
        return True
    # Without the requested arch, the symbol is only available if it was not
    # restricted to any architecture in the first place.
    return present_tags.isdisjoint(ALL_ARCHITECTURES)
213
214
def symbol_in_api(tags: Iterable[Tag], arch: Arch, api: int) -> bool:
    """Returns true if the symbol is present for the given API level.

    A `future` tag makes the symbol present only at FUTURE_API_LEVEL.
    Otherwise the symbol is present from the level named by its `introduced`
    tag onwards, where an `introduced-<arch>=` tag for the requested
    architecture takes precedence over the common `introduced=` tag.
    """
    tag_list = list(tags)
    if 'future' in tag_list:
        return api == FUTURE_API_LEVEL

    arch_prefix = 'introduced-' + arch + '='
    arch_tags = [tag for tag in tag_list if tag.startswith(arch_prefix)]
    common_tags = [tag for tag in tag_list if tag.startswith('introduced=')]

    # If there is an arch-specific tag, it overrides the common one. Within
    # each kind, the last tag listed wins.
    if arch_tags:
        introduced_tag = arch_tags[-1]
    elif common_tags:
        introduced_tag = common_tags[-1]
    else:
        # No "introduced" tags at all: the symbol has always been available.
        return True

    return api >= int(get_tag_value(introduced_tag))
235
236
def symbol_versioned_in_api(tags: Iterable[Tag], api: int) -> bool:
    """Returns true if the symbol should be versioned for the given API.

    This models the `versioned=API` tag. This should be a very uncommonly
    needed tag, and is really only needed to fix versioning mistakes that are
    already out in the wild.

    For example, some of libc's __aeabi_* functions were originally placed in
    the private version, but that was incorrect. They are now in LIBC_N, but
    when building against any version prior to N we need the symbol to be
    unversioned (otherwise it won't resolve on M where it is private).
    """
    versioned_tag = next(
        (tag for tag in tags if tag.startswith('versioned=')), None)
    if versioned_tag is None:
        # Without a "versioned" tag, the symbol has been versioned for as
        # long as it has existed.
        return True
    return api >= int(get_tag_value(versioned_tag))
255
256
class ParseError(RuntimeError):
    """An error that occurred while parsing a symbol file.

    Raised for malformed symbol file contents, such as unexpected top-level
    text, unterminated blocks or symbols, unknown visibility labels, wildcard
    global symbols, and unknown version names in tags.
    """
259
260
class MultiplyDefinedSymbolError(RuntimeError):
    """Raised when a symbol name is defined more than once.

    The offending names are kept on `multiply_defined_symbols`, exactly as
    they were passed in.
    """
    def __init__(self, multiply_defined_symbols: Iterable[str]) -> None:
        names = ', '.join(multiply_defined_symbols)
        message = f'Version script contains multiple definitions for: {names}'
        super().__init__(message)
        self.multiply_defined_symbols = multiply_defined_symbols
268
269
class SymbolFileParser:
    """Parses NDK symbol files.

    The parser reads `input_file` line by line. `arch`, `api`, `llndk` and
    `apex` describe the stub library being generated; they are only used to
    decide which versions and symbols take part in the duplicate-symbol check.
    `api_map` is used to decode API codenames found in tags.
    """
    def __init__(self, input_file: TextIO, api_map: ApiMap, arch: Arch,
                 api: int, llndk: bool, apex: bool) -> None:
        self.input_file = input_file
        self.api_map = api_map
        self.arch = arch
        self.api = api
        self.llndk = llndk
        self.apex = apex
        # The line most recently returned by next_line(); None before the
        # first read and '' once EOF has been reached.
        self.current_line: Optional[str] = None

    def parse(self) -> List[Version]:
        """Parses the symbol file and returns a list of Version objects.

        Raises:
            ParseError: The file contents are malformed.
            MultiplyDefinedSymbolError: A symbol that would be emitted is
                defined more than once.
        """
        versions: List[Version] = []
        while self.next_line() != '':
            assert self.current_line is not None
            if '{' in self.current_line:
                versions.append(self.parse_version())
            else:
                raise ParseError(
                    f'Unexpected contents at top level: {self.current_line}')

        self.check_no_duplicate_symbols(versions)
        return versions

    def check_no_duplicate_symbols(self, versions: Iterable[Version]) -> None:
        """Raises errors for multiply defined symbols.

        This situation is the normal case when symbol versioning is actually
        used, but this script doesn't currently handle that. The error message
        will be a not necessarily obvious "error: redefinition of 'foo'" from
        stub.c, so it's better for us to catch this situation and raise a
        better error.

        Only versions and symbols that are not omitted for the configured
        arch/API/LLNDK/APEX settings are considered.

        Raises:
            MultiplyDefinedSymbolError: At least one symbol name occurs more
                than once. The names are reported in sorted order.
        """
        symbol_names = set()
        multiply_defined_symbols = set()
        for version in versions:
            if should_omit_version(version, self.arch, self.api, self.llndk,
                                   self.apex):
                continue

            for symbol in version.symbols:
                if should_omit_symbol(symbol, self.arch, self.api, self.llndk,
                                      self.apex):
                    continue

                if symbol.name in symbol_names:
                    multiply_defined_symbols.add(symbol.name)
                symbol_names.add(symbol.name)
        if multiply_defined_symbols:
            raise MultiplyDefinedSymbolError(sorted(multiply_defined_symbols))

    def parse_version(self) -> Version:
        """Parses a single version section and returns a Version object.

        Expects `current_line` to be the section's opening line (the one
        containing '{') and consumes lines up to and including the section's
        closing line.

        Raises:
            ParseError: The section is unterminated, a closing brace is not
                followed by ';', or an unknown visibility label is used.
        """
        assert self.current_line is not None
        name = self.current_line.split('{')[0].strip()
        tags = get_tags(self.current_line)
        tags = decode_api_level_tags(tags, self.api_map)
        symbols: List[Symbol] = []
        global_scope = True
        cpp_symbols = False
        while self.next_line() != '':
            assert self.current_line is not None
            if '}' in self.current_line:
                # Line is something like '} BASE; # tags'. Both base and tags
                # are optional here.
                base = self.current_line.partition('}')[2]
                base = base.partition('#')[0].strip()
                if not base.endswith(';'):
                    raise ParseError(
                        'Unterminated version/export "C++" block (expected ;).')
                if cpp_symbols:
                    # This brace closes the 'extern "C++"' block, not the
                    # version section.
                    cpp_symbols = False
                else:
                    base = base.rstrip(';').rstrip()
                    return Version(name, base or None, tags, symbols)
            elif 'extern "C++" {' in self.current_line:
                cpp_symbols = True
            elif not cpp_symbols and ':' in self.current_line:
                visibility = self.current_line.split(':')[0].strip()
                if visibility == 'local':
                    global_scope = False
                elif visibility == 'global':
                    global_scope = True
                else:
                    raise ParseError('Unknown visiblity label: ' + visibility)
            elif global_scope and not cpp_symbols:
                symbols.append(self.parse_symbol())
            else:
                # We're in a hidden scope or in 'extern "C++"' block. Ignore
                # everything.
                pass
        raise ParseError('Unexpected EOF in version block.')

    def parse_symbol(self) -> Symbol:
        """Parses a single symbol line and returns a Symbol object.

        The line has the format "<symbol-name>; # tags". Only the part before
        the '#' is inspected for the terminating ';' and for wildcards, so the
        text of the tag comment cannot influence either check.

        Raises:
            ParseError: The symbol is not terminated by ';' or is a wildcard.
        """
        assert self.current_line is not None
        declaration = self.current_line.partition('#')[0]
        if ';' not in declaration:
            raise ParseError(
                'Expected ; to terminate symbol: ' + self.current_line)
        if '*' in declaration:
            raise ParseError(
                'Wildcard global symbols are not permitted.')
        # Strip the name so that "foo;" and "foo ;" name the same symbol.
        name = declaration.partition(';')[0].strip()
        tags = get_tags(self.current_line)
        tags = decode_api_level_tags(tags, self.api_map)
        return Symbol(name, tags)

    def next_line(self) -> str:
        """Returns the next non-empty non-comment line.

        The line is also stored in `current_line`. A return value of ''
        indicates EOF.
        """
        while True:
            line = self.input_file.readline()
            stripped = line.strip()
            # '' (as opposed to a blank line such as '\n') indicates EOF.
            if line == '' or (stripped and not stripped.startswith('#')):
                break
        self.current_line = line
        return line
394