1#
2# Copyright (C) 2018 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16r"""This file contains an ELF vtable abi dumper.
17
18Example usage:
19    from vts.utils.python.library import vtable_dumper
20    with vtable_dumper.VtableDumper(file) as dumper:
21        print('\n\n'.join(str(vtable) for vtable in dumper.DumpVtables()))
22"""
23
24import bisect
25
26from vts.utils.python.library import elf_parser
27from vts.utils.python.library.elf import consts
28
29
class VtableError(Exception):
    """Error raised by VtableDumper when vtables cannot be dumped."""
33
34
class VtableEntry(object):
    """This class contains an entry in Vtable.

    The names attribute contains all the possible symbol names for this entry
    due to symbol aliasing.

    Entries are ordered by their offset, so a list of entries can be sorted
    directly. Ordering is implemented with rich comparison methods because
    Python 3 ignores __cmp__ and has no cmp() builtin.

    Attributes:
        offset: Offset with respect to vtable.
        names: A list of possible symbol names of the entry.
        value: Value of the entry.
        is_undefined: If entry has a symbol, whether symbol is undefined or not.
    """

    def __init__(self, offset, names, value, is_undefined):
        self.offset = offset
        self.names = names
        self.value = value
        self.is_undefined = is_undefined

    def __lt__(self, other):
        """Returns whether this entry's offset is below the other entry's."""
        return self.offset < other.offset

    def __le__(self, other):
        """Returns whether this entry's offset is at most the other entry's."""
        return self.offset <= other.offset

    def __gt__(self, other):
        """Returns whether this entry's offset is above the other entry's."""
        return self.offset > other.offset

    def __ge__(self, other):
        """Returns whether this entry's offset is at least the other entry's."""
        return self.offset >= other.offset

    def __cmp__(self, other):
        """Three-way comparison by offset, kept for Python 2 callers.

        Returns:
            -1, 0 or 1 if this entry's offset is less than, equal to or
            greater than the other entry's offset.
        """
        # Equivalent to cmp(self.offset, other.offset) without relying on the
        # cmp() builtin, which does not exist in Python 3.
        return (self.offset > other.offset) - (self.offset < other.offset)
56
57
class Vtable(object):
    """This class contains a vtable and its entries.

    Vtables are ordered by begin_addr. A Vtable can be compared either with
    another Vtable or with a raw address, which allows a sorted list of
    Vtable to be binary-searched by address. Ordering is implemented with
    rich comparison methods because Python 3 ignores __cmp__ and has no cmp()
    builtin.

    Attributes:
        name: Symbol name of vtable.
        begin_addr: Begin address of vtable.
        end_addr: End Address of vtable.
        entries: A list of VtableEntry.
    """

    def __init__(self, name, begin_addr, end_addr):
        self.name = name
        self.begin_addr = begin_addr
        self.end_addr = end_addr
        self.entries = []

    @staticmethod
    def _ComparisonKey(other):
        """Returns the address to compare against.

        Args:
            other: A Vtable or a raw address.

        Returns:
            other.begin_addr if other is a Vtable, otherwise other itself.
        """
        if isinstance(other, Vtable):
            return other.begin_addr
        return other

    def __lt__(self, other):
        """Returns whether begin_addr is below the other vtable / address."""
        return self.begin_addr < self._ComparisonKey(other)

    def __le__(self, other):
        """Returns whether begin_addr is at most the other vtable / address."""
        return self.begin_addr <= self._ComparisonKey(other)

    def __gt__(self, other):
        """Returns whether begin_addr is above the other vtable / address."""
        return self.begin_addr > self._ComparisonKey(other)

    def __ge__(self, other):
        """Returns whether begin_addr is at least the other vtable / address."""
        return self.begin_addr >= self._ComparisonKey(other)

    def __cmp__(self, other):
        """Three-way comparison by begin_addr, kept for Python 2 callers.

        Args:
            other: A Vtable or a raw address.

        Returns:
            -1, 0 or 1 if begin_addr is less than, equal to or greater than
            the other vtable's begin_addr (or the raw address).
        """
        key = self._ComparisonKey(other)
        # Equivalent to cmp(self.begin_addr, key) without relying on the
        # cmp() builtin, which does not exist in Python 3.
        return (self.begin_addr > key) - (self.begin_addr < key)

    def __str__(self):
        """Formats a header line followed by one line per entry."""
        lines = ['vtable {} {} entries begin_addr={:#x} size={:#x}'
                 .format(self.name,
                         len(self.entries),
                         self.begin_addr,
                         self.end_addr - self.begin_addr)]
        lines.extend('{:#x} {} {:#x} {}'
                     .format(entry.offset,
                             entry.is_undefined,
                             entry.value,
                             entry.names)
                     for entry in self.entries)
        return '\n'.join(lines)
94
95
class VtableDumper(elf_parser.ElfParser):
    """This class wraps around a ElfParser and dumps vtables from an ELF file.

    Vtables are collected from defined symbols whose names start with '_ZTV'.
    Their entries are rebuilt from the absolute and relative relocations
    whose target address falls inside a vtable's address range.
    """

    def __init__(self, file_path, begin_offset=0):
        """Creates a VtableDumper to open and dump an ELF file's vtable.

        Args:
            file_path: The path to the file.
            begin_offset: The offset of the ELF object in the file.

        Raises:
            ElfError: File is not a valid ELF.
        """
        super(VtableDumper, self).__init__(file_path, begin_offset)

    def DumpVtables(self):
        """Scans the relocation section and dump exported vtables.

        Returns:
            A list of Vtable sorted by begin_addr. The entries of each Vtable
            are sorted by offset.

        Raises:
            VtableError: Fails to dump vtable.
            ElfError: ELF decoding fails.
        """
        # Determine absolute and relative relocation type from e_machine.
        machine = self.Ehdr.e_machine
        rel_types = {
            consts.EM_ARM: (consts.R_ARM_ABS32, consts.R_ARM_RELATIVE),
            consts.EM_AARCH64: (consts.R_AARCH64_ABS64,
                                consts.R_AARCH64_RELATIVE),
            consts.EM_386: (consts.R_386_32, consts.R_386_RELATIVE),
            consts.EM_X86_64: (consts.R_X86_64_64, consts.R_X86_64_RELATIVE),
        }
        if machine not in rel_types:
            raise VtableError('Unexpected machine type: {}'.format(machine))
        rel_abs_type, rel_relative_type = rel_types[machine]
        # Initialize vtable ranges.
        vtables = self._PrepareVtables()
        inv_table = self._FunctionSymbolInverseTable()
        # Scan relocation sections.
        for rel_sh in self._RelocationSections():
            is_rela = rel_sh.sh_type in (consts.SHT_RELA,
                                         consts.SHT_ANDROID_RELA)
            is_relr = rel_sh.sh_type in (consts.SHT_RELR,
                                         consts.SHT_ANDROID_RELR)
            symtab = self.Shdr[rel_sh.sh_link]
            strtab = self.Shdr[symtab.sh_link]
            for reloc in self.GetRelocations(rel_sh):
                # RELR is relative and has no type.
                is_absolute_type = (not is_relr and
                                    reloc.GetType() == rel_abs_type)
                is_relative_type = (is_relr or
                                    reloc.GetType() == rel_relative_type)
                if not (is_absolute_type or is_relative_type):
                    continue
                # If relocation target is a vtable entry, find the vtable.
                vtable = self._LocateVtable(vtables, reloc.r_offset)
                if vtable is None:
                    continue
                # *_RELA sections have explicit addend.
                # *_REL and *_RELR sections have implicit addend.
                if is_rela:
                    addend = reloc.r_addend
                else:
                    addend = self._ReadRelocationAddend(reloc)
                if is_absolute_type:
                    # Absolute relocations uses symbol value + addend.
                    sym = self.GetRelocationSymbol(symtab, reloc)
                    reloc_value = sym.st_value + addend
                    sym_is_undefined = (sym.st_shndx == consts.SHN_UNDEF)
                    entry_names = inv_table.get(reloc_value)
                    if entry_names is None:
                        # No defined symbol at that address: fall back to the
                        # name of the symbol the relocation refers to.
                        entry_names = [self.GetString(strtab, sym.st_name)]
                else:
                    # Relative relocations don't have symbol table entry,
                    # instead it uses a vaddr offset which is stored
                    # in the addend value.
                    reloc_value = addend
                    sym_is_undefined = False
                    entry_names = inv_table.get(reloc_value, [])
                vtable.entries.append(VtableEntry(
                    reloc.r_offset - vtable.begin_addr,
                    entry_names, reloc_value, sym_is_undefined))
        # Sort the vtable entries by offset. An explicit key is used so the
        # sort does not depend on how VtableEntry implements comparison.
        for vtable in vtables:
            vtable.entries.sort(key=lambda entry: entry.offset)
        return vtables

    def _IterSymbolTables(self):
        """Yields the symbol tables that are present in the ELF file.

        Yields:
            Tuples of (symtab, strtab) section headers for '.symtab' and
            '.dynsym', in that order, skipping the ones that are missing.
            strtab is the section referred to by the symbol table's sh_link.
        """
        for symtab_name in ('.symtab', '.dynsym'):
            # Object files may have one section of each type
            symtab = self.GetSectionByName(symtab_name)
            if not symtab:
                continue
            yield symtab, self.Shdr[symtab.sh_link]

    def _PrepareVtables(self):
        """Collects vtable symbols from symbol table / dynamic symbol table.

        A symbol is a vtable if it is defined and its name starts with
        '_ZTV'. A name that appears more than once is only collected once.

        Returns:
            A list of Vtable sorted by begin_addr, with no entries yet.

        Raises:
            ElfError: ELF decoding fails.
        """
        vtables = []
        seen_names = set()
        for symtab, strtab in self._IterSymbolTables():
            for sym in self.GetSymbols(symtab):
                if sym.st_shndx == consts.SHN_UNDEF:
                    continue
                sym_name = self.GetString(strtab, sym.st_name)
                if not sym_name.startswith('_ZTV') or sym_name in seen_names:
                    continue
                seen_names.add(sym_name)
                vtables.append(Vtable(sym_name,
                                      sym.st_value,
                                      sym.st_value + sym.st_size))
        # Sort the vtables with Vtable.begin_addr so that we can use binary
        # search to speed up _LocateVtable()'s query.
        vtables.sort(key=lambda vtable: vtable.begin_addr)
        return vtables

    def _FunctionSymbolInverseTable(self):
        """Returns an address to symbol name inverse lookup table.

        For object and function symbols in .symtab and .dynsym that are not
        undefined, construct an address to symbol name lookup table.

        Returns:
            A dictionary of {address: [symbol names]}. Each list is sorted
            and contains no duplicates.

        Raises:
            ElfError: ELF decoding fails.
        """
        names_by_addr = {}
        for symtab, strtab in self._IterSymbolTables():
            for sym in self.GetSymbols(symtab):
                if (sym.GetType() in (consts.STT_OBJECT, consts.STT_FUNC)
                        and sym.st_shndx != consts.SHN_UNDEF):
                    sym_name = self.GetString(strtab, sym.st_name)
                    # A set drops names that appear in both symbol tables.
                    names_by_addr.setdefault(sym.st_value, set()).add(sym_name)
        return {addr: sorted(names) for addr, names in names_by_addr.items()}

    def _LocateVtable(self, vtables, offset):
        """Searches for the vtable that contains the offset.

        Args:
            vtables: A list of Vtable to search from, sorted by begin_addr.
            offset: The offset value to search for.

        Returns:
            The vtable whose begin_addr <= offset and offset < end_addr.
            None if no such vtable could be found.
        """
        # Binary search for the first vtable whose begin_addr is greater than
        # offset. It compares begin_addr directly so that it does not depend
        # on how Vtable implements comparison against a plain address.
        low, high = 0, len(vtables)
        while low < high:
            mid = (low + high) // 2
            if offset < vtables[mid].begin_addr:
                high = mid
            else:
                low = mid + 1
        if low <= 0:
            return None
        # The only candidate is the last vtable starting at or before offset.
        vtable = vtables[low - 1]
        if vtable.begin_addr <= offset < vtable.end_addr:
            return vtable
        return None

    def _ReadRelocationAddend(self, reloc):
        """Reads the addend value from the location to be modified.

        Args:
            reloc: A Elf_Rel containing the relocation.

        Returns:
            An integer, the addend value. 0 if the relocation target lies in
            a SHT_NOBITS section.

        Raises:
            VtableError: reloc is not a valid relocation.
            ElfError: ELF decoding fails.
        """
        # NOTE(review): sections are matched only by their address range
        # [sh_addr, sh_addr + sh_size); the first match in header order wins
        # and no section flags are checked. Confirm this cannot pick an
        # unrelated section.
        for sh in self.Shdr:
            sh_begin = sh.sh_addr
            sh_end = sh.sh_addr + sh.sh_size
            if not (sh_begin <= reloc.r_offset < sh_end):
                continue
            if sh.sh_type == consts.SHT_NOBITS:
                return 0
            # Translate the target address into an offset within the file.
            offset = reloc.r_offset - sh.sh_addr + sh.sh_offset
            addend = self._SeekReadStruct(offset, self.Elf_Addr)
            return addend.value
        raise VtableError('Invalid relocation: '
                          'Cannot find relocation target section '
                          'r_offset = {:#x}, r_info = {:#x}'
                          .format(reloc.r_offset, reloc.r_info))

    def _RelocationSections(self):
        """Yields section headers that contain relocation data."""
        sh_rel_types = (consts.SHT_REL, consts.SHT_RELA, consts.SHT_RELR,
                        consts.SHT_ANDROID_REL, consts.SHT_ANDROID_RELA,
                        consts.SHT_ANDROID_RELR)
        for sh in self.Shdr:
            if sh.sh_type in sh_rel_types:
                yield sh