1#
2# Copyright (C) 2017 The Android Open Source Project
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#      http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15#
16r"""This file contains an ELF parser and ELF header structures.
17
18Example usage:
19    import elf_parser
20    with elf_parser.ElfParser(file) as e:
21        print('\n'.join(e.ListGlobalDynamicSymbols()))
22        print('\n'.join(e.ListDependencies()[0]))
23"""
24
25import ctypes
26import os
27import struct
28
29from vts.utils.python.library.elf import consts
30from vts.utils.python.library.elf import structs
31from vts.utils.python.library.elf import utils
32
33
class ElfError(Exception):
    """Signals that ElfParser failed to open, read or decode an ELF file."""
37
38
class ElfParser(object):
    """Reads information from a little-endian ELF32 / ELF64 file.

    The parser opens the file in its constructor and keeps it open until
    Close() is called, the object is used as a context manager and the
    "with" block exits, or the object is garbage collected.

    Attributes:
        _file: The ELF file object.
        _begin_offset: The offset of the ELF object in the file. The value is
                       non-zero if the ELF is in an archive, such as .a file.
        _file_size: Size of the file.
        bitness: Bitness of the ELF, 32 or 64.
        Ehdr: An Elf_Ehdr, the ELF header structure of the file.
        Shdr: A list of Elf_Shdr, the section headers of the file.
        Elf_Addr: ELF unsigned program address type.
        Elf_Off: ELF unsigned file offset type.
        Elf_Half: ELF unsigned medium integer type.
        Elf_Word: ELF unsigned integer type.
        Elf_Sword: ELF signed integer type.
        Elf_Ehdr: ELF header class.
        Elf_Shdr: ELF section header class.
        Elf_Dyn: ELF dynamic entry class.
        Elf_Sym: ELF symbol entry class.
        Elf_Rel: ELF relocation entry class.
        Elf_Rela: ELF relocation entry class with explicit addend.
        Elf_Phdr: ELF program header class.
        Elf_Nhdr: ELF note header class.
    """

    # Suffixes of the bitness-specific types in the structs module. For each
    # suffix X, the parser exposes structs.Elf32_X or structs.Elf64_X as
    # self.Elf_X.
    _ELF_TYPE_SUFFIXES = ("Addr", "Off", "Half", "Word", "Sword", "Ehdr",
                          "Shdr", "Dyn", "Sym", "Rel", "Rela", "Phdr", "Nhdr")

    def __init__(self, file_path, begin_offset=0):
        """Creates a parser to open and read an ELF file.

        Args:
            file_path: The path to the file.
            begin_offset: The offset of the ELF object in the file.

        Raises:
            ElfError: File cannot be opened or is not a valid ELF.
        """
        self._begin_offset = begin_offset
        try:
            self._file = open(file_path, 'rb')
        except IOError as e:
            raise ElfError(e)

        try:
            try:
                self._file_size = os.fstat(self._file.fileno()).st_size
            except OSError as e:
                raise ElfError(e)
            self._LoadHeaders()
        except Exception:
            # Whatever goes wrong, do not leave the file open behind a
            # half-constructed parser.
            self.Close()
            raise

    def _LoadHeaders(self):
        """Validates e_ident and loads the ELF header and section headers.

        Sets bitness, the Elf_* type aliases, Ehdr and Shdr.

        Raises:
            ElfError: File is not a valid little-endian ELF32 / ELF64.
        """
        e_ident = self._SeekRead(0, consts.EI_NIDENT)

        if e_ident[:4] != consts.ELF_MAGIC_NUMBER:
            raise ElfError('Unexpected magic bytes: {}'.format(e_ident[:4]))

        elf_class = utils.ByteToInt(e_ident[consts.EI_CLASS])
        if elf_class not in (consts.ELFCLASS32, consts.ELFCLASS64):
            raise ElfError('Unexpected file class: {}'
                           .format(e_ident[consts.EI_CLASS]))

        if utils.ByteToInt(e_ident[consts.EI_DATA]) != consts.ELFDATA2LSB:
            raise ElfError('Unexpected data encoding: {}'
                           .format(e_ident[consts.EI_DATA]))

        self.bitness = 32 if elf_class == consts.ELFCLASS32 else 64
        for suffix in self._ELF_TYPE_SUFFIXES:
            struct_name = "Elf{}_{}".format(self.bitness, suffix)
            setattr(self, "Elf_" + suffix, getattr(structs, struct_name))

        self.Ehdr = self._SeekReadStruct(0, self.Elf_Ehdr)
        shoff = self.Ehdr.e_shoff
        shentsize = self.Ehdr.e_shentsize
        self.Shdr = [self._SeekReadStruct(shoff + i * shentsize,
                                          self.Elf_Shdr)
                     for i in range(self.Ehdr.e_shnum)]

    def __del__(self):
        """Closes the ELF file."""
        self.Close()

    def __enter__(self):
        """Returns the parser itself so that it can be bound by "with"."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Closes the ELF file. Exceptions are not suppressed."""
        self.Close()

    def Close(self):
        """Closes the ELF file.

        It is safe to call this method more than once, and on an object whose
        constructor failed before the file was opened.
        """
        file_obj = getattr(self, "_file", None)
        if file_obj is not None:
            file_obj.close()

    @staticmethod
    def _GetEntryCount(sh):
        """Computes the number of fixed-size entries in a section.

        Args:
            sh: A section header.

        Returns:
            An integer, sh_size divided by sh_entsize, rounded down.

        Raises:
            ElfError: The entry size of the section is zero.
        """
        if sh.sh_entsize == 0:
            raise ElfError("Section entry size is zero.")
        return int(sh.sh_size // sh.sh_entsize)

    def _SeekRead(self, offset, read_size):
        """Reads a byte string at specific offset in the file.

        Args:
            offset: An integer, the offset from the beginning of the ELF.
            read_size: An integer, number of bytes to read.

        Returns:
            A bytes object which is the file content.

        Raises:
            ElfError: Fails to seek and read.
        """
        # _file_size is the size of the whole file, so the check must use
        # the absolute position rather than the offset inside the ELF object.
        file_offset = self._begin_offset + offset
        if file_offset + read_size > self._file_size:
            raise ElfError("Read beyond end of file.")
        try:
            self._file.seek(file_offset)
            data = self._file.read(read_size)
        except IOError as e:
            raise ElfError(e)
        if len(data) != read_size:
            # The file shrank after its size was recorded.
            raise ElfError("Read beyond end of file.")
        return data

    # The integer readers below use explicit little-endian formats because
    # the constructor accepts ELFDATA2LSB files only; the native byte order
    # of the host must not affect the result.

    def _SeekRead8(self, offset):
        """Reads an unsigned 1-byte integer from file."""
        return struct.unpack("<B", self._SeekRead(offset, 1))[0]

    def _SeekRead16(self, offset):
        """Reads an unsigned little-endian 2-byte integer from file."""
        return struct.unpack("<H", self._SeekRead(offset, 2))[0]

    def _SeekRead32(self, offset):
        """Reads an unsigned little-endian 4-byte integer from file."""
        return struct.unpack("<I", self._SeekRead(offset, 4))[0]

    def _SeekRead64(self, offset):
        """Reads an unsigned little-endian 8-byte integer from file."""
        return struct.unpack("<Q", self._SeekRead(offset, 8))[0]

    def _SeekReadString(self, offset):
        """Reads a null-terminated string starting from specific offset.

        Args:
            offset: The offset from the beginning of the ELF object.

        Returns:
            A string, excluding the null character.

        Raises:
            ElfError: Fails to seek and read.
                      String reaches end of file without null terminator.
        """
        chunks = []
        buf_size = 16
        try:
            self._file.seek(self._begin_offset + offset)
            while True:
                buf = self._file.read(buf_size)
                end_index = buf.find(b"\0")
                if end_index >= 0:
                    chunks.append(buf[:end_index])
                    break
                if len(buf) != buf_size:
                    # A short read without a terminator means end of file.
                    raise ElfError(
                        "Null-terminated string reaches end of file.")
                chunks.append(buf)
        except IOError as e:
            raise ElfError(e)
        return utils.BytesToString(b"".join(chunks))

    def _SeekReadStruct(self, offset, struct_type):
        """Reads a ctypes.Structure / ctypes.Union from file.

        Args:
            offset: An integer, the offset from the beginning of the ELF.
            struct_type: A class, the structure type to read.

        Returns:
            An object of struct_type.

        Raises:
            ElfError: Fails to seek and read.
                      Fails to create struct_type instance.
        """
        raw_bytes = self._SeekRead(offset, ctypes.sizeof(struct_type))
        try:
            return struct_type.from_buffer_copy(raw_bytes)
        except ValueError as e:
            raise ElfError(e)

    def GetString(self, strtab, offset):
        """Retrieves a null-terminated string from string table.

        Args:
            strtab: Section header of the string table.
            offset: Section offset (string index) to start reading from.

        Returns:
            A string without the null terminator.

        Raises:
            ElfError: Fails to seek and read.
        """
        return self._SeekReadString(strtab.sh_offset + offset)

    def GetSectionName(self, sh):
        """Returns a section name.

        The name is looked up in the section name string table, which is the
        section indexed by e_shstrndx.

        Args:
            sh: A section header.

        Returns:
            A String.

        Raises:
            ElfError: Fails to seek and read.
        """
        shstrtab = self.Shdr[self.Ehdr.e_shstrndx]
        return self.GetString(shstrtab, sh.sh_name)

    def GetSectionsByName(self, name):
        """Returns a generator of section headers from a given name.

        If multiple sections have the same name, yield them all.

        Args:
            name: The section name to search for.

        Returns:
            A generator of Elf_Shdr.

        Raises:
            ElfError: Fails to seek and read.
        """
        return (sh for sh in self.Shdr if name == self.GetSectionName(sh))

    def GetSectionByName(self, name):
        """Returns a section header whose name equals a given name.

        Returns only the first match, assuming the section name is unique.

        Args:
            name: The section name to search for.

        Returns:
            An Elf_Shdr if found.
            None if no sections have the given name.

        Raises:
            ElfError: Fails to seek and read.
        """
        return next(self.GetSectionsByName(name), None)

    def GetDynamic(self, dynamic):
        """Yields the _DYNAMIC array.

        Iteration stops after the first DT_NULL entry, which is yielded too.

        Args:
            dynamic: Section header of the dynamic section.

        Yields:
            Elf_Dyn.

        Raises:
            ElfError: Fails to seek and read.
                      The entry size of the section is zero.
        """
        off = dynamic.sh_offset
        for _ in range(self._GetEntryCount(dynamic)):
            dyn = self._SeekReadStruct(off, self.Elf_Dyn)
            yield dyn
            if dyn.d_tag == consts.DT_NULL:
                break
            off += dynamic.sh_entsize

    def GetSymbol(self, symtab, idx):
        """Retrieves a Elf_Sym entry from symbol table.

        Args:
            symtab: A symbol table.
            idx: An integer, symbol table index.

        Returns:
            An Elf_Sym.

        Raises:
            ElfError: Fails to seek and read.
        """
        off = symtab.sh_offset + idx * symtab.sh_entsize
        return self._SeekReadStruct(off, self.Elf_Sym)

    def GetSymbols(self, symtab):
        """Returns a generator of Elf_Sym in symbol table.

        Args:
            symtab: A symbol table.

        Returns:
            A generator of Elf_Sym.

        Raises:
            ElfError: Fails to seek and read.
                      The entry size of the section is zero.
        """
        num = self._GetEntryCount(symtab)
        return (self.GetSymbol(symtab, i) for i in range(num))

    def GetRelocationSymbol(self, symtab, rel):
        """Retrieves the Elf_Sym with respect to an Elf_Rel / Elf_Rela.

        Args:
            symtab: A symbol table.
            rel: A Elf_Rel or Elf_Rela.

        Returns:
            An Elf_Sym.

        Raises:
            ElfError: Fails to seek and read.
        """
        return self.GetSymbol(symtab, rel.GetSymbol())

    def _CreateElfRel(self, offset, info):
        """Creates an instance of Elf_Rel.

        Args:
            offset: The initial value of r_offset.
            info: The initial value of r_info.

        Returns:
            An Elf_Rel.
        """
        elf_rel = self.Elf_Rel()
        elf_rel.r_offset = offset
        elf_rel.r_info = info
        return elf_rel

    def _DecodeAndroidRelr(self, rel):
        """Decodes a SHT_RELR / SHT_ANDROID_RELR section.

        Every entry is either an address (lowest bit clear), which denotes
        one relocation and resets the running offset to the next word, or a
        bitmap (lowest bit set), whose remaining bits each select one word
        following the running offset.

        Args:
            rel: A relocation table.

        Yields:
            Elf_Rel, with r_info set to 0.

        Raises:
            ElfError: Fails to seek and read.
                      The entry size of the section is zero.
        """
        if rel.sh_entsize == 0:
            # range() below would reject a zero step with ValueError.
            raise ElfError("Section entry size is zero.")

        if self.bitness == 32:
            addr_size = 4
            seek_read_entry = self._SeekRead32
        else:
            addr_size = 8
            seek_read_entry = self._SeekRead64

        rel_offset = 0
        end_offset = rel.sh_offset + rel.sh_size
        for ent_offset in range(rel.sh_offset, end_offset, rel.sh_entsize):
            relr_entry = seek_read_entry(ent_offset)
            if (relr_entry & 1) == 0:
                # The entry is an address.
                yield self._CreateElfRel(relr_entry, 0)
                rel_offset = relr_entry + addr_size
            else:
                # The entry is a bitmap. Bit 0 is the marker, so start at 1.
                # The running offset advances for every bit, set or not.
                for bit_idx in range(1, rel.sh_entsize * 8):
                    if (relr_entry >> bit_idx) & 1:
                        yield self._CreateElfRel(rel_offset, 0)
                    rel_offset += addr_size

    def GetRelocation(self, rel, idx):
        """Retrieves a Elf_Rel / Elf_Rela entry from relocation table.

        Args:
            rel: A relocation table.
            idx: An integer, relocation table index.

        Returns:
            An Elf_Rela if the section type is SHT_RELA; otherwise an Elf_Rel.

        Raises:
            ElfError: Fails to seek and read.
        """
        off = rel.sh_offset + idx * rel.sh_entsize
        if rel.sh_type == consts.SHT_RELA:
            return self._SeekReadStruct(off, self.Elf_Rela)
        return self._SeekReadStruct(off, self.Elf_Rel)

    def GetRelocations(self, rel):
        """Returns a generator of Elf_Rel / Elf_Rela in relocation table.

        Packed Android sections and RELR sections are decoded; other
        sections are read as arrays of fixed-size entries.

        Args:
            rel: A relocation table.

        Returns:
            A generator of Elf_Rel or Elf_Rela.

        Raises:
            ElfError: Fails to seek and read.
                      The entry size of the section is zero.
        """
        if rel.sh_type in (consts.SHT_ANDROID_REL, consts.SHT_ANDROID_RELA):
            relocations = self._UnpackAndroidRela(rel)
            if rel.sh_type == consts.SHT_ANDROID_REL:
                # The unpacker always produces Elf_Rela; drop the addend.
                return (self.Elf_Rel(r_offset=rela.r_offset,
                                     r_info=rela.r_info)
                        for rela in relocations)
            return relocations
        if rel.sh_type in (consts.SHT_RELR, consts.SHT_ANDROID_RELR):
            return self._DecodeAndroidRelr(rel)
        num = self._GetEntryCount(rel)
        return (self.GetRelocation(rel, i) for i in range(num))

    def _UnpackAndroidRela(self, android_rela):
        """Unpacks a SHT_ANDROID_REL / SHT_ANDROID_RELA section.

        The section starts with the magic bytes "APS2" followed by a stream
        of SLEB128 words: the total relocation count, the initial offset, and
        then groups of delta-encoded relocations.

        Args:
            android_rela: The packed section's section header.

        Yields:
            Elf_Rela.

        Raises:
            ElfError: Fails to decode android rela section.
        """
        data = self._SeekRead(android_rela.sh_offset, android_rela.sh_size)
        # Check packed section header.
        if len(data) < 4 or data[:4] != b'APS2':
            raise ElfError('Unexpected SHT_ANDROID_RELA header: {}'
                           .format(data[:4]))

        # Decode SLEB128 word stream.
        def _PackedWordsGen():
            cur = 4
            while cur < len(data):
                try:
                    value, num = utils.DecodeSLEB128(data, cur)
                except IndexError:
                    raise ElfError('Decoding pass end of section.')
                yield value
                cur += num
            # Asking for a word after the last one is a decoding error.
            raise ElfError('Decoding pass end of section.')

        _packed_words_gen = _PackedWordsGen()

        def _PopWord():
            return next(_packed_words_gen)

        # Decode delta encoded relocation data.
        current_count = 0
        total_count = _PopWord()
        offset = _PopWord()
        addend = 0
        while current_count < total_count:
            # Read relocation group info.
            group_size = _PopWord()
            group_flags = _PopWord()
            group_offset_delta = 0
            # Read group flag and prepare delta values.
            grouped_by_info = (
                group_flags & consts.RELOCATION_GROUPED_BY_INFO_FLAG)
            grouped_by_offset_delta = (
                group_flags & consts.RELOCATION_GROUPED_BY_OFFSET_DELTA_FLAG)
            grouped_by_addend = (
                group_flags & consts.RELOCATION_GROUPED_BY_ADDEND_FLAG)
            group_has_addend = (
                group_flags & consts.RELOCATION_GROUP_HAS_ADDEND_FLAG)
            # The order of the following reads matches the word stream.
            if grouped_by_offset_delta:
                group_offset_delta = _PopWord()
            if grouped_by_info:
                info = _PopWord()
            if group_has_addend and grouped_by_addend:
                addend += _PopWord()
            if not group_has_addend:
                addend = 0
            # Handle each relocation entry in group.
            for _ in range(group_size):
                if grouped_by_offset_delta:
                    offset += group_offset_delta
                else:
                    offset += _PopWord()
                if not grouped_by_info:
                    info = _PopWord()
                if group_has_addend and not grouped_by_addend:
                    addend += _PopWord()

                yield self.Elf_Rela(r_offset=offset,
                                    r_info=info,
                                    r_addend=addend)
            current_count += group_size

    def _LoadDynamicSection(self, dynamic):
        """Reads entries from dynamic section.

        Args:
            dynamic: Section header of the dynamic section.

        Returns:
            A dict of {DT_NEEDED: [libraries names], DT_RUNPATH: [paths]}
            where the library names and the paths are strings.

        Raises:
            ElfError: Fails to find dynamic string table.
        """
        strtab_addr = None
        dt_needed_offsets = []
        dt_runpath_offsets = []
        for dyn in self.GetDynamic(dynamic):
            if dyn.d_tag == consts.DT_NEEDED:
                dt_needed_offsets.append(dyn.d_un.d_val)
            elif dyn.d_tag == consts.DT_RUNPATH:
                dt_runpath_offsets.append(dyn.d_un.d_val)
            elif dyn.d_tag == consts.DT_STRTAB:
                strtab_addr = dyn.d_un.d_ptr

        if strtab_addr is None:
            raise ElfError("Cannot find string table address in dynamic "
                           "section.")
        # DT_STRTAB holds an address; find the section loaded at it.
        strtab = next((sh for sh in self.Shdr if sh.sh_addr == strtab_addr),
                      None)
        if strtab is None:
            raise ElfError("Cannot find dynamic string table.")

        dt_needed = [self.GetString(strtab, off) for off in dt_needed_offsets]
        dt_runpath = []
        for off in dt_runpath_offsets:
            # One DT_RUNPATH entry may hold several colon-separated paths.
            dt_runpath.extend(self.GetString(strtab, off).split(":"))
        return {consts.DT_NEEDED: dt_needed, consts.DT_RUNPATH: dt_runpath}

    def IsExecutable(self):
        """Returns whether the ELF is executable."""
        return self.Ehdr.e_type == consts.ET_EXEC

    def IsSharedObject(self):
        """Returns whether the ELF is a shared object."""
        return self.Ehdr.e_type == consts.ET_DYN

    def HasAndroidIdent(self):
        """Returns whether the ELF has an Android ident note.

        Returns:
            A boolean, whether any section named .note.android.ident begins
            with a note whose name is "Android".

        Raises:
            ElfError: Fails to seek and read.
        """
        nhdr_size = ctypes.sizeof(self.Elf_Nhdr)
        for sh in self.GetSectionsByName(".note.android.ident"):
            nh = self._SeekReadStruct(sh.sh_offset, self.Elf_Nhdr)
            # The note name immediately follows the note header.
            name = self._SeekRead(sh.sh_offset + nhdr_size, nh.n_namesz)
            if name == b"Android\0":
                return True
        return False

    def MatchCpuAbi(self, abi):
        """Returns whether the ELF matches the ABI.

        Args:
            abi: A string, the name of the ABI.

        Returns:
            A boolean, whether the ELF matches the ABI.
            False if the ABI name is not recognized.
        """
        # Longer prefixes come first so that, for example, "arm64-v8a" is not
        # matched by "arm".
        for abi_prefix, machine in (("arm64", consts.EM_AARCH64),
                                    ("arm", consts.EM_ARM),
                                    ("mips64", consts.EM_MIPS),
                                    ("mips", consts.EM_MIPS),
                                    ("x86_64", consts.EM_X86_64),
                                    ("x86", consts.EM_386)):
            if abi.startswith(abi_prefix):
                return self.Ehdr.e_machine == machine
        return False

    def ListDependencies(self):
        """Lists the shared libraries that the ELF depends on.

        Returns:
            2 lists of strings, the names of the depended libraries and the
            search paths.

        Raises:
            ElfError: Fails to read a dynamic section.
        """
        deps = []
        runpaths = []
        for sh in self.Shdr:
            if sh.sh_type != consts.SHT_DYNAMIC:
                continue
            dynamic = self._LoadDynamicSection(sh)
            deps.extend(dynamic[consts.DT_NEEDED])
            runpaths.extend(dynamic[consts.DT_RUNPATH])
        return deps, runpaths

    def ListGlobalSymbols(self, include_weak=False,
                          symtab_name=consts.SYMTAB,
                          strtab_name=consts.STRTAB):
        """Lists the global symbols defined in the ELF.

        Symbols without a type and undefined symbols are skipped.

        Args:
            include_weak: A boolean, whether to include weak symbols.
            symtab_name: A string, the name of the symbol table.
            strtab_name: A string, the name of the string table.

        Returns:
            A list of strings, the names of the symbols.

        Raises:
            ElfError: Fails to find symbol table.
        """
        symtab = self.GetSectionByName(symtab_name)
        strtab = self.GetSectionByName(strtab_name)
        if not symtab or not strtab or symtab.sh_size == 0:
            raise ElfError("Cannot find symbol table.")

        # Global symbols can be defined at most once at link time,
        # while weak symbols may have multiple definitions.
        if include_weak:
            include_bindings = (consts.STB_GLOBAL, consts.STB_WEAK)
        else:
            include_bindings = (consts.STB_GLOBAL,)

        sym_names = []
        for sym in self.GetSymbols(symtab):
            if sym.GetType() == consts.STT_NOTYPE:
                continue
            if sym.GetBinding() not in include_bindings:
                continue
            if sym.st_shndx == consts.SHN_UNDEF:
                continue
            sym_names.append(self.GetString(strtab, sym.st_name))
        return sym_names

    def ListGlobalDynamicSymbols(self, include_weak=False):
        """Lists the global dynamic symbols defined in the ELF.

        Args:
            include_weak: A boolean, whether to include weak symbols.

        Returns:
            A list of strings, the names of the symbols.

        Raises:
            ElfError: Fails to find symbol table.
        """
        return self.ListGlobalSymbols(include_weak,
                                      consts.DYNSYM, consts.DYNSTR)

    def GetProgramInterpreter(self):
        """Gets the path to the program interpreter of the ELF.

        Returns:
            A string, the null-terminated string at the file offset of the
            first PT_INTERP program header.
            None if there is no PT_INTERP program header.

        Raises:
            ElfError: Fails to seek and read.
        """
        phoff = self.Ehdr.e_phoff
        phentsize = self.Ehdr.e_phentsize
        for ph_index in range(self.Ehdr.e_phnum):
            ph = self._SeekReadStruct(phoff + ph_index * phentsize,
                                      self.Elf_Phdr)
            if ph.p_type == consts.PT_INTERP:
                return self._SeekReadString(ph.p_offset)
        return None
702