1#!/usr/bin/env python
2#
3# Copyright (C) 2017 The Android Open Source Project
4#
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#      http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18import os
19import struct
20
21
class ElfError(Exception):
    """Raised when ElfParser fails to open, read, or parse a file."""
25
26
class ElfParser(object):
    """The class reads information from an ELF file.

    The constructor opens the file, validates the ELF identification bytes
    and loads every section header. The file stays open until close() is
    called; the object can also be used in a "with" statement, which closes
    the file when the block exits.

    Attributes:
        _file: The ELF file object, opened in binary mode.
        _file_size: Size of the ELF in bytes.
        bitness: Bitness of the ELF, 32 or 64.
        _byte_order: The struct byte-order prefix ("<" or ">") selected from
                     the ELF header.
        _address_size: Size of address or offset in the ELF.
        _offsets: Offset of each entry in the ELF.
        _seek_read_address: The function to read an address or offset entry
                            from the ELF.
        _sh_offset: Offset of section header table in the file.
        _sh_size: Size of section header table entry.
        _sh_count: Number of section header table entries.
        _section_headers: List of SectionHeader objects read from the ELF.
    """
    _MAGIC_OFFSET = 0
    _MAGIC_BYTES = b"\x7fELF"
    _BITNESS_OFFSET = 4
    _BITNESS_32 = 1
    _BITNESS_64 = 2
    # Byte order of the multi-byte fields, e_ident[EI_DATA]
    _ENDIANNESS_OFFSET = 5
    _ENDIANNESS_LITTLE = 1
    _ENDIANNESS_BIG = 2
    # Section type
    _SHT_DYNAMIC = 6
    # Tag in dynamic section
    _DT_NULL = 0
    _DT_NEEDED = 1
    _DT_STRTAB = 5

    # Used only until _loadElfHeader() selects the byte order of the file:
    # native byte order with standard sizes.
    _byte_order = "="

    class ElfOffsets32(object):
        """Offset of each entry in 32-bit ELF"""
        # offset from ELF header
        SECTION_HEADER_OFFSET = 0x20
        SECTION_HEADER_SIZE = 0x2e
        SECTION_HEADER_COUNT = 0x30
        # offset from section header
        SECTION_TYPE = 0x04
        SECTION_ADDRESS = 0x0c
        SECTION_OFFSET = 0x10

    class ElfOffsets64(object):
        """Offset of each entry in 64-bit ELF"""
        # offset from ELF header
        SECTION_HEADER_OFFSET = 0x28
        SECTION_HEADER_SIZE = 0x3a
        SECTION_HEADER_COUNT = 0x3c
        # offset from section header
        SECTION_TYPE = 0x04
        SECTION_ADDRESS = 0x10
        SECTION_OFFSET = 0x18

    class SectionHeader(object):
        """Contains section header entries as attributes.

        Attributes:
            type: Type of the section.
            address: The virtual memory address where the section is loaded.
            offset: The offset of the section in the ELF file.
        """
        def __init__(self, type, address, offset):
            self.type = type
            self.address = address
            self.offset = offset

    def __init__(self, file_path):
        """Creates a parser to open and read an ELF file.

        Args:
            file_path: The path to the ELF.

        Raises:
            ElfError if the file cannot be opened or is not a valid ELF.
        """
        try:
            self._file = open(file_path, "rb")
        except IOError as e:
            raise ElfError(e)
        try:
            self._loadElfHeader()
            self._section_headers = [
                    self._loadSectionHeader(self._sh_offset + i * self._sh_size)
                    for i in range(self._sh_count)]
        except:
            # Deliberately catches everything: the file must not leak when
            # parsing fails, and the exception is re-raised unchanged.
            self._file.close()
            raise

    def __del__(self):
        """Closes the ELF file."""
        self.close()

    def __enter__(self):
        """Returns the parser itself so it can be used in a with statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Closes the ELF file; exceptions from the block are not suppressed."""
        self.close()

    def close(self):
        """Closes the ELF file.

        It is safe to call this more than once, and on an object whose
        constructor failed before the file was opened.
        """
        # _file is missing when open() failed in __init__; __del__ still runs
        # on such a partially constructed object.
        elf_file = getattr(self, "_file", None)
        if elf_file is not None:
            elf_file.close()

    def _seekRead(self, offset, read_size):
        """Reads a byte string at specific offset in the file.

        Args:
            offset: An integer, the offset from the beginning of the file.
            read_size: An integer, number of bytes to read.

        Returns:
            A byte string which is the file content.

        Raises:
            ElfError if fails to seek and read.
        """
        if offset < 0 or offset + read_size > self._file_size:
            raise ElfError("Read beyond end of file.")
        try:
            self._file.seek(offset)
            data = self._file.read(read_size)
        except IOError as e:
            raise ElfError(e)
        # A short read would otherwise surface as struct.error in callers.
        if len(data) != read_size:
            raise ElfError("Read beyond end of file.")
        return data

    def _seekRead8(self, offset):
        """Reads an 1-byte integer from file."""
        return struct.unpack("B", self._seekRead(offset, 1))[0]

    def _seekRead16(self, offset):
        """Reads a 2-byte integer from file in the byte order of the ELF."""
        return struct.unpack(self._byte_order + "H",
                             self._seekRead(offset, 2))[0]

    def _seekRead32(self, offset):
        """Reads a 4-byte integer from file in the byte order of the ELF."""
        return struct.unpack(self._byte_order + "I",
                             self._seekRead(offset, 4))[0]

    def _seekRead64(self, offset):
        """Reads an 8-byte integer from file in the byte order of the ELF."""
        return struct.unpack(self._byte_order + "Q",
                             self._seekRead(offset, 8))[0]

    def _seekReadString(self, offset):
        """Reads a null-terminated string starting from specific offset.

        Args:
            offset: The offset from the beginning of the file.

        Returns:
            A byte string, excluding the null character.

        Raises:
            ElfError if the offset is outside the file, if the file cannot be
            read, or if the end of file is reached before a null character.
        """
        if offset < 0 or offset >= self._file_size:
            raise ElfError("Null-terminated string reaches end of file.")
        chunks = []
        buf_size = 16
        try:
            self._file.seek(offset)
        except IOError as e:
            raise ElfError(e)
        while True:
            try:
                buf = self._file.read(buf_size)
            except IOError as e:
                raise ElfError(e)
            # The file is opened in binary mode, so search with a bytes
            # pattern; a str pattern raises TypeError on Python 3.
            end_index = buf.find(b"\0")
            if end_index >= 0:
                chunks.append(buf[:end_index])
                return b"".join(chunks)
            if len(buf) != buf_size:
                raise ElfError("Null-terminated string reaches end of file.")
            chunks.append(buf)

    def _loadElfHeader(self):
        """Loads ElfHeader and initializes attributes.

        Sets _file_size, bitness, _address_size, _offsets,
        _seek_read_address, _byte_order, _sh_offset, _sh_size and _sh_count.

        Returns:
            True.

        Raises:
            ElfError if the file size cannot be determined, or if the magic
            bytes, bitness or byte order in the header are invalid.
        """
        try:
            self._file_size = os.fstat(self._file.fileno()).st_size
        except OSError as e:
            raise ElfError(e)

        magic = self._seekRead(self._MAGIC_OFFSET, 4)
        if magic != self._MAGIC_BYTES:
            raise ElfError("Wrong magic bytes.")
        bitness = self._seekRead8(self._BITNESS_OFFSET)
        if bitness == self._BITNESS_32:
            self.bitness = 32
            self._address_size = 4
            self._offsets = self.ElfOffsets32
            self._seek_read_address = self._seekRead32
        elif bitness == self._BITNESS_64:
            self.bitness = 64
            self._address_size = 8
            self._offsets = self.ElfOffsets64
            self._seek_read_address = self._seekRead64
        else:
            raise ElfError("Wrong bitness value.")

        # The byte order must be known before any multi-byte field is read;
        # the host's native order is not necessarily the order of the file.
        endianness = self._seekRead8(self._ENDIANNESS_OFFSET)
        if endianness == self._ENDIANNESS_LITTLE:
            self._byte_order = "<"
        elif endianness == self._ENDIANNESS_BIG:
            self._byte_order = ">"
        else:
            raise ElfError("Wrong endianness value.")

        self._sh_offset = self._seek_read_address(
                self._offsets.SECTION_HEADER_OFFSET)
        self._sh_size = self._seekRead16(self._offsets.SECTION_HEADER_SIZE)
        self._sh_count = self._seekRead16(self._offsets.SECTION_HEADER_COUNT)
        return True

    def _loadSectionHeader(self, offset):
        """Loads a section header from ELF file.

        Args:
            offset: The starting offset of the section header.

        Returns:
            An instance of SectionHeader.

        Raises:
            ElfError if the section header lies beyond the end of the file.
        """
        return self.SectionHeader(
                self._seekRead32(offset + self._offsets.SECTION_TYPE),
                self._seek_read_address(offset + self._offsets.SECTION_ADDRESS),
                self._seek_read_address(offset + self._offsets.SECTION_OFFSET))

    def _loadDtNeeded(self, offset):
        """Reads DT_NEEDED entries from dynamic section.

        Args:
            offset: The offset of the dynamic section from the beginning of
                    the file

        Returns:
            A list of byte strings, the names of libraries.

        Raises:
            ElfError if the dynamic section has no DT_STRTAB entry, if no
            loaded section has the address given by DT_STRTAB, or if reading
            runs past the end of the file.
        """
        strtab_address = None
        name_offsets = []
        # Each entry is a (tag, value) pair of address-sized integers; the
        # table is terminated by a DT_NULL tag.
        while True:
            tag = self._seek_read_address(offset)
            offset += self._address_size
            value = self._seek_read_address(offset)
            offset += self._address_size

            if tag == self._DT_NULL:
                break
            elif tag == self._DT_NEEDED:
                name_offsets.append(value)
            elif tag == self._DT_STRTAB:
                strtab_address = value

        if strtab_address is None:
            raise ElfError("Cannot find string table offset in dynamic section")

        # DT_STRTAB holds a virtual address; map it to a file offset through
        # the section whose load address matches.
        try:
            strtab_offset = next(x.offset for x in self._section_headers
                                 if x.address == strtab_address)
        except StopIteration:
            raise ElfError("Cannot find dynamic string table.")

        names = [self._seekReadString(strtab_offset + x)
                 for x in name_offsets]
        return names

    def listDependencies(self):
        """Lists the shared libraries that the ELF depends on.

        Returns:
            A list of byte strings, the names of the depended libraries,
            collected from every section of type SHT_DYNAMIC.

        Raises:
            ElfError if a dynamic section cannot be parsed.
        """
        deps = []
        for sh in self._section_headers:
            if sh.type == self._SHT_DYNAMIC:
                deps.extend(self._loadDtNeeded(sh.offset))
        return deps
276