1#!/usr/bin/env python3
2# Copyright 2020 The Pigweed Authors
3#
4# Licensed under the Apache License, Version 2.0 (the "License"); you may not
5# use this file except in compliance with the License. You may obtain a copy of
6# the License at
7#
8#     https://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
12# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
13# License for the specific language governing permissions and limitations under
14# the License.
15"""Reads data from ELF sections.
16
17This module provides tools for dumping the contents of an ELF section. It can
18also be used to read values at a particular address. A command line interface
19for both of these features is provided.
20
21This module supports any ELF-format file, including .o and .so files. This
22module also has basic support for archive (.a) files. All ELF files in an
23archive are read as one unit.
24"""
25
26import argparse
27from pathlib import Path
28import re
29import struct
30import sys
31from typing import BinaryIO, Dict, Iterable, NamedTuple, Optional
32from typing import Pattern, Tuple, Union
33
# Magic number expected at the very start of an archive (.a) file.
ARCHIVE_MAGIC = b'!<arch>\n'
# Magic number expected at the start of every ELF file.
ELF_MAGIC = b'\x7fELF'
36
37
def _check_next_bytes(fd: BinaryIO, expected: bytes, what: str) -> None:
    """Consumes the next bytes of a stream and verifies their value.

    Args:
      fd: binary stream to read from; len(expected) bytes are consumed
      expected: the bytes that must appear next in the stream
      what: description of the expected bytes, used in the error message

    Raises:
      FileDecodeError: the bytes read differ from the expected bytes
    """
    actual = fd.read(len(expected))
    if expected != actual:
        # Not every stream has a name (e.g. io.BytesIO), so use a placeholder.
        raise FileDecodeError(
            f'Invalid {what}: expected {expected!r}, found {actual!r} in file '
            f'{getattr(fd, "name", "(unknown)")}')
44
45
def files_in_archive(fd: BinaryIO) -> Iterable[int]:
    """Seeks to each file in an archive and yields its size.

    Each time a size is yielded, the stream is positioned at the first byte of
    that file's contents, so the caller may read the file before resuming
    iteration. The caller may leave the stream at any position; iteration
    continues from the end of the file that was just yielded.

    Args:
      fd: seekable binary stream positioned at the archive magic number

    Yields:
      The size in bytes of each file in the archive, in archive order.

    Raises:
      FileDecodeError: the archive magic number, a file size, or the ending of
          a file header is invalid
    """

    _check_next_bytes(fd, ARCHIVE_MAGIC, 'archive magic number')

    while True:
        # In some archives, the first file ends with an additional \n. If that
        # is present, skip it. Only rewind when a byte was actually read; at
        # the end of the archive there is nothing to put back.
        next_byte = fd.read(1)
        if next_byte and next_byte != b'\n':
            fd.seek(-1, 1)

        # Each file in an archive is prefixed with an ASCII header:
        #
        #   16 B - file identifier (text)
        #   12 B - file modification timestamp (decimal)
        #    6 B - owner ID (decimal)
        #    6 B - group ID (decimal)
        #    8 B - file mode (octal)
        #   10 B - file size in bytes (decimal)
        #    2 B - ending characters (`\n)
        #
        # Skip the unused portions of the file header, then read the size.
        fd.seek(16 + 12 + 6 + 6 + 8, 1)
        size_str = fd.read(10)
        if not size_str:
            return  # Reached the end of the archive.

        try:
            size = int(size_str, 10)
        except ValueError as exc:
            raise FileDecodeError(
                'Archive file sizes must be decimal integers') from exc

        # A negative size would move the stream backwards after the yield and
        # could revisit the same header forever, so reject it outright.
        if size < 0:
            raise FileDecodeError(
                f'Archive file sizes must not be negative; found {size}')

        _check_next_bytes(fd, b'`\n', 'archive file header ending')
        offset = fd.tell()  # Store offset in case the caller reads the file.

        yield size

        fd.seek(offset + size)
85
86
def _elf_files_in_archive(fd: BinaryIO):
    """Yields once for each ELF file found in a plain ELF file or an archive.

    Whenever this yields, the stream is positioned at the start of an ELF file
    (its magic number has been peeked at but not consumed). The yielded value
    is always None; only the stream position is meaningful.
    """
    if not _bytes_match(fd, ELF_MAGIC):
        # Not a bare ELF file, so treat the stream as an archive and report
        # only the archived files that start with the ELF magic number.
        for _ in files_in_archive(fd):
            if _bytes_match(fd, ELF_MAGIC):
                yield None
        return

    # The stream itself is a single ELF file.
    yield None
94
95
class Field(NamedTuple):
    """A field in an ELF file.

    Fields refer to a particular piece of data in an ELF file or section header.
    Each field records its location and width for both 32-bit and 64-bit ELF
    files, since the two layouts differ.
    """

    name: str  # Name of the field, e.g. 'e_shoff'
    offset_32: int  # Offset of the field from the header start, 32-bit files
    offset_64: int  # Offset of the field from the header start, 64-bit files
    size_32: int  # Number of bytes the field occupies in 32-bit files
    size_64: int  # Number of bytes the field occupies in 64-bit files
107
108
class _FileHeader(NamedTuple):
    """Fields in the ELF file header.

    Each default is a Field(name, offset_32, offset_64, size_32, size_64).
    """

    # Where the section headers start, relative to the start of the ELF file.
    section_header_offset: Field = Field('e_shoff', 0x20, 0x28, 4, 8)
    # How many section headers the file contains.
    section_count: Field = Field('e_shnum', 0x30, 0x3C, 2, 2)
    # Index of the section header for the section that stores section names.
    section_names_index: Field = Field('e_shstrndx', 0x32, 0x3E, 2, 2)


# Instance that provides the file header fields through their default values.
FILE_HEADER = _FileHeader()
118
119
class _SectionHeader(NamedTuple):
    """Fields in an ELF section header.

    Each default is a Field(name, offset_32, offset_64, size_32, size_64).
    Offsets are relative to the start of an individual section header.
    """

    # Offset of the section's name within the section names table.
    section_name_offset: Field = Field('sh_name', 0x00, 0x00, 4, 4)
    # Address of the section; used to map an address back to a section.
    section_address: Field = Field('sh_addr', 0x0C, 0x10, 4, 8)
    # Offset of the section's contents from the start of the ELF file.
    section_offset: Field = Field('sh_offset', 0x10, 0x18, 4, 8)
    # Size of the section's contents in bytes.
    section_size: Field = Field('sh_size', 0x14, 0x20, 4, 8)

    # section_header_end records the size of the header. It is not a real field
    # (its size is 0); only its offset is meaningful.
    section_header_end: Field = Field('section end', 0x28, 0x40, 0, 0)


# Instance that provides the section header fields through their defaults.
SECTION_HEADER = _SectionHeader()
133
134
def read_c_string(fd: BinaryIO) -> bytes:
    """Reads bytes from the stream up to a null terminator or end of file.

    The terminator, if present, is consumed but is not part of the result.
    """
    chars = bytearray()

    # iter() with a sentinel stops as soon as read() returns b'' (end of file).
    for char in iter(lambda: fd.read(1), b''):
        if char == b'\0':
            break
        chars.extend(char)

    return bytes(chars)
143
144
def _bytes_match(fd: BinaryIO, expected: bytes) -> bool:
    """Checks whether the upcoming bytes equal expected without consuming them.

    The stream position is restored after the comparison. Any IOError raised
    while inspecting the stream is reported as a mismatch.
    """
    try:
        start = fd.tell()
        matched = fd.read(len(expected)) == expected
        fd.seek(start)  # Put the stream back where it started.
    except IOError:
        return False

    return matched
154
155
def compatible_file(file: Union[BinaryIO, str, Path]) -> bool:
    """True if the file type is supported (ELF or archive).

    Args:
      file: path to a file, or an already open seekable binary stream. A path
          is opened and closed by this function. An open stream is left open
          and is returned to its original position.

    Returns:
      True if the file starts with the ELF or archive magic number.

    Raises:
      OSError: the path could not be opened
    """
    def starts_with_known_magic(fd: BinaryIO) -> bool:
        offset = fd.tell()
        fd.seek(0)
        result = _bytes_match(fd, ELF_MAGIC) or _bytes_match(fd, ARCHIVE_MAGIC)
        fd.seek(offset)
        return result

    if isinstance(file, (str, Path)):
        # The with statement only closes the file if open() succeeded, so a
        # failure to open reports the real error instead of tripping over an
        # unassigned variable during cleanup.
        with open(file, 'rb') as fd:
            return starts_with_known_magic(fd)

    return starts_with_known_magic(file)
170
171
class FileDecodeError(Exception):
    """Invalid data was read from an ELF file.

    This is raised when a file's contents do not match what this module
    expects, such as a wrong magic number, a malformed archive file header, or
    an unknown ELF address size or endianness.
    """
174
175
class FieldReader:
    """Reads ELF fields defined with a Field tuple from an ELF file.

    When constructed, the reader consumes the start of the ELF header to learn
    whether the file uses the 32-bit or 64-bit layout and which byte order it
    uses. Fields are then located and decoded accordingly.

    Attributes:
      file_offset: position in the stream at which the ELF file starts
      offset: function that returns the offset of a Field for this file's
          layout (32-bit or 64-bit)
    """
    def __init__(self, elf: BinaryIO):
        """Prepares to read fields from an ELF file.

        Args:
          elf: seekable binary stream positioned at the ELF magic number

        Raises:
          FileDecodeError: the magic number, address size, or endianness is
              not recognized
        """
        self._elf = elf
        self.file_offset = self._elf.tell()

        _check_next_bytes(self._elf, ELF_MAGIC, 'ELF file header')
        size_field = self._elf.read(1)  # e_ident[EI_CLASS] (address size)

        # The endianness byte directly follows the address size byte, so this
        # must be called right after reading size_field.
        int_unpacker = self._determine_integer_format()

        if size_field == b'\x01':
            self.offset = lambda field: field.offset_32
            self._size = lambda field: field.size_32
            self._decode = lambda f, d: int_unpacker[f.size_32].unpack(d)[0]
        elif size_field == b'\x02':
            self.offset = lambda field: field.offset_64
            self._size = lambda field: field.size_64
            self._decode = lambda f, d: int_unpacker[f.size_64].unpack(d)[0]
        else:
            raise FileDecodeError(f'Unknown size {size_field!r}')

    def _determine_integer_format(self) -> Dict[int, struct.Struct]:
        """Returns a dict of structs used for converting bytes to integers.

        Reads one byte from the current position of the stream. The dict maps
        an integer width in bytes to a struct that unpacks an unsigned integer
        of that width with the file's byte order.

        Raises:
          FileDecodeError: the endianness byte is not recognized
        """
        endianness_byte = self._elf.read(1)  # e_ident[EI_DATA] (endianness)
        if endianness_byte == b'\x01':
            endianness = '<'
        elif endianness_byte == b'\x02':
            endianness = '>'
        else:
            raise FileDecodeError(f'Unknown endianness {endianness_byte!r}')

        return {
            1: struct.Struct(endianness + 'B'),
            2: struct.Struct(endianness + 'H'),
            4: struct.Struct(endianness + 'I'),
            8: struct.Struct(endianness + 'Q'),
        }

    def read(self, field: Field, base: int = 0) -> int:
        """Reads a field as an unsigned integer.

        Args:
          field: the field to read
          base: offset of the header that contains the field, relative to the
              start of the ELF file

        Returns:
          The decoded value of the field.

        Raises:
          FileDecodeError: the file ends before the whole field could be read
        """
        self._elf.seek(self.file_offset + base + self.offset(field))
        size = self._size(field)
        data = self._elf.read(size)

        # A short read means the file is truncated or the offset is bogus.
        # Report it as a decode error rather than letting struct.error escape.
        if len(data) != size:
            raise FileDecodeError(
                f'Expected {size} bytes for field {field.name}, but only '
                f'{len(data)} could be read')

        return self._decode(field, data)

    def read_string(self, offset: int) -> str:
        """Reads a null-terminated string at an offset from the ELF start."""
        self._elf.seek(self.file_offset + offset)
        return read_c_string(self._elf).decode()
224
225
class Elf:
    """Represents an ELF file and the sections in it.

    The section headers are read once when the object is created. Section
    contents are read from the underlying stream on demand, so the stream must
    stay open while this object is in use. If the stream is an archive, the
    sections of every ELF file in it are listed together.

    Attributes:
      sections: all sections that were found, in the order they were read
    """
    class Section(NamedTuple):
        """Info about a section in an ELF file."""
        name: str
        address: int
        offset: int
        size: int

        file_offset: int  # Starting place in the file; 0 unless in an archive.

        def range(self) -> range:
            """Returns the range of addresses this section covers."""
            return range(self.address, self.address + self.size)

        def __lt__(self, other) -> bool:
            """Orders sections by address only."""
            return self.address < other.address

    def __init__(self, elf: BinaryIO):
        """Reads the section headers from an ELF file or archive.

        Args:
          elf: seekable binary stream positioned at the start of the ELF file
              or archive

        Raises:
          FileDecodeError: the file could not be decoded
        """
        self._elf = elf
        self.sections: Tuple[Elf.Section, ...] = tuple(self._list_sections())

    def _list_sections(self) -> Iterable['Elf.Section']:
        """Reads the section headers to enumerate all ELF sections."""
        for _ in _elf_files_in_archive(self._elf):
            reader = FieldReader(self._elf)
            section_headers_base = reader.read(
                FILE_HEADER.section_header_offset)
            section_header_size = reader.offset(
                SECTION_HEADER.section_header_end)

            # Find the section with the section names in it.
            names_section_header_base = (
                section_headers_base + section_header_size *
                reader.read(FILE_HEADER.section_names_index))
            names_table_base = reader.read(SECTION_HEADER.section_offset,
                                           names_section_header_base)

            # Walk the section headers, which are stored back to back.
            base = section_headers_base
            for _ in range(reader.read(FILE_HEADER.section_count)):
                name_offset = reader.read(SECTION_HEADER.section_name_offset,
                                          base)

                yield self.Section(
                    reader.read_string(names_table_base + name_offset),
                    reader.read(SECTION_HEADER.section_address, base),
                    reader.read(SECTION_HEADER.section_offset, base),
                    reader.read(SECTION_HEADER.section_size, base),
                    reader.file_offset)

                base += section_header_size

    def section_by_address(self, address: int) -> Optional['Elf.Section']:
        """Returns the section that contains the provided address, if any."""
        # Iterate in reverse to give priority to sections with nonzero addresses
        for section in sorted(self.sections, reverse=True):
            if address in section.range():
                return section

        return None

    def sections_with_name(self, name: str) -> Iterable['Elf.Section']:
        """Yields every section whose name is exactly the provided name."""
        for section in self.sections:
            if section.name == name:
                yield section

    def read_value(self,
                   address: int,
                   size: Optional[int] = None) -> Union[None, bytes, int]:
        """Reads specified bytes or null-terminated string at address.

        Args:
          address: the address to read from
          size: number of bytes to read; if None, reads until a null
              terminator or the end of the file

        Returns:
          The bytes that were read, or None if no section contains the address.
        """
        section = self.section_by_address(address)
        if not section:
            return None

        assert section.address <= address
        # Translate the address into a position in the underlying stream.
        self._elf.seek(section.file_offset + section.offset + address -
                       section.address)

        if size is None:
            return read_c_string(self._elf)

        return self._elf.read(size)

    def _read_matching_sections(
            self, name: Union[str,
                              Pattern[str]]) -> Iterable[Tuple[str, bytes]]:
        """Yields (name, contents) for each section that matches the regex."""
        name_regex = re.compile(name)

        for section in self.sections:
            if name_regex.match(section.name):
                self._elf.seek(section.file_offset + section.offset)
                yield section.name, self._elf.read(section.size)

    def dump_sections(self, name: Union[str,
                                        Pattern[str]]) -> Dict[str, bytes]:
        """Dumps a binary string containing the sections matching the regex.

        The regex is matched against the start of each section name.

        Returns:
          A dict that maps each matching section name to its contents. If
          several sections share a name, which is typical for archives with
          multiple ELF files, their contents are concatenated in the order the
          sections were read.
        """
        sections: Dict[str, bytes] = {}
        for section_name, data in self._read_matching_sections(name):
            # Append instead of assigning so that sections with the same name
            # do not silently replace one another.
            sections[section_name] = sections.get(section_name, b'') + data

        return sections

    def dump_section_contents(
            self, name: Union[str, Pattern[str]]) -> Optional[bytes]:
        """Dumps the contents of all sections matching the regex as one string.

        Returns:
          The contents of every matching section, concatenated in the order the
          sections were read, or None if no section matches.
        """
        contents = [data for _, data in self._read_matching_sections(name)]
        return b''.join(contents) if contents else None

    def summary(self) -> str:
        """Returns a table with the address, offset, size, and name of each
        section, one section per line."""
        return '\n'.join(
            f'[{i:2}] {section.address:08x} {section.offset:08x} '
            f'{section.size:08x} {section.name}'
            for i, section in enumerate(self.sections))

    def __str__(self) -> str:
        sections = ''.join(f'\n  {s},' for s in self.sections)
        return f'Elf({sections}\n)'
333
334
def _read_addresses(elf, size: int, output, address: Iterable[int]) -> None:
    """Reads the value at each address and passes it to the output function.

    Values are output one at a time, in order. Reading stops with a ValueError
    at the first address that elf.read_value cannot resolve (returns None).
    """
    for location in address:
        data = elf.read_value(location, size)

        if data is not None:
            output(data)
            continue

        raise ValueError('Invalid address 0x{:08x}'.format(location))
343
344
def _dump_sections(elf: Elf, output, sections: Iterable[Pattern[str]]) -> None:
    """Outputs the contents of the sections matching each pattern.

    If no patterns are provided, a summary of all sections is output instead.
    """
    if sections:
        for pattern in sections:
            output(elf.dump_section_contents(pattern))
    else:
        # No patterns were requested, so list the sections that are available.
        output(elf.summary().encode())
352
353
def _parse_args() -> argparse.Namespace:
    """Builds the command line interface and parses the program arguments.

    The returned namespace has a handler attribute that holds the function to
    call for the selected subcommand.
    """
    def hex_int(arg):
        return int(arg, 16)

    cli = argparse.ArgumentParser(description=__doc__)

    # Options shared by all subcommands.
    cli.add_argument('-e',
                     '--elf',
                     required=True,
                     type=argparse.FileType('rb'),
                     help='the ELF file to examine')
    cli.add_argument(
        '-d',
        '--delimiter',
        type=int,
        default=ord('\n'),
        help=r'delimiter to write after each value; \n by default')

    # With no subcommand, the handler just displays the help text.
    cli.set_defaults(handler=lambda **_: cli.print_help())

    commands = cli.add_subparsers(
        help='select whether to work with addresses or whole sections')

    # "section" subcommand: dump the sections that match the regexes.
    section_command = commands.add_parser('section')
    section_command.set_defaults(handler=_dump_sections)
    section_command.add_argument(
        'sections',
        metavar='section_regex',
        nargs='*',
        type=re.compile,  # type: ignore
        help='section name regular expression')

    # "address" subcommand: read the values at the given addresses.
    address_command = commands.add_parser('address')
    address_command.set_defaults(handler=_read_addresses)
    address_command.add_argument(
        '--size',
        type=int,
        help='the size to read; reads until a null terminator by default')
    address_command.add_argument('address',
                                 nargs='+',
                                 type=hex_int,
                                 help='hexadecimal addresses to read')

    return cli.parse_args()
400
401
def _main(args):
    """Calls the appropriate handler for the command line options.

    The handler and delimiter are removed from the parsed arguments. The
    remaining arguments, plus an output function and the parsed Elf object, are
    passed to the handler as keyword arguments.
    """
    options = vars(args)  # The namespace's own dict; edits apply to args too.
    handler = options.pop('handler')
    delim = options.pop('delimiter')

    def write_value(value):
        # Handlers may produce None (e.g. no matching section); skip those.
        if value is None:
            return

        stdout = sys.stdout.buffer
        stdout.write(value)
        stdout.write(bytearray([delim]))
        sys.stdout.flush()

    options['output'] = write_value
    options['elf'] = Elf(options['elf'])

    handler(**options)


if __name__ == '__main__':
    _main(_parse_args())
424