1 /* Copyright (C) 2007-2010 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 ** GNU General Public License for more details.
11 */
12 
13 /*
14  * Contains declaration of ElfFile classes that encapsulate an ELF file.
15  */
16 
17 #ifndef ELFF_ELF_FILE_H_
18 #define ELFF_ELF_FILE_H_
19 
20 #include "dwarf_die.h"
21 #include "elf_mapped_section.h"
22 #include "elff_api.h"
23 #include "mapfile.h"
24 
25 /* Encapsulates architecture-independent functionality of an ELF file.
26  *
 * This class is a base class for templated ElfFileImpl. This class implements
 * functionality around an ELF file that is independent from particulars of the
 * ELF's CPU architecture, while ElfFileImpl handles all particulars of CPU
 * architecture (namely, 32 or 64-bit), for which ELF file has been built.
31  *
32  * NOTE: This class operates on ELF sections that have been mapped to memory.
33  *
34  */
35 class ElfFile {
36  public:
37   /* Constructs ElfFile instance. */
38   ElfFile();
39 
40   /* Destructs ElfFile instance. */
41   virtual ~ElfFile();
42 
43   /* Creates ElfFileImpl instance, depending on ELF file CPU architecture.
44    * This method will collect initial information about requested ELF file,
45    * and will instantiate appropriate ElfFileImpl class object for it.
46    * Param:
47    *  path - Full path to the ELF file.
48    * Return:
49    *  Initialized ElfFileImpl instance, typecasted back to ElfFile object on
50    *  success, or NULL on failure, with errno providing extended error
51    *  information.
52    */
53   static ElfFile* Create(const char* path);
54 
55   /* Checks if ELF file is a 64, or 32-bit ELF file. */
is_ELF_64()56   bool is_ELF_64() const {
57     return is_ELF_64_;
58   }
is_ELF_32()59   bool is_ELF_32() const {
60     return !is_ELF_64_;
61   }
62 
63   /* Checks if ELF file data format is big, or little-endian. */
is_elf_big_endian()64   bool is_elf_big_endian() const {
65     return is_elf_big_endian_;
66   }
is_elf_little_endian()67   bool is_elf_little_endian() const {
68     return !is_elf_big_endian_;
69   }
70 
71   /* Checks whether or not endianness of CPU this library is built for matches
72    * endianness of the ELF file that is represented with this instance. */
same_endianness()73   bool same_endianness() const {
74     return same_endianness_;
75   }
76 
77   /* Checks if format of DWARF data in this file is 64, or 32-bit. */
is_DWARF_64()78   bool is_DWARF_64() const {
79     return is_DWARF_64_;
80   }
is_DWARF_32()81   bool is_DWARF_32() const {
82     return !is_DWARF_64_;
83   }
84 
85   /* Gets DWARF objects allocator for this instance. */
allocator()86   class ElfAllocator* allocator() const {
87     return allocator_;
88   }
89 
90   /* Gets head of compilation unit list, collected during parsing of this file.
91    * NOTE: list of collected compilation units returned from this method is
92    * in reverse order relatively to the order CUs have been added to the list
93    * during ELF file parsing.
94    */
last_cu()95   class DwarfCU* last_cu() const {
96     return last_cu_;
97   }
98 
99   /* Gets number of compilation units, collected during parsing of
100    * this ELF file with parse_compilation_units() method.
101    */
cu_count()102   int cu_count() const {
103     return cu_count_;
104   }
105 
106   /* Gets  executable file flag */
is_exec()107   bool is_exec() const {
108       return is_exec_;
109   }
110 
111  protected:
112   /* Initializes ElfFile instance. This method is called from Create method of
113    * this class after appropriate ElfFileImpl instance has been created. Note,
114    * that Create() method will validate that requested file is an ELF file,
115    * prior to instantiating of an ElfFileImpl object, and calling this method.
116    * Param:
117    *  elf_hdr - Address of the common ELF file header.
118    *  path - See Create().
119    * Return:
120    *  true on success, or false on failure, with errno containing extended
121    *  error information.
122    */
123   virtual bool initialize(const Elf_CommonHdr* elf_hdr, const char* path);
124 
125 /*=============================================================================
126  * Endianness helper methods.
127  * Since endianness of ELF file may differ from the endianness of the CPU this
128  * library runs on, every time a value is required from a section of the ELF
129  * file, it must be first pulled out of that section to a local variable, and
130  * then used from that local variable. While value is pulled from ELF file
131  * section, it must be converted accordingly to the endianness of the CPU and
132  * ELF file. Routines bellow provide such functionality.
133 =============================================================================*/
134 
135  public:
136   /* Pulls one byte value from ELF file. Note that for one byte we don't need
137    * to do any endianness conversion, and these two methods are provided purely
138    * for completness of the API.
139    * Param:
140    *  val - References value inside ELF file buffer to pull data from.
141    * Return
142    *  Pulled value with endianness appropriate for the CPU this library is
143    *  running on.
144    */
pull_val(const uint8_t * val)145   uint8_t pull_val(const uint8_t* val) const {
146     return *val;
147   }
pull_val(const uint8_t & val)148   uint8_t pull_val(const uint8_t& val) const {
149     return val;
150   }
pull_val(const int8_t * val)151   int8_t pull_val(const int8_t* val) const {
152     return *val;
153   }
pull_val(const int8_t & val)154   int8_t pull_val(const int8_t& val) const {
155     return val;
156   }
157 
158   /* Pulls two byte value from ELF file.
159    * Param:
160    *  val - References value inside ELF file buffer to pull data from.
161    * Return
162    *  Pulled value with endianness appropriate for the CPU this library is
163    *  running on.
164    */
pull_val(const uint16_t * val)165   uint16_t pull_val(const uint16_t* val) const {
166     if (same_endianness()) {
167       return *val;
168     }
169     if (is_elf_big_endian()) {
170       return (uint16_t)get_byte(val, 0) << 8 | get_byte(val, 1);
171     } else {
172       return (uint16_t)get_byte(val, 1) << 8 | get_byte(val, 0);
173     }
174   }
pull_val(const uint16_t & val)175   uint16_t pull_val(const uint16_t& val) const {
176     return same_endianness() ? val : pull_val(&val);
177   }
pull_val(const int16_t * val)178   int16_t pull_val(const int16_t* val) const {
179     return static_cast<int16_t>
180               (pull_val(reinterpret_cast<const uint16_t*>(val)));
181   }
pull_val(const int16_t & val)182   int16_t pull_val(const int16_t& val) const {
183     return static_cast<int16_t>
184               (pull_val(reinterpret_cast<const uint16_t&>(val)));
185   }
186 
187   /* Pulls four byte value from ELF file.
188    * Param:
189    *  val - References value inside ELF file buffer to pull data from.
190    * Return
191    *  Pulled value with endianness appropriate for the CPU this library is
192    *  running on.
193    */
pull_val(const uint32_t * val)194   uint32_t pull_val(const uint32_t* val) const {
195     if (same_endianness()) {
196       return *val;
197     }
198     if (is_elf_big_endian()) {
199       return (uint32_t)get_byte(val, 0) << 24 |
200              (uint32_t)get_byte(val, 1) << 16 |
201              (uint32_t)get_byte(val, 2) << 8  |
202              (uint32_t)get_byte(val, 3);
203     } else {
204       return (uint32_t)get_byte(val, 3) << 24 |
205              (uint32_t)get_byte(val, 2) << 16 |
206              (uint32_t)get_byte(val, 1) << 8  |
207              (uint32_t)get_byte(val, 0);
208     }
209   }
pull_val(const uint32_t & val)210   uint32_t pull_val(const uint32_t& val) const {
211     return same_endianness() ? val : pull_val(&val);
212   }
pull_val(const int32_t * val)213   int32_t pull_val(const int32_t* val) const {
214     return static_cast<int32_t>
215               (pull_val(reinterpret_cast<const uint32_t*>(val)));
216   }
pull_val(const int32_t & val)217   int32_t pull_val(const int32_t& val) const {
218     return static_cast<int32_t>
219               (pull_val(reinterpret_cast<const uint32_t&>(val)));
220   }
221 
222   /* Pulls eight byte value from ELF file.
223    * Param:
224    *  val - References value inside ELF file buffer to pull data from.
225    * Return
226    *  Pulled value with endianness appropriate for the CPU this library is
227    *  running on.
228    */
pull_val(const uint64_t * val)229   uint64_t pull_val(const uint64_t* val) const {
230     if (same_endianness()) {
231       return *val;
232     }
233     if (is_elf_big_endian()) {
234       return (uint64_t)get_byte(val, 0) << 56 |
235              (uint64_t)get_byte(val, 1) << 48 |
236              (uint64_t)get_byte(val, 2) << 40 |
237              (uint64_t)get_byte(val, 3) << 32 |
238              (uint64_t)get_byte(val, 4) << 24 |
239              (uint64_t)get_byte(val, 5) << 16 |
240              (uint64_t)get_byte(val, 6) << 8  |
241              (uint64_t)get_byte(val, 7);
242     } else {
243       return (uint64_t)get_byte(val, 7) << 56 |
244              (uint64_t)get_byte(val, 6) << 48 |
245              (uint64_t)get_byte(val, 5) << 40 |
246              (uint64_t)get_byte(val, 4) << 32 |
247              (uint64_t)get_byte(val, 3) << 24 |
248              (uint64_t)get_byte(val, 2) << 16 |
249              (uint64_t)get_byte(val, 1) << 8  |
250              (uint64_t)get_byte(val, 0);
251     }
252   }
pull_val(const uint64_t & val)253   uint64_t pull_val(const uint64_t& val) const {
254     return same_endianness() ? val : pull_val(&val);
255   }
pull_val(const int64_t * val)256   int64_t pull_val(const int64_t* val) const {
257     return static_cast<int64_t>
258               (pull_val(reinterpret_cast<const uint64_t*>(val)));
259   }
pull_val(const int64_t & val)260   int64_t pull_val(const int64_t& val) const {
261     return static_cast<int64_t>
262               (pull_val(reinterpret_cast<const uint64_t&>(val)));
263   }
264 
265 //=============================================================================
266 // ELF file section management.
267 //=============================================================================
268 
269  public:
270   /* Gets a string contained in ELF's string section by index.
271    * Param:
272    *  index - String index (byte offset) in the ELF's string section.
273    * Return:
274    *  Pointer to the requested string, or NULL if string index exceeds ELF's
275    *  string section size.
276    *  NOTE: pointer returned from this method points to a mapped section of
277    *  ELF file.
278    */
get_str_sec_str(Elf_Xword index)279   const char* get_str_sec_str(Elf_Xword index) const {
280     assert(string_section_.is_mapped() && index < string_section_.size());
281     if (string_section_.is_mapped() && index < string_section_.size()) {
282       return INC_CPTR_T(char, string_section_.data(), index);
283     } else {
284       _set_errno(EINVAL);
285       return NULL;
286     }
287   }
288 
289   /* Gets a string contained in ELF's debug string section (.debug_str)
290    * by index.
291    * Param:
292    *  index - String index (byte offset) in the ELF's debug string section.
293    * Return:
294    *  Pointer to the requested string, or NULL if string index exceeds ELF's
295    *  debug string section size.
296    *  NOTE: pointer returned from this method points to a mapped section of
297    *  ELF file.
298    */
get_debug_str(Elf_Xword index)299   const char* get_debug_str(Elf_Xword index) const {
300     assert(debug_str_.is_mapped() && index < debug_str_.size());
301     if (debug_str_.is_mapped() && index < debug_str_.size()) {
302       return INC_CPTR_T(char, debug_str_.data(), index);
303     } else {
304       _set_errno(EINVAL);
305       return NULL;
306     }
307   }
308 
309  protected:
310   /* Gets pointer to a section header, given section index within ELF's
311    * section table.
312    * Param:
313    *  index - Section index within ELF's section table.
314    * Return:
315    *  Pointer to a section header (ElfXX_SHdr flavor, depending on ELF's CPU
316    *  architecture) on success, or NULL if section index exceeds number of
317    *  sections for this ELF file.
318    */
get_section_by_index(Elf_Half index)319   const void* get_section_by_index(Elf_Half index) const {
320     assert(index < sec_count_);
321     if (index < sec_count_) {
322       return INC_CPTR(sec_table_, static_cast<size_t>(index) * sec_entry_size_);
323     } else {
324       _set_errno(EINVAL);
325       return NULL;
326     }
327   }
328 
329 //=============================================================================
330 // DWARF management.
331 //=============================================================================
332 
333  protected:
334   /* Parses DWARF, and buids a list of compilation units for this ELF file.
335    * Compilation unit, collected with this methods are linked together in a
336    * list, head of which is available via last_cu() method of this class.
337    * NOTE: CUs in the list returned via last_cu() method are in reverse order
338    * relatively to the order in which CUs are stored in .debug_info section.
339    * This is ELF and DWARF data format - dependent method.
340    * Param:
341    *  parse_context - Parsing context that defines which tags, and which
342    *    properties for which tag should be collected during parsing. NULL
343    *    passed in this parameter indicates that all properties for all tags
344    *    should be collected.
345    * Return:
346    *  Number of compilation units, collected in this method on success,
347    *  or -1 on failure.
348    */
349   virtual int parse_compilation_units(const DwarfParseContext* parse_context) = 0;
350 
351  public:
352   /* Gets PC address information.
353    * Param:
354    *  address - PC address to get information for. The address must be relative
355    *    to the beginning of ELF file represented by this class.
356    *  address_info - Upon success contains information about routine(s) that
357    *    contain the given address.
358    * Return:
359    *  true if routine(s) containing has been found and its information has been
360    *  saved into address_info, or false if no appropriate routine for that
361    *  address has been found, or there was a memory error when collecting
362    *  routine(s) information. In case of failure, errno contains extended error
363    *  information.
364    */
365   bool get_pc_address_info(Elf_Xword address, Elf_AddressInfo* address_info);
366 
367   /* Frees resources aqcuired for address information in successful call to
368    * get_pc_address_info().
369    * Param:
370    *  address_info - Address information structure, initialized in successful
371    *    call to get_pc_address_info() routine.
372    */
373   void free_pc_address_info(Elf_AddressInfo* address_info) const;
374 
375   /* Gets beginning of the .debug_info section data.
376    * Return:
377    *  Beginning of the .debug_info section data.
378    *  NOTE: pointer returned from this method points to a mapped section of
379    *  ELF file.
380    */
get_debug_info_data()381   const void* get_debug_info_data() const {
382     return debug_info_.data();
383   }
384 
385   /* Gets beginning of the .debug_abbrev section data.
386    * Return:
387    *  Beginning of the .debug_abbrev section data.
388    *  NOTE: pointer returned from this method points to a mapped section of
389    *  ELF file.
390    */
get_debug_abbrev_data()391   const void* get_debug_abbrev_data() const {
392     return debug_abbrev_.data();
393   }
394 
395   /* Gets beginning of the .debug_ranges section data.
396    * Return:
397    *  Beginning of the .debug_ranges section data.
398    *  NOTE: pointer returned from this method points to a mapped section of
399    *  ELF file.
400    */
get_debug_ranges_data()401   const void* get_debug_ranges_data() const {
402     return debug_ranges_.data();
403   }
404 
405   /* Gets beginning of the .debug_line section data.
406    * Return:
407    *  Beginning of the .debug_line section data.
408    *  NOTE: pointer returned from this method points to a mapped section of
409    *  ELF file.
410    */
get_debug_line_data()411   const void* get_debug_line_data() const {
412     return debug_line_.data();
413   }
414 
415   /* Checks, if given address range is contained in the mapped .debug_info
416    * section of this file.
417    * Param:
418    *  ptr - Starting address of the range.
419    *  size - Range size in bytes.
420    * Return:
421    *  true if given address range is contained in the mapped .debug_info
422    *  section of this file, or false if any part of the range doesn't belong
423    *  to that section.
424    */
is_valid_die_ptr(const void * ptr,size_t size)425   bool is_valid_die_ptr(const void* ptr, size_t size) const {
426     return debug_info_.is_contained(ptr, size);
427   }
428 
429   /* Checks, if given address range is contained in the mapped .debug_abbrev
430    * section of this file.
431    * Param:
432    *  ptr - Starting address of the range.
433    *  size - Range size in bytes.
434    * Return:
435    *  true if given address range is contained in the mapped .debug_abbrev
436    *  section of this file, or false if any part of the range doesn't belong
437    *  to that section.
438    */
is_valid_abbr_ptr(const void * ptr,size_t size)439   bool is_valid_abbr_ptr(const void* ptr, size_t size) const {
440     return debug_abbrev_.is_contained(ptr, size);
441   }
442 
443   /* Checks if given pointer addresses a valid compilation unit header in the
444    * mapped .debug_info section of the ELF file.
445    * Param:
446    *  cu_header - Pointer to a compilation unit header to check.
447    * Return
448    *  true, if given pointer addresses a valid compilation unit header, or
449    *  false, if it's not. A valid CU header must be fully conained inside
450    *  .debug_info section of the ELF file, and its size must not be zero.
451    */
is_valid_cu(const void * cu_header)452   bool is_valid_cu(const void* cu_header) const {
453     if (is_DWARF_64()) {
454       return is_valid_die_ptr(cu_header, sizeof(Dwarf64_CUHdr)) &&
455              reinterpret_cast<const Dwarf64_CUHdr*>(cu_header)->size_hdr.size != 0;
456     } else {
457       return is_valid_die_ptr(cu_header, sizeof(Dwarf32_CUHdr)) &&
458              reinterpret_cast<const Dwarf32_CUHdr*>(cu_header)->size_hdr.size != 0;
459     }
460   }
461 
462   /* Gets range's low and high pc for the given range reference in the mapped
463    * .debug_ranges section of an ELF file.
464    * Template param:
465    *  AddrType - Defines pointer type for the CU the range belongs to. CU's
466    *    pointer type can be defined independently from ELF and DWARF types,
467    *    and is encoded in address_size field of the CU header in .debug_info
468    *    section of ELF file.
469    * Param:
470    *  offset - Byte offset within .debug_ranges section of the range record.
471    *  low - Upon successful return contains value for range's low pc.
472    *  high - Upon successful return contains value for range's high pc.
473    * Return:
474    *  true on success, or false, if requested record is not fully contained
475    *  in the .debug_ranges section.
476    */
477   template<typename AddrType>
get_range(Elf_Word offset,AddrType * low,AddrType * high)478   bool get_range(Elf_Word offset, AddrType* low, AddrType* high) {
479     const AddrType* ptr = INC_CPTR_T(AddrType, debug_ranges_.data(), offset);
480     assert(debug_ranges_.is_contained(ptr, sizeof(AddrType) * 2));
481     if (!debug_ranges_.is_contained(ptr, sizeof(AddrType) * 2)) {
482       _set_errno(EINVAL);
483       return false;
484     }
485     *low = pull_val(ptr);
486     *high = pull_val(ptr + 1);
487     return true;
488   }
489 
490  protected:
491   /* Mapped ELF string section. */
492   ElfMappedSection    string_section_;
493 
494   /* Mapped .debug_info section. */
495   ElfMappedSection    debug_info_;
496 
497   /* Mapped .debug_abbrev section. */
498   ElfMappedSection    debug_abbrev_;
499 
500   /* Mapped .debug_str section. */
501   ElfMappedSection    debug_str_;
502 
503   /* Mapped .debug_line section. */
504   ElfMappedSection    debug_line_;
505 
506   /* Mapped .debug_ranges section. */
507   ElfMappedSection    debug_ranges_;
508 
509   /* Base address of the loaded module (if fixed), or 0 if module doesn't get
510    * loaded at fixed address. */
511   Elf_Xword           fixed_base_address_;
512 
513   /* Handle to the ELF file represented with this instance. */
514   MapFile*            elf_handle_;
515 
516   /* Path to the ELF file represented with this instance. */
517   char*               elf_file_path_;
518 
519   /* DWARF objects allocator for this instance. */
520   class ElfAllocator* allocator_;
521 
522   /* Beginning of the cached ELF's section table. */
523   void*               sec_table_;
524 
525   /* Number of sections in the ELF file wrapped by this instance. */
526   Elf_Half            sec_count_;
527 
528   /* Byte size of an entry in the section table. */
529   Elf_Half            sec_entry_size_;
530 
531   /* Head of compilation unit list, collected during the parsing. */
532   class DwarfCU*      last_cu_;
533 
534   /* Number of compilation units in last_cu_ list. */
535   int                 cu_count_;
536 
537   /* Flags ELF's CPU architecture: 64 (true), or 32 bits (false). */
538   bool                is_ELF_64_;
539 
540   /* Flags endianness of the processed ELF file. true indicates that ELF file
541    * data is stored in big-endian form, false indicates that ELF file data is
542    * stored in big-endian form.
543    */
544   bool                is_elf_big_endian_;
545 
546   /* Flags whether or not endianness of CPU this library is built for matches
547    * endianness of the ELF file that is represented with this instance.
548    */
549   bool                same_endianness_;
550 
551   /* Flags DWARF format: 64, or 32 bits. DWARF format is determined by looking
552    * at the first 4 bytes of .debug_info section (which is the beginning of the
553    * first compilation unit header). If first 4 bytes contain 0xFFFFFFFF, the
554    * DWARF is 64 bit. Otherwise, DWARF is 32 bit. */
555   bool                is_DWARF_64_;
556 
557   /* Flags executable file. If this member is 1, ELF file represented with this
558    * instance is an executable. If this member is 0, file is a shared library.
559    */
560   bool                is_exec_;
561 };
562 
563 /* Encapsulates architecture-dependent functionality of an ELF file.
564  * Template param:
 *  Elf_Addr - type for an address field in ELF file. Must be:
 *    - Elf32_Addr for 32-bit CPU, or
 *    - Elf64_Addr for 64-bit CPU.
 *  Elf_Off - type for an offset field in ELF file. Must be:
 *    - Elf32_Off for 32-bit CPU, or
 *    - Elf64_Off for 64-bit CPU.
571  */
572 template <typename Elf_Addr, typename Elf_Off>
573 class ElfFileImpl : protected ElfFile {
574 /* Instance of this class must be instantiated from
575  * ElfFile::Create() method only. */
576 friend class ElfFile;
577  protected:
578   /* Constructs ElfFileImpl instance. */
ElfFileImpl()579   ElfFileImpl() {
580   };
581 
582   /* Destructs ElfFileImpl instance. */
~ElfFileImpl()583   ~ElfFileImpl() {
584   }
585 
586  protected:
587   /* Initializes instance. This is an override of the base class method.
588    * See ElfFile::initialize().
589    */
590   bool initialize(const Elf_CommonHdr* elf_hdr, const char* path);
591 
592   /* Parses DWARF, and buids list of compilation units for this ELF file.
593    * This is an implementation of the base class' abstract method.
594    * See ElfFile::parse_compilation_units().
595    */
596   virtual int parse_compilation_units(const DwarfParseContext* parse_context);
597 
598   /* Gets section information by section name.
599    * Param:
600    *  name - Name of the section to get information for.
601    *  offset - Upon success contains offset of the section data in ELF file.
602    *  size - Upon success contains size of the section data in ELF file.
603    * Return:
604    *  true on sucess, or false if section with such name doesn't exist in
605    *  this ELF file.
606    */
607   bool get_section_info_by_name(const char* name,
608                                 Elf_Off* offset,
609                                 Elf_Word* size);
610 
611   /* Maps section by its name.
612    * Param:
613    *  name - Name of the section to map.
614    *  section - Upon success contains section's mapping information.
615    * Return:
616    *  true on sucess, or false if section with such name doesn't exist in
617    *  this ELF file, or mapping has failed.
618    */
619   bool map_section_by_name(const char* name, ElfMappedSection* section);
620 };
621 
622 #endif  // ELFF_ELF_FILE_H_
623