1 /* Copyright (C) 2007-2010 The Android Open Source Project
2 **
3 ** This software is licensed under the terms of the GNU General Public
4 ** License version 2, as published by the Free Software Foundation, and
5 ** may be copied, distributed, and modified under those terms.
6 **
7 ** This program is distributed in the hope that it will be useful,
8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10 ** GNU General Public License for more details.
11 */
12 
13 /*
14  * Contains declarations of types, constants and structures
15  * describing DWARF format.
16  */
17 
18 #ifndef ELFF_DWARF_DEFS_H_
19 #define ELFF_DWARF_DEFS_H_
20 
21 #include "dwarf.h"
22 #include "elf_defs.h"
23 
/* DWARF structures are packed to 1 byte.
 * Expands to the GCC-style "packed" attribute, so structures tagged with it
 * are laid out without padding between their fields.
 */
#define ELFF_PACKED __attribute__ ((packed))

/*
 * Helper types for misc. DWARF variables.
 */

/* Type for DWARF abbreviation number (32-bit unsigned). */
typedef uint32_t  Dwarf_AbbrNum;

/* Type for DWARF tag ID (16-bit unsigned). */
typedef uint16_t  Dwarf_Tag;

/* Type for DWARF attribute ID (16-bit unsigned). */
typedef uint16_t  Dwarf_At;

/* Type for DWARF form ID (16-bit unsigned). */
typedef uint16_t  Dwarf_Form;

/* Type for offset in 32-bit DWARF. */
typedef uint32_t  Dwarf32_Off;

/* Type for offset in 64-bit DWARF. */
typedef uint64_t  Dwarf64_Off;
48 
/* Kinds of values that decoding of a DWARF attribute can produce.
 * Each enumerator names the member of the Dwarf_Value union that holds the
 * decoded data. Numbering starts at 1 and is sequential.
 */
typedef enum DwarfValueType {
  DWARF_VALUE_UNKNOWN = 1,    /* Undefined */
  DWARF_VALUE_U8      = 2,    /* uint8_t */
  DWARF_VALUE_S8      = 3,    /* int8_t */
  DWARF_VALUE_U16     = 4,    /* uint16_t */
  DWARF_VALUE_S16     = 5,    /* int16_t */
  DWARF_VALUE_U32     = 6,    /* uint32_t */
  DWARF_VALUE_S32     = 7,    /* int32_t */
  DWARF_VALUE_U64     = 8,    /* uint64_t */
  DWARF_VALUE_S64     = 9,    /* int64_t */
  DWARF_VALUE_STR     = 10,   /* const char* */
  DWARF_VALUE_PTR32   = 11,   /* 32-bit address */
  DWARF_VALUE_PTR64   = 12,   /* 64-bit address */
  DWARF_VALUE_BLOCK   = 13,   /* Dwarf_Block */
} DwarfValueType;
90 
/* Describes block of data, stored directly in the mapped .debug_info
 * section. This type is used to represent an attribute encoded with
 * DW_FORM_block# form.
 *
 * The structure only refers to the data (pointer + size); it does not hold
 * a copy of it.
 */
typedef struct Dwarf_Block {
  /* Pointer to the block data inside mapped .debug_info section. */
  const void*   block_ptr;

  /* Byte size of the block data. */
  Elf_Word      block_size;
} Dwarf_Block;
102 
/* Describes a value, obtained from the mapped .debug_info section
 * during DWARF attribute decoding.
 *
 * The value itself lives in an anonymous union; the 'type' field tells which
 * union member is the valid one.
 */
typedef struct Dwarf_Value {
  /* Unites all possible data types for the value.
   * See DwarfValueType for the list of types.
   */
  union {
    Elf_Byte      u8;
    Elf_Sbyte     s8;
    Elf_Half      u16;
    Elf_Shalf     s16;
    Elf_Word      u32;
    Elf_Sword     s32;
    Elf_Xword     u64;
    Elf_Sxword    s64;
    Elf_Word      ptr32;
    Elf_Xword     ptr64;
    const char*   str;
    Dwarf_Block   block;
  };

  /* Value type (defines which variable in the union above
   * contains the value).
   */
  DwarfValueType  type;

  /* Number of bytes that encode this value in .debug_info section
   * of ELF file.
   */
  Elf_Word        encoded_size;
} Dwarf_Value;
135 
136 /* DWARF's LEB128 data type. LEB128 is defined as:
137  * Variable Length Data. "Little Endian Base 128" (LEB128) numbers. LEB128 is
138  * a scheme for encoding integers densely that exploits the assumption that
139  * most integers are small in magnitude. (This encoding is equally suitable
140  * whether the target machine architecture represents data in big-endian or
 * little-endian order. It is "little endian" only in the sense that it avoids
142  * using space to represent the "big" end of an unsigned integer, when the big
143  * end is all zeroes or sign extension bits).
144  *
145  * Unsigned LEB128 numbers are encoded as follows: start at the low order end
146  * of an unsigned integer and chop it into 7-bit chunks. Place each chunk into
147  * the low order 7 bits of a byte. Typically, several of the high order bytes
148  * will be zero; discard them. Emit the remaining bytes in a stream, starting
149  * with the low order byte; set the high order bit on each byte except the last
150  * emitted byte. The high bit of zero on the last byte indicates to the decoder
151  * that it has encountered the last byte. The integer zero is a special case,
152  * consisting of a single zero byte.
153  *
154  * The encoding for signed LEB128 numbers is similar, except that the criterion
155  * for discarding high order bytes is not whether they are zero, but whether
156  * they consist entirely of sign extension bits. Consider the 32-bit integer
157  * -2. The three high level bytes of the number are sign extension, thus LEB128
158  * would represent it as a single byte containing the low order 7 bits, with
159  * the high order bit cleared to indicate the end of the byte stream. Note that
160  * there is nothing within the LEB128 representation that indicates whether an
161  * encoded number is signed or unsigned. The decoder must know what type of
162  * number to expect.
163  *
164  * NOTE: It's assumed that LEB128 will not contain encodings for integers,
165  * larger than 64 bit.
166 */
167 typedef struct ELFF_PACKED Dwarf_Leb128 {
168   /* Beginning of the LEB128 block. */
169   Elf_Byte  val;
170 
171   /* Pulls actual value, encoded with this LEB128 block.
172    * Param:
173    *  value - Upon return will contain value, encoded with this LEB128 block.
174    *  sign - If true, the caller expects the LEB128 to contain a signed
175    *    integer, otherwise, caller expects an unsigned integer value to be
176    *    encoded with this LEB128 block.
177    */
get_commonDwarf_Leb128178   void get_common(Dwarf_Value* value, bool sign) const {
179     value->u64 = 0;
180     /* Integer zero is a special case. */
181     if (val == 0) {
182       value->type = sign ? DWARF_VALUE_S32 : DWARF_VALUE_U32;
183       value->encoded_size = 1;
184       return;
185     }
186 
187     /* We've got to reconstruct the integer. */
188     value->type = DWARF_VALUE_UNKNOWN;
189     value->encoded_size = 0;
190 
191     /* Byte by byte loop though the LEB128, reconstructing the integer from
192      * 7-bits chunks. Byte with 8-th bit set to zero indicates the end
193      * of the LEB128 block. For signed integers, 7-th bit of the last LEB128
194      * byte controls the sign. If 7-th bit of the last LEB128 byte is set,
195      * the integer is negative. If 7-th bit of the last LEB128 byte is not
196      * set, the integer is positive.
197      */
198     const Elf_Byte* cur = &val;
199     Elf_Word shift = 0;
200     while ((*cur & 0x80) != 0) {
201       value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
202       shift += 7;
203       value->encoded_size++;
204       cur++;
205     }
206     value->u64 |= (static_cast<Elf_Xword>(*cur) & 0x7F) << shift;
207     value->encoded_size++;
208 
209     /* LEB128 format doesn't carry any info of the sizeof of the integer it
210      * represents. We well guess it, judging by the highest bit set in the
211      * reconstucted integer.
212      */
213     if ((value->u64 & 0xFFFFFFFF00000000LL) == 0) {
214       /* 32-bit integer. */
215       if (sign) {
216         value->type = DWARF_VALUE_S32;
217         if (((*cur) & 0x40) != 0) {
218           // Value is negative.
219           value->u64 |= - (1 << (shift + 7));
220         } else if ((value->u32 & 0x80000000) != 0) {
221           // Make sure we don't report negative value in this case.
222           value->type = DWARF_VALUE_S64;
223         }
224       } else {
225         value->type = DWARF_VALUE_U32;
226       }
227     } else {
228       /* 64-bit integer. */
229       if (sign) {
230         value->type = DWARF_VALUE_S64;
231         if (((*cur) & 0x40) != 0) {
232           // Value is negative.
233           value->u64 |= - (1 << (shift + 7));
234         }
235       } else {
236         value->type = DWARF_VALUE_U64;
237       }
238     }
239   }
240 
241   /* Pulls actual unsigned value, encoded with this LEB128 block.
242    * See get_common() for more info.
243    * Param:
244    *  value - Upon return will contain unsigned value, encoded with
245    *  this LEB128 block.
246    */
get_unsignedDwarf_Leb128247   void get_unsigned(Dwarf_Value* value) const {
248     get_common(value, false);
249   }
250 
251   /* Pulls actual signed value, encoded with this LEB128 block.
252    * See get_common() for more info.
253    * Param:
254    *  value - Upon return will contain signed value, encoded with
255    *  this LEB128 block.
256    */
get_signedDwarf_Leb128257   void get_signed(Dwarf_Value* value) const {
258     get_common(value, true);
259   }
260 
261   /* Pulls LEB128 value, advancing past this LEB128 block.
262    * See get_common() for more info.
263    * Return:
264    *  Pointer to the byte past this LEB128 block.
265    */
processDwarf_Leb128266   const void* process(Dwarf_Value* value, bool sign) const {
267     get_common(value, sign);
268     return INC_CPTR(&val, value->encoded_size);
269   }
270 
271   /* Pulls LEB128 unsigned value, advancing past this LEB128 block.
272    * See process() for more info.
273    */
process_unsignedDwarf_Leb128274   const void* process_unsigned(Dwarf_Value* value) const {
275     return process(value, false);
276   }
277 
278   /* Pulls LEB128 signed value, advancing past this LEB128 block.
279    * See process() for more info.
280    */
process_signedDwarf_Leb128281   const void* process_signed(Dwarf_Value* value) const {
282     return process(value, true);
283   }
284 } Dwarf_Leb128;
285 
286 /* DIE attribute descriptor in the .debug_abbrev section.
287  * Attribute descriptor contains two LEB128 values. First one provides
288  * attribute ID (one of DW_AT_XXX values), and the second one provides
289  * format (one of DW_FORMAT_XXX values), in which attribute value is
290  * encoded in the .debug_info section of the ELF file.
291  */
292 typedef struct ELFF_PACKED Dwarf_Abbr_AT {
293   /* Attribute ID (DW_AT_XXX).
294    * Attribute format (DW_FORMAT_XXX) follows immediately.
295    */
296   Dwarf_Leb128  at;
297 
298   /* Checks if this is a separator descriptor.
299    * Zero is an invalid attribute ID, indicating the end of attribute
300    * list for the current DIE.
301    */
is_separatorDwarf_Abbr_AT302   bool is_separator() const {
303     return at.val == 0;
304   }
305 
306   /* Pulls attribute data, advancing past this descriptor.
307    * Param:
308    *  at_value - Upon return contains attribute value of this descriptor.
309    *  form - Upon return contains form value of this descriptor.
310    * Return:
311    *  Pointer to the byte past this descriptor block (usually, next
312    *  attribute decriptor).
313    */
processDwarf_Abbr_AT314   const Dwarf_Abbr_AT* process(Dwarf_At* at_value, Dwarf_Form* form) const {
315     if (is_separator()) {
316       /* Size of separator descriptor is always 2 bytes. */
317       *at_value = 0;
318       *form = 0;
319       return INC_CPTR_T(Dwarf_Abbr_AT, &at.val, 2);
320     }
321 
322     Dwarf_Value val;
323 
324     /* Process attribute ID. */
325     const Dwarf_Leb128* next =
326         reinterpret_cast<const Dwarf_Leb128*>(at.process_unsigned(&val));
327     *at_value = val.u16;
328 
329     /* Follow with processing the form. */
330     next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
331     *form = val.u16;
332     return reinterpret_cast<const Dwarf_Abbr_AT*>(next);
333   }
334 } Dwarf_Abbr_AT;
335 
336 /* DIE abbreviation descriptor in the .debug_abbrev section.
337  * DIE abbreviation descriptor contains three parameters. The first one is a
338  * LEB128 value, that encodes 1 - based abbreviation descriptor number.
339  * Abbreviation descriptor numbers seems to be always in sequential order, and
340  * are counted on per-compilation unit basis. I.e. abbreviation number for the
341  * first DIE abbreviation descriptor of each compilation unit is always 1.
342  *
343  * Besides abbreviation number, DIE abbreviation descriptor contains two more
344  * values. The first one (after abbr_num) is a LEB128 value containing DIE's
345  * tag value, and the second one is one byte flag specifying whether or not
346  * the DIE contains any cildren.
347  *
348  * This descriptor is immediately followed by a list of attribute descriptors
349  * (see Dwarf_Abbr_AT) for the DIE represented by this abbreviation descriptor.
350  */
351 typedef struct ELFF_PACKED Dwarf_Abbr_DIE {
352   /* 1 - based abbreviation number for the DIE. */
353   Dwarf_Leb128  abbr_num;
354 
355   /* Gets abbreviation number for this descriptor. */
get_abbr_numDwarf_Abbr_DIE356   Dwarf_AbbrNum get_abbr_num() const {
357     Dwarf_Value val;
358     abbr_num.get_unsigned(&val);
359     return val.u16;
360   }
361 
362   /* Gets DIE tag for this descriptor. */
get_tagDwarf_Abbr_DIE363   Dwarf_Tag get_tag() const {
364     Dwarf_Tag tag;
365     process(NULL, &tag);
366     return tag;
367   }
368 
369   /* Pulls DIE abbreviation descriptor data, advancing past this descriptor.
370    * Param:
371    *  abbr_index - Upon return contains abbreviation number for this
372    *    descriptor. This parameter can be NULL, if the caller is not interested
373    *    in this value.
374    *  tag - Upon return contains tag of the DIE for this descriptor. This
375    *    parameter can be NULL, if the caller is not interested in this value.
376    *  form - Upon return contains form of the DIE for this descriptor.
377    * Return:
378    *  Pointer to the list of attribute descriptors for the DIE.
379    */
processDwarf_Abbr_DIE380   const Dwarf_Abbr_AT* process(Dwarf_AbbrNum* abbr_index,
381                                Dwarf_Tag* tag) const {
382     Dwarf_Value val;
383     const Dwarf_Leb128* next =
384         reinterpret_cast<const Dwarf_Leb128*>(abbr_num.process_unsigned(&val));
385     if (abbr_index != NULL) {
386       *abbr_index = val.u32;
387     }
388 
389     /* Next one is a "tag". */
390     next = reinterpret_cast<const Dwarf_Leb128*>(next->process_unsigned(&val));
391     if (tag != NULL) {
392       *tag = val.u16;
393     }
394 
395     /* Next one is a "has children" one byte flag. We're not interested in it,
396      * so jump to the list of attribute descriptors that immediately follows
397      * this DIE descriptor. */
398     return INC_CPTR_T(Dwarf_Abbr_AT, next, 1);
399   }
400 } Dwarf_Abbr_DIE;
401 
402 /* DIE descriptor in the .debug_info section.
403  * DIE descriptor contains one LEB128-encoded value, containing DIE's
404  * abbreviation descriptor number in the .debug_abbrev section.
405  *
406  * DIE descriptor is immediately followed by the list of DIE attribute values,
407  * format of wich is defined by the list of attribute descriptors in the
408  * .debug_abbrev section, that immediately follow the DIE attribute descriptor,
409  * addressed by this descriptor's abbr_num LEB128.
410  */
411 typedef struct ELFF_PACKED Dwarf_DIE {
412   /* 1 - based index of DIE abbreviation descriptor (Dwarf_Abbr_DIE) for this
413    * DIE in the .debug_abbrev section.
414    *
415    * NOTE: DIE abbreviation descriptor indexes are tied to the compilation
416    * unit. In other words, each compilation unit restarts counting DIE
417    * abbreviation descriptors from 1.
418    *
419    * NOTE: Zero is invalid value for this field, indicating that this DIE is a
420    * separator (usually it ends a list of "child" DIEs)
421    */
422   Dwarf_Leb128  abbr_num;
423 
424   /* Checks if this is a separator DIE. */
is_separatorDwarf_DIE425   bool is_separator() const {
426     return abbr_num.val == 0;
427   }
428 
429   /* Gets (1 - based) abbreviation number for this DIE. */
get_abbr_numDwarf_DIE430   Dwarf_AbbrNum get_abbr_num() const {
431     Dwarf_Value val;
432     abbr_num.get_unsigned(&val);
433     return val.u16;
434   }
435 
436   /* Pulls DIE information, advancing past this descriptor to DIE attributes.
437    * Param:
438    *  abbr_num - Upon return contains abbreviation number for this DIE. This
439    *    parameter can be NULL, if the caller is not interested in this value.
440    * Return:
441    *  Pointer to the byte past this descriptor (the list of DIE attributes).
442    */
processDwarf_DIE443   const Elf_Byte* process(Dwarf_AbbrNum* abbr_number) const {
444     if (is_separator()) {
445       if (abbr_number != NULL) {
446         *abbr_number = 0;
447       }
448       // Size of a separator DIE is 1 byte.
449       return INC_CPTR_T(Elf_Byte, &abbr_num.val, 1);
450     }
451     Dwarf_Value val;
452     const void* ret = abbr_num.process_unsigned(&val);
453     if (abbr_number != NULL) {
454       *abbr_number = val.u32;
455     }
456     return reinterpret_cast<const Elf_Byte*>(ret);
457   }
458 } Dwarf_DIE;
459 
460 /*
461  * Variable size headers.
462  * When encoding size value in DWARF, the first 32 bits of a "size" header
463  * define header type. If first 32 bits of the header contain 0xFFFFFFFF
464  * value, this is 64-bit size header with the following 64 bits encoding
465  * the size. Otherwise, if first 32 bits are not 0xFFFFFFFF, they contain
466  * 32-bit size value.
467  */
468 
/* Size header for 32-bit DWARF.
 * Consists of a single field that holds the size value itself.
 */
typedef struct ELFF_PACKED Dwarf32_SizeHdr {
  /* Size value. */
  Elf_Word  size;
} Dwarf32_SizeHdr;
474 
/* Size header for 64-bit DWARF.
 * A selector field comes first, and the actual size follows it. The
 * structure is packed, so there is no padding between the two fields.
 */
typedef struct ELFF_PACKED Dwarf64_SizeHdr {
  /* Size selector. For 64-bit DWARF this field is set to 0xFFFFFFFF */
  Elf_Word  size_selector;

  /* Actual size value. */
  Elf_Xword   size;
} Dwarf64_SizeHdr;
483 
/* Compilation unit header in the .debug_info section.
 * The structure is packed, so its fields follow each other without padding.
 * Template param:
 *  Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
 *    for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
 *  Elf_Off - Type for abbrev_offset field. Must be Elf_Word for 32-bit
 *    DWARF, or Elf_Xword for 64-bit DWARF.
 */
template <typename Dwarf_SizeHdr, typename Elf_Off>
struct ELFF_PACKED Dwarf_CUHdr {
  /* Size of the compilation unit data in .debug_info section. */
  Dwarf_SizeHdr   size_hdr;

  /* Compilation unit's DWARF version stamp. */
  Elf_Half        version;

  /* Relative (to the beginning of .debug_abbrev section data) offset of the
   * beginning of abbreviation sequence for this compilation unit.
   */
  Elf_Off         abbrev_offset;

  /* Pointer size for this compilation unit (should be 4, or 8). */
  Elf_Byte        address_size;
};
/* Compilation unit header in the .debug_info section for 32-bit DWARF. */
typedef Dwarf_CUHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_CUHdr;
/* Compilation unit header in the .debug_info section for 64-bit DWARF. */
typedef Dwarf_CUHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_CUHdr;
511 
/* CU STMTL header in the .debug_line section.
 * The structure is packed, so its fields follow each other without padding.
 * Template param:
 *  Dwarf_SizeHdr - Type for the header's size field. Must be Dwarf32_SizeHdr
 *    for 32-bit DWARF, or Dwarf64_SizeHdr for 64-bit DWARF.
 *  Elf_Size - Type for header_length field. Must be Elf_Word for 32-bit
 *    DWARF, or Elf_Xword for 64-bit DWARF.
 */
template <typename Dwarf_SizeHdr, typename Elf_Size>
struct ELFF_PACKED Dwarf_STMTLHdr {
  /* The size in bytes of the line number information for this compilation
   * unit, not including the unit_length field itself. */
  Dwarf_SizeHdr unit_length;

  /* A version number. This number is specific to the line number information
   * and is independent of the DWARF version number. */
  Elf_Half      version;

  /* The number of bytes following the header_length field to the beginning of
   * the first byte of the line number program itself. In the 32-bit DWARF
   * format, this is a 4-byte unsigned length; in the 64-bit DWARF format,
   * this field is an 8-byte unsigned length. */
  Elf_Size      header_length;

  /* The size in bytes of the smallest target machine instruction. Line number
   * program opcodes that alter the address register first multiply their
   * operands by this value. */
  Elf_Byte      min_instruction_len;

  /* The initial value of the is_stmt register. */
  Elf_Byte      default_is_stmt;

  /* This parameter affects the meaning of the special opcodes. */
  Elf_Sbyte     line_base;

  /* This parameter affects the meaning of the special opcodes. */
  Elf_Byte      line_range;

  /* The number assigned to the first special opcode. */
  Elf_Byte      opcode_base;

  /* This is first opcode in an array specifying the number of LEB128 operands
   * for each of the standard opcodes. The first element of the array
   * corresponds to the opcode whose value is 1, and the last element
   * corresponds to the opcode whose value is opcode_base - 1. By increasing
   * opcode_base, and adding elements to this array, new standard opcodes can
   * be added, while allowing consumers who do not know about these new opcodes
   * to be able to skip them. NOTE: this array points to the mapped
   * .debug_line section. Only its first element is declared here. */
  Elf_Byte      standard_opcode_lengths;
};
/* CU STMTL header in the .debug_line section for 32-bit DWARF. */
typedef Dwarf_STMTLHdr<Dwarf32_SizeHdr, Elf_Word> Dwarf32_STMTLHdr;
/* CU STMTL header in the .debug_line section for 64-bit DWARF. */
typedef Dwarf_STMTLHdr<Dwarf64_SizeHdr, Elf_Xword> Dwarf64_STMTLHdr;
566 
567 /* Source file descriptor in the .debug_line section.
568  * Descriptor begins with zero-terminated file name, followed by an ULEB128,
569  * encoding directory index in the list of included directories, followed by
570  * an ULEB12, encoding file modification time, followed by an ULEB12, encoding
571  * file size.
572  */
573 typedef struct ELFF_PACKED Dwarf_STMTL_FileDesc {
574   /* Zero-terminated file name. */
575   char  file_name[1];
576 
577   /* Checks of this descriptor ends the list. */
is_last_entryDwarf_STMTL_FileDesc578   bool is_last_entry() const {
579     return file_name[0] == '\0';
580   }
581 
582   /* Gets file name. */
get_file_nameDwarf_STMTL_FileDesc583   const char* get_file_name() const {
584     return file_name;
585   }
586 
587   /* Processes this descriptor, advancing to the next one.
588    * Param:
589    *  dir_index - Upon return contains index of the parent directory in the
590    *    list of included directories. Can be NULL if caller is not interested
591    *    in this value.
592    * Return:
593    *  Pointer to the next source file descriptor in the list.
594    */
processDwarf_STMTL_FileDesc595   const Dwarf_STMTL_FileDesc* process(Elf_Word* dir_index) const {
596     if (is_last_entry()) {
597       return this;
598     }
599 
600     /* First parameter: include directory index. */
601     Dwarf_Value tmp;
602     const Dwarf_Leb128* leb =
603         INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
604     leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
605     if (dir_index != NULL) {
606       *dir_index = tmp.u32;
607     }
608     /* Process file time. */
609     leb = reinterpret_cast<const Dwarf_Leb128*>(leb->process_unsigned(&tmp));
610     /* Process file size. */
611     return reinterpret_cast<const Dwarf_STMTL_FileDesc*>(leb->process_unsigned(&tmp));
612   }
613 
614   /* Gets directory index for this descriptor. */
get_dir_indexDwarf_STMTL_FileDesc615   Elf_Word get_dir_index() const {
616     assert(!is_last_entry());
617     if (is_last_entry()) {
618       return 0;
619     }
620     /* Get directory index. */
621     Dwarf_Value ret;
622     const Dwarf_Leb128* leb =
623       INC_CPTR_T(Dwarf_Leb128, file_name, strlen(file_name) + 1);
624     leb->process_unsigned(&ret);
625     return ret.u32;
626   }
627 } Dwarf_STMTL_FileDesc;
628 
629 /* Encapsulates a DIE attribute, collected during ELF file parsing.
630  */
631 class DIEAttrib {
632  public:
633   /* Constructs DIEAttrib intance. */
DIEAttrib()634   DIEAttrib()
635       : at_(0),
636         form_(0) {
637     value_.type = DWARF_VALUE_UNKNOWN;
638   }
639 
640   /* Destructs DIEAttrib intance. */
~DIEAttrib()641   ~DIEAttrib() {
642   }
643 
644   /* Gets DWARF attribute ID (DW_AT_Xxx) for this property. */
at()645   Dwarf_At at() const {
646     return at_;
647   }
648 
649   /* Gets DWARF form ID (DW_FORM_Xxx) for this property. */
form()650   Dwarf_Form form() const {
651     return form_;
652   }
653 
654   /* Gets value of this property. */
value()655   const Dwarf_Value* value() const {
656     return &value_;
657   }
658 
659   /* Value of this property. */
660   Dwarf_Value   value_;
661 
662   /* DWARF attribute ID (DW_AT_Xxx) for this property. */
663   Dwarf_At      at_;
664 
665   /* DWARF form ID (DW_FORM_Xxx) for this property. */
666   Dwarf_Form    form_;
667 };
668 
/* Parse tag context.
 * This structure is used as an ELF file parsing parameter, limiting collected
 * DIEs by the list of tags.
 */
typedef struct DwarfParseContext {
  /* Zero-terminated list of tags to collect DIEs for. If this field is NULL,
   * DIEs for all tags will be collected during the parsing.
   * The list is referenced, not copied.
   */
  const Dwarf_Tag*  tags;
} DwarfParseContext;
678 
679 /* Checks if a DIE with the given tag should be collected during the parsing.
680  * Param:
681  *  parse_context - Parse context to check the tag against. This parameter can
682  *  be NULL, indicating that all tags should be collected.
683  *  tag - Tag to check.
684  * Return:
685  *  true if a DIE with the given tag should be collected during the parsing,
686  *  or false, if the DIE should not be collected.
687  */
688 static inline bool
collect_die(const DwarfParseContext * parse_context,Dwarf_Tag tag)689 collect_die(const DwarfParseContext* parse_context, Dwarf_Tag tag) {
690   if (parse_context == NULL || parse_context->tags == NULL) {
691     return true;
692   }
693   for (const Dwarf_Tag* tags = parse_context->tags; *tags != 0; tags++) {
694     if (*tags == tag) {
695       return true;
696     }
697   }
698   return false;
699 }
700 
701 /* Encapsulates an array of Dwarf_Abbr_DIE pointers, cached for a compilation
702  * unit. Although Dwarf_Abbr_DIE descriptors in the .debug_abbrev section of
703  * the ELF file seems to be always in sequential order, DIE descriptors may
704  * reference them randomly. So, to provide better performance, we will cache
705  * all Dwarf_Abbr_DIE pointers, that were found for each DIE. Since all of the
706  * Dwarf_Abbr_DIE are sequential, an array is the best way to cache them.
707  *
708  * NOTE: Objects of this class are instantiated one per each CU, as all DIE
709  * abbreviation numberation is restarted from 1 for each new CU.
710  */
711 class DwarfAbbrDieArray {
712  public:
713   /* Constructs DwarfAbbrDieArray instance.
714    * Most of the CUs don't have too many unique Dwarf_Abbr_DIEs, so, in order
715    * to decrease the amount of memory allocation calls, we will preallocate
716    * a relatively small array for them along with the instance of this class,
717    * hopping, that all Dwarf_Abbr_DIEs for the CU will fit into it.
718    */
DwarfAbbrDieArray()719   DwarfAbbrDieArray()
720       : array_(&small_array_[0]),
721         array_size_(ELFF_ARRAY_SIZE(small_array_)),
722         count_(0) {
723   }
724 
725   /* Destructs DwarfAbbrDieArray instance. */
~DwarfAbbrDieArray()726   ~DwarfAbbrDieArray() {
727     if (array_ != &small_array_[0]) {
728       delete[] array_;
729     }
730   }
731 
732   /* Adds new entry to the array
733    * Param:
734    *  abbr - New entry to add.
735    *  num - Abbreviation number for the adding entry.
736    *    NOTE: before adding, this method will verify that descriptor for the
737    *    given abbreviation number has not been cached yet.
738    *    NOTE: due to the nature of this array, entries MUST be added strictly
739    *    in sequential order.
740    * Return:
741    *  true on success, false on failure.
742    */
add(const Dwarf_Abbr_DIE * abbr,Dwarf_AbbrNum num)743   bool add(const Dwarf_Abbr_DIE* abbr, Dwarf_AbbrNum num) {
744     assert(num != 0);
745     if (num == 0) {
746       // Zero is illegal DIE abbreviation number.
747       _set_errno(EINVAL);
748       return false;
749     }
750 
751     if (num <= count_) {
752       // Already cached.
753       return true;
754     }
755 
756     // Enforce strict sequential order.
757     assert(num == (count_ + 1));
758     if (num != (count_ + 1)) {
759       _set_errno(EINVAL);
760       return false;
761     }
762 
763     if (num >= array_size_) {
764       /* Expand the array. Make it 64 entries bigger than adding entry number.
765        * NOTE: that we don't check for an overflow here, since we secured
766        * ourselves from that by enforcing strict sequential order. So, an
767        * overflow may happen iff number of entries cached in this array is
768        * close to 4G, which is a) totally unreasonable, and b) we would die
769        * long before this amount of entries is cached.
770        */
771       Dwarf_AbbrNum new_size = num + 64;
772 
773       // Reallocate.
774       const Dwarf_Abbr_DIE** new_array = new const Dwarf_Abbr_DIE*[new_size];
775       assert(new_array != NULL);
776       if (new_array == NULL) {
777         _set_errno(ENOMEM);
778         return false;
779       }
780       memcpy(new_array, array_, count_ * sizeof(const Dwarf_Abbr_DIE*));
781       if (array_ != &small_array_[0]) {
782         delete[] array_;
783       }
784       array_ = new_array;
785       array_size_ = new_size;
786     }
787 
788     // Abbreviation numbers are 1-based.
789     array_[num - 1] = abbr;
790     count_++;
791     return true;
792   }
793 
794   /* Adds new entry to the array
795    * Param:
796    *  abbr - New entry to add.
797    * Return:
798    *  true on success, false on failure.
799    */
add(const Dwarf_Abbr_DIE * abbr)800   bool add(const Dwarf_Abbr_DIE* abbr) {
801     return add(abbr, abbr->get_abbr_num());
802   }
803 
804   /* Gets an entry from the array
805    * Param:
806    *  num - 1-based index of an entry to get.
807    * Return:
808    *  Entry on success, or NULL if num exceeds the number of entries
809    *  contained in the array.
810    */
get(Dwarf_AbbrNum num)811   const Dwarf_Abbr_DIE* get(Dwarf_AbbrNum num) const {
812     assert(num != 0 && num <= count_);
813     if (num != 0 && num <= count_) {
814       return array_[num - 1];
815     } else {
816       _set_errno(EINVAL);
817       return NULL;
818     }
819   }
820 
821   /* Caches Dwarf_Abbr_DIEs into this array up to the requested number.
822    * NOTE: This method cannot be called on an empty array. Usually, first
823    * entry is inserted into this array when CU object is initialized.
824    * Param:
825    *  num - Entry number to cache entries up to.
826    * Return:
827    *  Last cached entry (actually, an entry for the 'num' index).
828    */
cache_to(Dwarf_AbbrNum num)829   const Dwarf_Abbr_DIE* cache_to(Dwarf_AbbrNum num) {
830     /* Last cached DIE abbreviation. We always should have cached at least one
831      * abbreviation for the CU DIE itself, added via "add" method when CU
832      * object was initialized. */
833     const Dwarf_Abbr_DIE* cur_abbr = get(count_);
834     assert(cur_abbr != NULL);
835     if (cur_abbr == NULL) {
836       return NULL;
837     }
838 
839     /* Starting with the last cached DIE abbreviation, loop through the
840      * remaining DIE abbreviations in the .debug_abbrev section of the
841      * mapped ELF file, caching them until we reach the requested
842      * abbreviation descriptor number. Normally, the very next DIE
843      * abbreviation will stop the loop. */
844     while (num > count_) {
845       Dwarf_AbbrNum abbr_num;
846       Dwarf_Tag tmp2;
847       Dwarf_Form tmp3;
848       Dwarf_At tmp4;
849 
850       /* Process all AT abbreviations for the current DIE entry, reaching next
851        * DIE abbreviation. */
852       const Dwarf_Abbr_AT* abbr_at = cur_abbr->process(&abbr_num, &tmp2);
853       while (!abbr_at->is_separator()) {
854         abbr_at = abbr_at->process(&tmp4, &tmp3);
855       }
856 
857       // Next DIE abbreviation is right after the separator AT abbreviation.
858       cur_abbr = reinterpret_cast<const Dwarf_Abbr_DIE*>
859                                               (abbr_at->process(&tmp4, &tmp3));
860       if (!add(cur_abbr)) {
861         return NULL;
862       }
863     }
864 
865     return array_[num - 1];
866   }
867 
868   /* Empties array and frees allocations. */
empty()869   void empty() {
870     if (array_ != &small_array_[0]) {
871       delete[] array_;
872       array_ = &small_array_[0];
873       array_size_ = sizeof(small_array_) / sizeof(small_array_[0]);
874     }
875     count_ = 0;
876   }
877 
878  protected:
879   /* Array, preallocated in anticipation of relatively small number of
880    * DIE abbreviations in compilation unit. */
881   const Dwarf_Abbr_DIE*   small_array_[64];
882 
883   /* Array of Dwarf_Abbr_DIE pointers, cached for a compilation unit. */
884   const Dwarf_Abbr_DIE**  array_;
885 
886   /* Current size of the array. */
887   Dwarf_AbbrNum           array_size_;
888 
889   /* Number of entries, cached in the array. */
890   Dwarf_AbbrNum           count_;
891 };
892 
/* Encapsulates a state machine for the "Line Number Program", that is run
 * on data contained in the mapped .debug_line section.
 */
896 class DwarfStateMachine {
897  public:
898   /* Constructs DwarfStateMachine instance.
899    * Param:
900    *  set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
901    *    see Dwarf_STMTL_HdrXX.
902    */
DwarfStateMachine(bool set_is_stmt)903   explicit DwarfStateMachine(bool set_is_stmt)
904     : address_(0),
905       file_(1),
906       line_(1),
907       column_(0),
908       discriminator_(0),
909       is_stmt_(set_is_stmt),
910       basic_block_(false),
911       end_sequence_(false),
912       prologue_end_(false),
913       epilogue_begin_(false),
914       isa_(0),
915       set_file_info_(NULL) {
916   }
917 
918   /* Destructs DwarfStateMachine instance. */
~DwarfStateMachine()919   ~DwarfStateMachine() {
920   }
921 
922   /* Resets the state to default.
923    * Param:
924    *  set_is_stmt - Matches value of default_is_stmt field in the STMTL header.
925    *    see Dwarf_STMTL_HdrXX.
926   */
reset(bool set_is_stmt)927   void reset(bool set_is_stmt) {
928     address_ = 0;
929     file_ = 1;
930     line_ = 1;
931     column_ = 0;
932     discriminator_ = 0;
933     is_stmt_ = set_is_stmt;
934     basic_block_ = false;
935     end_sequence_ = false;
936     prologue_end_ = false;
937     epilogue_begin_ = false;
938     isa_ = 0;
939     set_file_info_ = NULL;
940   }
941 
942   /*
943    * Machine state.
944    */
945 
946   /* Current address (current PC value). */
947   Elf_Xword                   address_;
948 
949   /* Current index of source file descriptor. */
950   Elf_Word                    file_;
951 
952   /* Current line in the current source file. */
953   Elf_Word                    line_;
954 
955   /* Current column. */
956   Elf_Word                    column_;
957 
958   /* Current discriminator value. */
959   Elf_Word                    discriminator_;
960 
961   /* Current STMT flag. */
962   bool                        is_stmt_;
963 
964   /* Current basic block flag. */
965   bool                        basic_block_;
966 
967   /* Current end of sequence flag. */
968   bool                        end_sequence_;
969 
970   /* Current end of prologue flag. */
971   bool                        prologue_end_;
972 
973   /* Current epilogue begin flag. */
974   bool                        epilogue_begin_;
975 
976   /* Current ISA value. */
977   Elf_Word                    isa_;
978 
979   /* Current value for explicitly set current source file descriptor.
980    * If not NULL, this descriptor has priority over the descriptor, addressed
981    * by the file_ member of this class. */
982   const Dwarf_STMTL_FileDesc* set_file_info_;
983 };
984 
985 /* Checks if given tag belongs to a routine. */
986 static inline bool
dwarf_tag_is_routine(Dwarf_Tag tag)987 dwarf_tag_is_routine(Dwarf_Tag tag) {
988   return tag == DW_TAG_inlined_subroutine ||
989          tag == DW_TAG_subprogram ||
990          tag == DW_AT_main_subprogram;
991 }
992 
993 /* Checks if given tag belongs to a compilation unit. */
994 static inline bool
dwarf_tag_is_cu(Dwarf_Tag tag)995 dwarf_tag_is_cu(Dwarf_Tag tag) {
996   return tag == DW_TAG_compile_unit ||
997          tag == DW_TAG_partial_unit;
998 }
999 
1000 #endif  // ELFF_DWARF_DEFS_H_
1001