1 /* -*- mode: C; c-basic-offset: 3; -*- */
2 
3 /*--------------------------------------------------------------------*/
4 /*--- Read DWARF3/4 ".debug_info" sections (DIE trees).            ---*/
5 /*---                                                 readdwarf3.c ---*/
6 /*--------------------------------------------------------------------*/
7 
8 /*
9    This file is part of Valgrind, a dynamic binary instrumentation
10    framework.
11 
12    Copyright (C) 2008-2015 OpenWorks LLP
13       info@open-works.co.uk
14 
15    This program is free software; you can redistribute it and/or
16    modify it under the terms of the GNU General Public License as
17    published by the Free Software Foundation; either version 2 of the
18    License, or (at your option) any later version.
19 
20    This program is distributed in the hope that it will be useful, but
21    WITHOUT ANY WARRANTY; without even the implied warranty of
22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23    General Public License for more details.
24 
25    You should have received a copy of the GNU General Public License
26    along with this program; if not, write to the Free Software
27    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28    02111-1307, USA.
29 
30    The GNU General Public License is contained in the file COPYING.
31 
32    Neither the names of the U.S. Department of Energy nor the
33    University of California nor the names of its contributors may be
34    used to endorse or promote products derived from this software
35    without prior written permission.
36 */
37 
38 #if defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
39 
40 /* REFERENCE (without which this code will not make much sense):
41 
42    DWARF Debugging Information Format, Version 3,
43    dated 20 December 2005 (the "D3 spec").
44 
45    Available at http://www.dwarfstd.org/Dwarf3.pdf.  There's also a
46    .doc (MS Word) version, but for some reason the section numbers
47    between the Word and PDF versions differ by 1 in the first digit.
48    All section references in this code are to the PDF version.
49 
50    CURRENT HACKS:
51 
52    DW_TAG_{const,volatile}_type no DW_AT_type is allowed; it is
53       assumed to mean "const void" or "volatile void" respectively.
54       GDB appears to interpret them like this, anyway.
55 
   In many cases it is important to know the svma of a CU (the "base
   address of the CU", as the D3 spec calls it).  There are some
   situations in which the spec implies this value is unknown, but the
   Dwarf3 produced by gcc-4.1 seems to assume it is not unknown but
   merely zero when not explicitly stated.  So we too have to make
   that assumption.
62 
63    POTENTIAL BUG?  Spotted 6 Sept 08.  Why doesn't
64    unitary_range_list() bias the resulting range list in the same way
65    that its more general cousin, get_range_list(), does?  I don't
66    know.
67 
68    TODO, 2008 Feb 17:
69 
70    get rid of cu_svma_known and document the assumed-zero svma hack.
71 
72    ML_(sizeOfType): differentiate between zero sized types and types
73    for which the size is unknown.  Is this important?  I don't know.
74 
   DW_TAG_array_types: deal with explicit sizes (currently we compute
   the size from the bounds and the element size, although that's
   fragile if the bounds are incompletely specified or completely
   absent)
79 
80    Document reason for difference (by 1) of stack preening depth in
81    parse_var_DIE vs parse_type_DIE.
82 
83    Don't hand to ML_(addVars), vars whose locations are entirely in
84    registers (DW_OP_reg*).  This is merely a space-saving
85    optimisation, as ML_(evaluate_Dwarf3_Expr) should handle these
86    expressions correctly, by failing to evaluate them and hence
87    effectively ignoring the variable with which they are associated.
88 
89    Deal with DW_TAG_array_types which have element size != stride
90 
91    In some cases, the info for a variable is split between two
92    different DIEs (generally a declarer and a definer).  We punt on
93    these.  Could do better here.
94 
95    The 'data_bias' argument passed to the expression evaluator
96    (ML_(evaluate_Dwarf3_Expr)) should really be changed to a
97    MaybeUWord, to make it clear when we do vs don't know what it is
98    for the evaluation of an expression.  At the moment zero is passed
99    for this parameter in the don't know case.  That's a bit fragile
100    and obscure; using a MaybeUWord would be clearer.
101 
102    POTENTIAL PERFORMANCE IMPROVEMENTS:
103 
104    Currently, duplicate removal and all other queries for the type
105    entities array is done using cuOffset-based pointing, which
106    involves a binary search (VG_(lookupXA)) for each access.  This is
107    wildly inefficient, although simple.  It would be better to
108    translate all the cuOffset-based references (iow, all the "R" and
109    "Rs" fields in the TyEnts in 'tyents') to direct index numbers in
110    'tyents' right at the start of dedup_types(), and use direct
111    indexing (VG_(indexXA)) wherever possible after that.
112 
113    cmp__XArrays_of_AddrRange is also a performance bottleneck.  Move
114    VG_(indexXA) into pub_tool_xarray.h so it can be inlined at all use
115    points, and possibly also make an _UNCHECKED version which skips
116    the range checks in performance-critical situations such as this.
117 
118    Handle interaction between read_DIE and parse_{var,type}_DIE
119    better.  Currently read_DIE reads the entire DIE just to find where
120    the end is (and for debug printing), so that it can later reliably
121    move the cursor to the end regardless of what parse_{var,type}_DIE
122    do.  This means many DIEs (most, even?) are read twice.  It would
123    be smarter to make parse_{var,type}_DIE return a Bool indicating
124    whether or not they advanced the DIE cursor, and only if they
125    didn't should read_DIE itself read through the DIE.
126 
127    ML_(addVar) and add_var_to_arange: quite a lot of DiAddrRanges have
128    zero variables in their .vars XArray.  Rather than have an XArray
129    with zero elements (which uses 2 malloc'd blocks), allow the .vars
130    pointer to be NULL in this case.
131 
132    More generally, reduce the amount of memory allocated and freed
133    while reading Dwarf3 type/variable information.  Even modest (20MB)
134    objects cause this module to allocate and free hundreds of
135    thousands of small blocks, and ML_(arena_malloc) and its various
136    groupies always show up at the top of performance profiles. */
137 
138 #include "pub_core_basics.h"
139 #include "pub_core_debuginfo.h"
140 #include "pub_core_libcbase.h"
141 #include "pub_core_libcassert.h"
142 #include "pub_core_libcprint.h"
143 #include "pub_core_libcsetjmp.h"   // setjmp facilities
144 #include "pub_core_hashtable.h"
145 #include "pub_core_options.h"
146 #include "pub_core_tooliface.h"    /* VG_(needs) */
147 #include "pub_core_xarray.h"
148 #include "pub_core_wordfm.h"
149 #include "priv_misc.h"             /* dinfo_zalloc/free */
150 #include "priv_image.h"
151 #include "priv_tytypes.h"
152 #include "priv_d3basics.h"
153 #include "priv_storage.h"
154 #include "priv_readdwarf3.h"       /* self */
155 
156 
157 /*------------------------------------------------------------*/
158 /*---                                                      ---*/
159 /*--- Basic machinery for parsing DIEs.                    ---*/
160 /*---                                                      ---*/
161 /*------------------------------------------------------------*/
162 
/* Tracing helpers.  Both macros expand to a test of a variable named
   'td3', so they can only be used where such a variable is in scope.
   TRACE_D3 prints its printf-style arguments via VG_(printf) when td3
   is set; TD3 is just the (unlikely-hinted) test itself.
   NOTE(review): TRACE_D3 expands to a bare 'if' statement rather than
   a do { } while (0) block, so it is not safe as the unbraced body of
   an if/else. */
#define TRACE_D3(format, args...) \
   if (UNLIKELY(td3)) { VG_(printf)(format, ## args); }
#define TD3 (UNLIKELY(td3))

/* Two reserved UWord values at the very top of the range (all-ones and
   all-ones minus one).  Presumably used as "invalid cuOff" and "fake
   void type cuOff" markers respectively -- confirm against the users. */
#define D3_INVALID_CUOFF  ((UWord)(-1UL))
#define D3_FAKEVOID_CUOFF ((UWord)(-2UL))
169 
/* A read cursor over a DiSlice.  The get_* functions below read from
   the image at .sli_next and then advance it; if a read would run past
   the end of the slice they call .barf(.barfstr) instead, which does
   not return. */
typedef
   struct {
      DiSlice sli;      // to which this cursor applies
      DiOffT  sli_next; // offset in underlying DiImage; must be >= sli.ioff
      // Failure handler, called when a read would overrun the slice.
      void (*barf)( const HChar* ) __attribute__((noreturn));
      // Message handed to .barf on overrun.
      const HChar* barfstr;
   }
   Cursor;
178 
is_sane_Cursor(const Cursor * c)179 static inline Bool is_sane_Cursor ( const Cursor* c ) {
180    if (!c)                return False;
181    if (!c->barf)          return False;
182    if (!c->barfstr)       return False;
183    if (!ML_(sli_is_valid)(c->sli))    return False;
184    if (c->sli.ioff == DiOffT_INVALID) return False;
185    if (c->sli_next < c->sli.ioff)     return False;
186    return True;
187 }
188 
189 // Initialise a cursor from a DiSlice (ELF section, really) so as to
190 // start reading at offset |sli_initial_offset| from the start of the
191 // slice.
init_Cursor(Cursor * c,DiSlice sli,ULong sli_initial_offset,void (* barf)(const HChar *),const HChar * barfstr)192 static void init_Cursor ( /*OUT*/Cursor* c,
193                           DiSlice sli,
194                           ULong   sli_initial_offset,
195                           __attribute__((noreturn)) void (*barf)(const HChar*),
196                           const HChar* barfstr )
197 {
198    vg_assert(c);
199    VG_(bzero_inline)(c, sizeof(*c));
200    c->sli              = sli;
201    c->sli_next         = c->sli.ioff + sli_initial_offset;
202    c->barf             = barf;
203    c->barfstr          = barfstr;
204    vg_assert(is_sane_Cursor(c));
205 }
206 
is_at_end_Cursor(const Cursor * c)207 static Bool is_at_end_Cursor ( const Cursor* c ) {
208    vg_assert(is_sane_Cursor(c));
209    return c->sli_next >= c->sli.ioff + c->sli.szB;
210 }
211 
get_position_of_Cursor(const Cursor * c)212 static inline ULong get_position_of_Cursor ( const Cursor* c ) {
213    vg_assert(is_sane_Cursor(c));
214    return c->sli_next - c->sli.ioff;
215 }
/* Move the read position of |c| to |pos| bytes past the start of its
   slice.  The cursor is sanity-checked after the move. */
static inline void set_position_of_Cursor ( Cursor* c, ULong pos ) {
   c->sli_next = pos + c->sli.ioff;
   vg_assert(is_sane_Cursor(c));
}
/* Move the read position of |c| forwards by |delta| bytes.  The cursor
   is sanity-checked after the move. */
static inline void advance_position_of_Cursor ( Cursor* c, ULong delta ) {
   c->sli_next = c->sli_next + delta;
   vg_assert(is_sane_Cursor(c));
}
224 
get_remaining_length_Cursor(const Cursor * c)225 static /*signed*/Long get_remaining_length_Cursor ( const Cursor* c ) {
226    vg_assert(is_sane_Cursor(c));
227    return c->sli.ioff + c->sli.szB - c->sli_next;
228 }
229 
230 //static void* get_address_of_Cursor ( Cursor* c ) {
231 //   vg_assert(is_sane_Cursor(c));
232 //   return &c->region_start_img[ c->region_next ];
233 //}
234 
get_DiCursor_from_Cursor(const Cursor * c)235 static DiCursor get_DiCursor_from_Cursor ( const Cursor* c ) {
236    return mk_DiCursor(c->sli.img, c->sli_next);
237 }
238 
239 /* FIXME: document assumptions on endianness for
240    get_UShort/UInt/ULong. */
get_UChar(Cursor * c)241 static inline UChar get_UChar ( Cursor* c ) {
242    UChar r;
243    vg_assert(is_sane_Cursor(c));
244    if (c->sli_next + sizeof(UChar) > c->sli.ioff + c->sli.szB) {
245       c->barf(c->barfstr);
246       /*NOTREACHED*/
247       vg_assert(0);
248    }
249    r = ML_(img_get_UChar)(c->sli.img, c->sli_next);
250    c->sli_next += sizeof(UChar);
251    return r;
252 }
get_UShort(Cursor * c)253 static UShort get_UShort ( Cursor* c ) {
254    UShort r;
255    vg_assert(is_sane_Cursor(c));
256    if (c->sli_next + sizeof(UShort) > c->sli.ioff + c->sli.szB) {
257       c->barf(c->barfstr);
258       /*NOTREACHED*/
259       vg_assert(0);
260    }
261    r = ML_(img_get_UShort)(c->sli.img, c->sli_next);
262    c->sli_next += sizeof(UShort);
263    return r;
264 }
get_UInt(Cursor * c)265 static UInt get_UInt ( Cursor* c ) {
266    UInt r;
267    vg_assert(is_sane_Cursor(c));
268    if (c->sli_next + sizeof(UInt) > c->sli.ioff + c->sli.szB) {
269       c->barf(c->barfstr);
270       /*NOTREACHED*/
271       vg_assert(0);
272    }
273    r = ML_(img_get_UInt)(c->sli.img, c->sli_next);
274    c->sli_next += sizeof(UInt);
275    return r;
276 }
get_ULong(Cursor * c)277 static ULong get_ULong ( Cursor* c ) {
278    ULong r;
279    vg_assert(is_sane_Cursor(c));
280    if (c->sli_next + sizeof(ULong) > c->sli.ioff + c->sli.szB) {
281       c->barf(c->barfstr);
282       /*NOTREACHED*/
283       vg_assert(0);
284    }
285    r = ML_(img_get_ULong)(c->sli.img, c->sli_next);
286    c->sli_next += sizeof(ULong);
287    return r;
288 }
get_ULEB128(Cursor * c)289 static ULong get_ULEB128 ( Cursor* c ) {
290    ULong result;
291    Int   shift;
292    UChar byte;
293    /* unroll first iteration */
294    byte = get_UChar( c );
295    result = (ULong)(byte & 0x7f);
296    if (LIKELY(!(byte & 0x80))) return result;
297    shift = 7;
298    /* end unroll first iteration */
299    do {
300       byte = get_UChar( c );
301       result |= ((ULong)(byte & 0x7f)) << shift;
302       shift += 7;
303    } while (byte & 0x80);
304    return result;
305 }
get_SLEB128(Cursor * c)306 static Long get_SLEB128 ( Cursor* c ) {
307    ULong  result = 0;
308    Int    shift = 0;
309    UChar  byte;
310    do {
311       byte = get_UChar(c);
312       result |= ((ULong)(byte & 0x7f)) << shift;
313       shift += 7;
314    } while (byte & 0x80);
315    if (shift < 64 && (byte & 0x40))
316       result |= -(1ULL << shift);
317    return result;
318 }
319 
320 /* Assume 'c' points to the start of a string.  Return a DiCursor of
321    whatever it points at, and advance it past the terminating zero.
322    This makes it safe for the caller to then copy the string with
323    ML_(addStr), since (w.r.t. image overruns) the process of advancing
324    past the terminating zero will already have "vetted" the string. */
get_AsciiZ(Cursor * c)325 static DiCursor get_AsciiZ ( Cursor* c ) {
326    UChar uc;
327    DiCursor res = get_DiCursor_from_Cursor(c);
328    do { uc = get_UChar(c); } while (uc != 0);
329    return res;
330 }
331 
peek_ULEB128(Cursor * c)332 static ULong peek_ULEB128 ( Cursor* c ) {
333    DiOffT here = c->sli_next;
334    ULong  r    = get_ULEB128( c );
335    c->sli_next = here;
336    return r;
337 }
peek_UChar(Cursor * c)338 static UChar peek_UChar ( Cursor* c ) {
339    DiOffT here = c->sli_next;
340    UChar  r    = get_UChar( c );
341    c->sli_next = here;
342    return r;
343 }
344 
get_Dwarfish_UWord(Cursor * c,Bool is_dw64)345 static ULong get_Dwarfish_UWord ( Cursor* c, Bool is_dw64 ) {
346    return is_dw64 ? get_ULong(c) : (ULong) get_UInt(c);
347 }
348 
get_UWord(Cursor * c)349 static UWord get_UWord ( Cursor* c ) {
350    vg_assert(sizeof(UWord) == sizeof(void*));
351    if (sizeof(UWord) == 4) return get_UInt(c);
352    if (sizeof(UWord) == 8) return get_ULong(c);
353    vg_assert(0);
354 }
355 
356 /* Read a DWARF3 'Initial Length' field */
get_Initial_Length(Bool * is64,Cursor * c,const HChar * barfMsg)357 static ULong get_Initial_Length ( /*OUT*/Bool* is64,
358                                   Cursor* c,
359                                   const HChar* barfMsg )
360 {
361    ULong w64;
362    UInt  w32;
363    *is64 = False;
364    w32 = get_UInt( c );
365    if (w32 >= 0xFFFFFFF0 && w32 < 0xFFFFFFFF) {
366       c->barf( barfMsg );
367    }
368    else if (w32 == 0xFFFFFFFF) {
369       *is64 = True;
370       w64   = get_ULong( c );
371    } else {
372       *is64 = False;
373       w64 = (ULong)w32;
374    }
375    return w64;
376 }
377 
378 
379 /*------------------------------------------------------------*/
380 /*---                                                      ---*/
381 /*--- "CUConst" structure                                  ---*/
382 /*---                                                      ---*/
383 /*------------------------------------------------------------*/
384 
/* One attribute name/form pair of a parsed abbreviation, together
   with precomputed skipping information. */
typedef
   struct _name_form {
      ULong at_name;  // Dwarf Attribute name
      ULong at_form;  // Dwarf Attribute form
      UInt  skip_szB; // Nr of bytes skippable from here ...
      UInt  next_nf;  // ... to reach this attr/form index in the g_abbv.nf
   } name_form;
/* skip_szB and next_nf are used to optimise the skipping of
   uninteresting DIEs.  Each name_form records how many (fixed) bytes
   can be skipped from the beginning of this form to reach the next
   attr/form that has to be looked at.
   The next form to look at can be:
       an 'interesting' attr/form to read while skipping a DIE
          (currently, this is only DW_AT_sibling)
   or
       a variable length form which must be read to be skipped.
   For a variable length form, the skip_szB will be equal to VARSZ_FORM.

   Note: this technique could also be used to speed up the parsing
   of DIEs: for each parser kind, we could have the nr of bytes
   to skip to directly reach the interesting form(s) for the parser. */
405 
/* A parsed abbreviation: a hash table node keyed by abbreviation code,
   carrying the tag, the has-children flag and a trailing array of
   name/form pairs. */
typedef
   struct _g_abbv {
      struct _g_abbv *next; // read/write by hash table.
      UWord  abbv_code;     // key, read by hash table
      ULong  atag;
      ULong  has_children;
      name_form nf[0];
      /* Variable-length array of name/form pairs, terminated
         by a 0/0 pair.
         The skip_szB/next_nf fields make it possible to skip
         efficiently over a DIE described by this g_abbv. */
    } g_abbv;
418 
/* Holds information that is constant through the parsing of a
   Compilation Unit.  This is basically plumbed through to
   everywhere. */
typedef
   struct {
      /* Call here if anything goes wrong */
      void (*barf)( const HChar* ) __attribute__((noreturn));
      /* Is this 64-bit DWARF ? */
      Bool   is_dw64;
      /* Which DWARF version ?  (2, 3 or 4) */
      UShort version;
      /* Length of this Compilation Unit, as stated in the
         .unit_length :: InitialLength field of the CU Header.
         However, this size (as specified by the D3 spec) does not
         include the size of the .unit_length field itself, which is
         either 4 or 12 bytes (32-bit or 64-bit Dwarf3).  That value
         can be obtained through the expression ".is_dw64 ? 12 : 4". */
      ULong  unit_length;
      /* Offset of start of this unit in .debug_info */
      UWord  cu_start_offset;
      /* SVMA for this CU.  In the D3 spec, this is known as the "base
         address of the compilation unit" (last para sec 3.1.1).
         Needed for (amongst things) interpretation of location-list
         values. */
      Addr   cu_svma;
      Bool   cu_svma_known;

      /* The debug_abbreviations table to be used for this Unit */
      //UChar* debug_abbv;
      /* Upper bound on size thereof (an overestimate, in general) */
      //UWord  debug_abbv_maxszB;
      /* A bounded area of the image, to be used as the
         debug_abbreviations table to be used for this Unit. */
      DiSlice debug_abbv;

      /* Image information for various sections. */
      DiSlice escn_debug_str;
      DiSlice escn_debug_ranges;
      DiSlice escn_debug_loc;
      DiSlice escn_debug_line;
      DiSlice escn_debug_info;
      DiSlice escn_debug_types;
      DiSlice escn_debug_info_alt;
      DiSlice escn_debug_str_alt;
      /* How much to add to .debug_types resp. alternate .debug_info offsets
         in cook_die*.  */
      UWord  types_cuOff_bias;
      UWord  alt_cuOff_bias;
      /* --- Needed so we can add stuff to the string table. --- */
      struct _DebugInfo* di;
      /* --- a hash table of g_abbv (i.e. parsed abbreviations) --- */
      VgHashTable *ht_abbvs;

      /* True if this came from .debug_types; otherwise it came from
         .debug_info.  */
      Bool is_type_unit;
      /* For a unit coming from .debug_types, these hold the TU's type
         signature and the uncooked DIE offset of the TU's signatured
         type.  For a unit coming from .debug_info, these are unused.  */
      ULong type_signature;
      ULong type_offset;

      /* Signatured type hash; computed once and then shared by all
         CUs.  */
      VgHashTable *signature_types;

      /* True if this came from alternate .debug_info; otherwise
         it came from normal .debug_info or .debug_types.  */
      Bool is_alt_info;
   }
   CUConst;
490 
491 
492 /* Return the cooked value of DIE depending on whether CC represents a
493    .debug_types unit.  To cook a DIE, we pretend that the .debug_info,
494    .debug_types and optional alternate .debug_info sections form
495    a contiguous whole, so that DIEs coming from .debug_types are numbered
496    starting at the end of .debug_info and DIEs coming from alternate
497    .debug_info are numbered starting at the end of .debug_types.  */
cook_die(const CUConst * cc,UWord die)498 static UWord cook_die( const CUConst* cc, UWord die )
499 {
500    if (cc->is_type_unit)
501       die += cc->types_cuOff_bias;
502    else if (cc->is_alt_info)
503       die += cc->alt_cuOff_bias;
504    return die;
505 }
506 
507 /* Like cook_die, but understand that DIEs coming from a
508    DW_FORM_ref_sig8 reference are already cooked.  Also, handle
509    DW_FORM_GNU_ref_alt from within primary .debug_info or .debug_types
510    as reference to alternate .debug_info.  */
cook_die_using_form(const CUConst * cc,UWord die,DW_FORM form)511 static UWord cook_die_using_form( const CUConst *cc, UWord die, DW_FORM form)
512 {
513    if (form == DW_FORM_ref_sig8)
514       return die;
515    if (form == DW_FORM_GNU_ref_alt)
516       return die + cc->alt_cuOff_bias;
517    return cook_die( cc, die );
518 }
519 
520 /* Return the uncooked offset of DIE and set *TYPE_FLAG to true if the DIE
521    came from the .debug_types section and *ALT_FLAG to true if the DIE
522    came from alternate .debug_info section.  */
uncook_die(const CUConst * cc,UWord die,Bool * type_flag,Bool * alt_flag)523 static UWord uncook_die( const CUConst *cc, UWord die, /*OUT*/Bool *type_flag,
524                          Bool *alt_flag )
525 {
526    *alt_flag = False;
527    *type_flag = False;
528    /* The use of escn_debug_{info,types}.szB seems safe to me even if
529       escn_debug_{info,types} are DiSlice_INVALID (meaning the
530       sections were not found), because DiSlice_INVALID.szB is always
531       zero.  That said, it seems unlikely we'd ever get here if
532       .debug_info or .debug_types were missing. */
533    if (die >= cc->escn_debug_info.szB) {
534       if (die >= cc->escn_debug_info.szB + cc->escn_debug_types.szB) {
535          *alt_flag = True;
536          die -= cc->escn_debug_info.szB + cc->escn_debug_types.szB;
537       } else {
538          *type_flag = True;
539          die -= cc->escn_debug_info.szB;
540       }
541    }
542    return die;
543 }
544 
545 /*------------------------------------------------------------*/
546 /*---                                                      ---*/
547 /*--- Helper functions for Guarded Expressions             ---*/
548 /*---                                                      ---*/
549 /*------------------------------------------------------------*/
550 
551 /* Parse the location list starting at img-offset 'debug_loc_offset'
552    in .debug_loc.  Results are biased with 'svma_of_referencing_CU'
553    and so I believe are correct SVMAs for the object as a whole.  This
554    function allocates the UChar*, and the caller must deallocate it.
555    The resulting block is in so-called Guarded-Expression format.
556 
557    Guarded-Expression format is similar but not identical to the DWARF3
558    location-list format.  The format of each returned block is:
559 
560       UChar biasMe;
561       UChar isEnd;
562       followed by zero or more of
563 
564       (Addr aMin;  Addr aMax;  UShort nbytes;  ..bytes..;  UChar isEnd)
565 
   '..bytes..' is a standard DWARF3 location expression which is
   valid when aMin <= pc <= aMax (possibly after suitable biasing).
568 
569    The number of bytes in '..bytes..' is nbytes.
570 
571    The end of the sequence is marked by an isEnd == 1 value.  All
572    previous isEnd values must be zero.
573 
   biasMe is 1 if the aMin/aMax fields need this DebugInfo's
   text_bias added before use, and 0 if this is not necessary (the
   GX is ready to go).
577 
578    Hence the block can be quickly parsed and is self-describing.  Note
579    that aMax is 1 less than the corresponding value in a DWARF3
580    location list.  Zero length ranges, with aMax == aMin-1, are not
581    allowed.
582 */
583 /* 2008-sept-12: moved ML_(pp_GX) from here to d3basics.c, where
584    it more logically belongs. */
585 
586 
587 /* Apply a text bias to a GX. */
bias_GX(GExpr * gx,const DebugInfo * di)588 static void bias_GX ( /*MOD*/GExpr* gx, const DebugInfo* di )
589 {
590    UShort nbytes;
591    UChar* p = &gx->payload[0];
592    UChar* pA;
593    UChar  uc;
594    uc = *p++; /*biasMe*/
595    if (uc == 0)
596       return;
597    vg_assert(uc == 1);
598    p[-1] = 0; /* mark it as done */
599    while (True) {
600       uc = *p++;
601       if (uc == 1)
602          break; /*isEnd*/
603       vg_assert(uc == 0);
604       /* t-bias aMin */
605       pA = (UChar*)p;
606       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
607       p += sizeof(Addr);
608       /* t-bias aMax */
609       pA = (UChar*)p;
610       ML_(write_Addr)(pA, ML_(read_Addr)(pA) + di->text_debug_bias);
611       p += sizeof(Addr);
612       /* nbytes, and actual expression */
613       nbytes = ML_(read_UShort)(p); p += sizeof(UShort);
614       p += nbytes;
615    }
616 }
617 
618 __attribute__((noinline))
make_singleton_GX(DiCursor block,ULong nbytes)619 static GExpr* make_singleton_GX ( DiCursor block, ULong nbytes )
620 {
621    SizeT  bytesReqd;
622    GExpr* gx;
623    UChar *p, *pstart;
624 
625    vg_assert(sizeof(UWord) == sizeof(Addr));
626    vg_assert(nbytes <= 0xFFFF); /* else we overflow the nbytes field */
627    bytesReqd
628       =   sizeof(UChar)  /*biasMe*/    + sizeof(UChar) /*!isEnd*/
629         + sizeof(UWord)  /*aMin*/      + sizeof(UWord) /*aMax*/
630         + sizeof(UShort) /*nbytes*/    + (SizeT)nbytes
631         + sizeof(UChar); /*isEnd*/
632 
633    gx = ML_(dinfo_zalloc)( "di.readdwarf3.msGX.1",
634                            sizeof(GExpr) + bytesReqd );
635 
636    p = pstart = &gx->payload[0];
637 
638    p = ML_(write_UChar)(p, 0);        /*biasMe*/
639    p = ML_(write_UChar)(p, 0);        /*!isEnd*/
640    p = ML_(write_Addr)(p, 0);         /*aMin*/
641    p = ML_(write_Addr)(p, ~0);        /*aMax*/
642    p = ML_(write_UShort)(p, nbytes);  /*nbytes*/
643    ML_(cur_read_get)(p, block, nbytes); p += nbytes;
644    p = ML_(write_UChar)(p, 1);        /*isEnd*/
645 
646    vg_assert( (SizeT)(p - pstart) == bytesReqd);
647    vg_assert( &gx->payload[bytesReqd]
648               == ((UChar*)gx) + sizeof(GExpr) + bytesReqd );
649 
650    return gx;
651 }
652 
/* Build a guarded expression from the location list found at offset
   |debug_loc_offset| within cc->escn_debug_loc.

   The list is read as pairs of host words.  A (0,0) pair ends the
   list; a pair whose first word is all ones sets a new base address;
   any other pair is a range [w1, w2), followed by a 16-bit length and
   that many expression bytes.  Non-empty ranges are emitted with
   aMin = w1 + base + svma_of_referencing_CU and aMax one less than the
   correspondingly adjusted w2; empty ranges are read past but not
   emitted.  A range with w1 > w2, or a missing/empty .debug_loc, is
   reported through cc->barf.

   The returned GX has biasMe set to 1 and is allocated with
   ML_(dinfo_zalloc).  |td3| enables tracing.
   NOTE(review): if a barf call (here or via the cursor) does not
   return to this function, the temporary XArray is not freed on that
   path -- confirm whether that matters. */
__attribute__((noinline))
static GExpr* make_general_GX ( const CUConst* cc,
                                Bool     td3,
                                ULong    debug_loc_offset,
                                Addr     svma_of_referencing_CU )
{
   Addr      base;
   Cursor    loc;
   XArray*   xa; /* XArray of UChar */
   GExpr*    gx;
   Word      nbytes;

   vg_assert(sizeof(UWord) == sizeof(Addr));
   if (!ML_(sli_is_valid)(cc->escn_debug_loc) || cc->escn_debug_loc.szB == 0)
      cc->barf("make_general_GX: .debug_loc is empty/missing");

   /* Set up a cursor on .debug_loc and move it to the list start. */
   init_Cursor( &loc, cc->escn_debug_loc, 0, cc->barf,
                "Overrun whilst reading .debug_loc section(2)" );
   set_position_of_Cursor( &loc, debug_loc_offset );

   TRACE_D3("make_general_GX (.debug_loc_offset = %llu, ioff = %llu) {\n",
            debug_loc_offset, get_DiCursor_from_Cursor(&loc).ioff );

   /* Who frees this xa?  It is freed before this fn exits. */
   xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.mgGX.1",
                    ML_(dinfo_free),
                    sizeof(UChar) );

   /* The GX is accumulated byte-wise in xa, starting with biasMe. */
   { UChar c = 1; /*biasMe*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }

   base = 0;
   while (True) {
      Bool  acquire;
      UWord len;
      /* Read a (host-)word pair.  This is something of a hack since
         the word size to read is really dictated by the ELF file;
         however, we assume we're reading a file with the same
         word-sizeness as the host.  Reasonably enough. */
      UWord w1 = get_UWord( &loc );
      UWord w2 = get_UWord( &loc );

      TRACE_D3("   %08lx %08lx\n", w1, w2);
      if (w1 == 0 && w2 == 0)
         break; /* end of list */

      if (w1 == -1UL) {
         /* new value for 'base' */
         base = w2;
         continue;
      }

      /* else a location expression follows */
      /* else enumerate [w1+base, w2+base) */
      /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
         (sec 2.17.2) */
      if (w1 > w2) {
         TRACE_D3("negative range is for .debug_loc expr at "
                  "file offset %llu\n",
                  debug_loc_offset);
         cc->barf( "negative range in .debug_loc section" );
      }

      /* ignore zero length ranges */
      acquire = w1 < w2;
      len     = (UWord)get_UShort( &loc );

      if (acquire) {
         /* Emit the guard header: !isEnd, aMin, aMax (inclusive, hence
            the -1), and the expression length. */
         UWord  w;
         UShort s;
         UChar  c;
         c = 0; /* !isEnd*/
         VG_(addBytesToXA)( xa, &c, sizeof(c) );
         w = w1    + base + svma_of_referencing_CU;
         VG_(addBytesToXA)( xa, &w, sizeof(w) );
         w = w2 -1 + base + svma_of_referencing_CU;
         VG_(addBytesToXA)( xa, &w, sizeof(w) );
         s = (UShort)len;
         VG_(addBytesToXA)( xa, &s, sizeof(s) );
      }

      /* The expression bytes are always consumed from the cursor, but
         only copied into the GX for non-empty ranges. */
      while (len > 0) {
         UChar byte = get_UChar( &loc );
         TRACE_D3("%02x", (UInt)byte);
         if (acquire)
            VG_(addBytesToXA)( xa, &byte, 1 );
         len--;
      }
      TRACE_D3("\n");
   }

   { UChar c = 1; /*isEnd*/ VG_(addBytesToXA)( xa, &c, sizeof(c) ); }

   nbytes = VG_(sizeXA)( xa );
   vg_assert(nbytes >= 1);

   /* Copy the accumulated bytes into a right-sized GExpr. */
   gx = ML_(dinfo_zalloc)( "di.readdwarf3.mgGX.2", sizeof(GExpr) + nbytes );
   VG_(memcpy)( &gx->payload[0], (UChar*)VG_(indexXA)(xa,0), nbytes );
   vg_assert( &gx->payload[nbytes]
              == ((UChar*)gx) + sizeof(GExpr) + nbytes );

   VG_(deleteXA)( xa );

   TRACE_D3("}\n");

   return gx;
}
759 
760 
761 /*------------------------------------------------------------*/
762 /*---                                                      ---*/
763 /*--- Helper functions for range lists and CU headers      ---*/
764 /*---                                                      ---*/
765 /*------------------------------------------------------------*/
766 
/* Denotes an address range.  Both aMin and aMax are included in the
   range; hence a complete range is (0, ~0) and an empty range is any
   (X, X-1) for X > 0. */
typedef
   struct { Addr aMin; Addr aMax; }
   AddrRange;
773 
774 
775 /* Generate an arbitrary structural total ordering on
776    XArray* of AddrRange. */
cmp__XArrays_of_AddrRange(const XArray * rngs1,const XArray * rngs2)777 static Word cmp__XArrays_of_AddrRange ( const XArray* rngs1,
778                                         const XArray* rngs2 )
779 {
780    Word n1, n2, i;
781    vg_assert(rngs1 && rngs2);
782    n1 = VG_(sizeXA)( rngs1 );
783    n2 = VG_(sizeXA)( rngs2 );
784    if (n1 < n2) return -1;
785    if (n1 > n2) return 1;
786    for (i = 0; i < n1; i++) {
787       AddrRange* rng1 = (AddrRange*)VG_(indexXA)( rngs1, i );
788       AddrRange* rng2 = (AddrRange*)VG_(indexXA)( rngs2, i );
789       if (rng1->aMin < rng2->aMin) return -1;
790       if (rng1->aMin > rng2->aMin) return 1;
791       if (rng1->aMax < rng2->aMax) return -1;
792       if (rng1->aMax > rng2->aMax) return 1;
793    }
794    return 0;
795 }
796 
797 
798 __attribute__((noinline))
empty_range_list(void)799 static XArray* /* of AddrRange */ empty_range_list ( void )
800 {
801    XArray* xa; /* XArray of AddrRange */
802    /* Who frees this xa?  varstack_preen() does. */
803    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.erl.1",
804                     ML_(dinfo_free),
805                     sizeof(AddrRange) );
806    return xa;
807 }
808 
809 
810 __attribute__((noinline))
unitary_range_list(Addr aMin,Addr aMax)811 static XArray* unitary_range_list ( Addr aMin, Addr aMax )
812 {
813    XArray*   xa;
814    AddrRange pair;
815    vg_assert(aMin <= aMax);
816    /* Who frees this xa?  varstack_preen() does. */
817    xa = VG_(newXA)( ML_(dinfo_zalloc),  "di.readdwarf3.url.1",
818                     ML_(dinfo_free),
819                     sizeof(AddrRange) );
820    pair.aMin = aMin;
821    pair.aMax = aMax;
822    VG_(addToXA)( xa, &pair );
823    return xa;
824 }
825 
826 
827 /* Enumerate the address ranges starting at img-offset
828    'debug_ranges_offset' in .debug_ranges.  Results are biased with
829    'svma_of_referencing_CU' and so I believe are correct SVMAs for the
830    object as a whole.  This function allocates the XArray, and the
831    caller must deallocate it. */
832 __attribute__((noinline))
833 static XArray* /* of AddrRange */
get_range_list(const CUConst * cc,Bool td3,UWord debug_ranges_offset,Addr svma_of_referencing_CU)834 get_range_list ( const CUConst* cc,
835                  Bool     td3,
836                  UWord    debug_ranges_offset,
837                  Addr     svma_of_referencing_CU )
838 {
839    Addr      base;
840    Cursor    ranges;
841    XArray*   xa; /* XArray of AddrRange */
842    AddrRange pair;
843 
844    if (!ML_(sli_is_valid)(cc->escn_debug_ranges)
845        || cc->escn_debug_ranges.szB == 0)
846       cc->barf("get_range_list: .debug_ranges is empty/missing");
847 
848    init_Cursor( &ranges, cc->escn_debug_ranges, 0, cc->barf,
849                 "Overrun whilst reading .debug_ranges section(2)" );
850    set_position_of_Cursor( &ranges, debug_ranges_offset );
851 
852    /* Who frees this xa?  varstack_preen() does. */
853    xa = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.grl.1", ML_(dinfo_free),
854                     sizeof(AddrRange) );
855    base = 0;
856    while (True) {
857       /* Read a (host-)word pair.  This is something of a hack since
858          the word size to read is really dictated by the ELF file;
859          however, we assume we're reading a file with the same
860          word-sizeness as the host.  Reasonably enough. */
861       UWord w1 = get_UWord( &ranges );
862       UWord w2 = get_UWord( &ranges );
863 
864       if (w1 == 0 && w2 == 0)
865          break; /* end of list. */
866 
867       if (w1 == -1UL) {
868          /* new value for 'base' */
869          base = w2;
870          continue;
871       }
872 
873       /* else enumerate [w1+base, w2+base) */
874       /* w2 is 1 past end of range, as per D3 defn for "DW_AT_high_pc"
875          (sec 2.17.2) */
876       if (w1 > w2)
877          cc->barf( "negative range in .debug_ranges section" );
878       if (w1 < w2) {
879          pair.aMin = w1     + base + svma_of_referencing_CU;
880          pair.aMax = w2 - 1 + base + svma_of_referencing_CU;
881          vg_assert(pair.aMin <= pair.aMax);
882          VG_(addToXA)( xa, &pair );
883       }
884    }
885    return xa;
886 }
887 
888 #define VARSZ_FORM 0xffffffff
889 static UInt get_Form_szB (const CUConst* cc, DW_FORM form );
890 
891 /* Initialises the hash table of abbreviations.
892    We do a single scan of the abbv slice to parse and
893    build all abbreviations, for the following reasons:
894      * all or most abbreviations will be needed in any case
895        (at least for var-info reading).
896      * re-reading each time an abbreviation causes a lot of calls
897        to get_ULEB128.
898      * a CU should not have many abbreviations. */
init_ht_abbvs(CUConst * cc,Bool td3)899 static void init_ht_abbvs (CUConst* cc,
900                            Bool td3)
901 {
902    Cursor c;
903    g_abbv *ta; // temporary abbreviation, reallocated if needed.
904    UInt ta_nf_maxE; // max nr of pairs in ta.nf[], doubled when reallocated.
905    UInt ta_nf_n;    // nr of pairs in ta->nf that are initialised.
906    g_abbv *ht_ta; // abbv to insert in hash table.
907    Int i;
908 
909    #define SZ_G_ABBV(_nf_szE) (sizeof(g_abbv) + _nf_szE * sizeof(name_form))
910 
911    ta_nf_maxE = 10; // starting with enough for 9 pairs+terminating pair.
912    ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf", SZ_G_ABBV(ta_nf_maxE));
913    cc->ht_abbvs = VG_(HT_construct) ("di.readdwarf3.ht_abbvs");
914 
915    init_Cursor( &c, cc->debug_abbv, 0, cc->barf,
916                "Overrun whilst parsing .debug_abbrev section(2)" );
917    while (True) {
918       ta->abbv_code = get_ULEB128( &c );
919       if (ta->abbv_code == 0) break; /* end of the table */
920 
921       ta->atag = get_ULEB128( &c );
922       ta->has_children = get_UChar( &c );
923       ta_nf_n = 0;
924       while (True) {
925          if (ta_nf_n >= ta_nf_maxE) {
926             g_abbv *old_ta = ta;
927             ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta_nf",
928                                     SZ_G_ABBV(2 * ta_nf_maxE));
929             ta_nf_maxE = 2 * ta_nf_maxE;
930             VG_(memcpy) (ta, old_ta, SZ_G_ABBV(ta_nf_n));
931             ML_(dinfo_free) (old_ta);
932          }
933          ta->nf[ta_nf_n].at_name = get_ULEB128( &c );
934          ta->nf[ta_nf_n].at_form = get_ULEB128( &c );
935          if (ta->nf[ta_nf_n].at_name == 0 && ta->nf[ta_nf_n].at_form == 0) {
936             ta_nf_n++;
937             break;
938          }
939         ta_nf_n++;
940       }
941 
942       // Initialises the skip_szB/next_nf elements : an element at position
943       // i must contain the sum of its own size + the sizes of all elements
944       // following i till either the next variable size element, the next
945       // sibling element or the end of the DIE.
946       ta->nf[ta_nf_n - 1].skip_szB = 0;
947       ta->nf[ta_nf_n - 1].next_nf = 0;
948       for (i = ta_nf_n - 2; i >= 0; i--) {
949          const UInt form_szB = get_Form_szB (cc, (DW_FORM)ta->nf[i].at_form);
950 
951          if (ta->nf[i+1].at_name == DW_AT_sibling
952              || ta->nf[i+1].skip_szB == VARSZ_FORM) {
953             ta->nf[i].skip_szB = form_szB;
954             ta->nf[i].next_nf  = i+1;
955          } else if (form_szB == VARSZ_FORM) {
956             ta->nf[i].skip_szB = form_szB;
957             ta->nf[i].next_nf  = i+1;
958          } else {
959             ta->nf[i].skip_szB = ta->nf[i+1].skip_szB + form_szB;
960             ta->nf[i].next_nf  = ta->nf[i+1].next_nf;
961          }
962       }
963 
964       ht_ta = ML_(dinfo_zalloc) ("di.readdwarf3.ht_ta", SZ_G_ABBV(ta_nf_n));
965       VG_(memcpy) (ht_ta, ta, SZ_G_ABBV(ta_nf_n));
966       VG_(HT_add_node) ( cc->ht_abbvs, ht_ta );
967       if (TD3) {
968          TRACE_D3("  Adding abbv_code %lu TAG  %s [%s] nf %u ",
969                   ht_ta->abbv_code, ML_(pp_DW_TAG)(ht_ta->atag),
970                   ML_(pp_DW_children)(ht_ta->has_children),
971                   ta_nf_n);
972          TRACE_D3("  ");
973          for (i = 0; i < ta_nf_n; i++)
974             TRACE_D3("[%u,%u] ", ta->nf[i].skip_szB, ta->nf[i].next_nf);
975          TRACE_D3("\n");
976       }
977    }
978 
979    ML_(dinfo_free) (ta);
980    #undef SZ_G_ABBV
981 }
982 
get_abbv(const CUConst * cc,ULong abbv_code)983 static g_abbv* get_abbv (const CUConst* cc, ULong abbv_code)
984 {
985    g_abbv *abbv;
986 
987    abbv = VG_(HT_lookup) (cc->ht_abbvs, abbv_code);
988    if (!abbv)
989       cc->barf ("abbv_code not found in ht_abbvs table");
990    return abbv;
991 }
992 
993 /* Free the memory allocated in CUConst. */
clear_CUConst(CUConst * cc)994 static void clear_CUConst (CUConst* cc)
995 {
996    VG_(HT_destruct) ( cc->ht_abbvs, ML_(dinfo_free));
997    cc->ht_abbvs = NULL;
998 }
999 
1000 /* Parse the Compilation Unit header indicated at 'c' and
1001    initialise 'cc' accordingly. */
1002 static __attribute__((noinline))
parse_CU_Header(CUConst * cc,Bool td3,Cursor * c,DiSlice escn_debug_abbv,Bool type_unit,Bool alt_info)1003 void parse_CU_Header ( /*OUT*/CUConst* cc,
1004                        Bool td3,
1005                        Cursor* c,
1006                        DiSlice escn_debug_abbv,
1007 		       Bool type_unit,
1008                        Bool alt_info )
1009 {
1010    UChar  address_size;
1011    ULong  debug_abbrev_offset;
1012 
1013    VG_(memset)(cc, 0, sizeof(*cc));
1014    vg_assert(c && c->barf);
1015    cc->barf = c->barf;
1016 
1017    /* initial_length field */
1018    cc->unit_length
1019       = get_Initial_Length( &cc->is_dw64, c,
1020            "parse_CU_Header: invalid initial-length field" );
1021 
1022    TRACE_D3("   Length:        %llu\n", cc->unit_length );
1023 
1024    /* version */
1025    cc->version = get_UShort( c );
1026    if (cc->version != 2 && cc->version != 3 && cc->version != 4)
1027       cc->barf( "parse_CU_Header: is neither DWARF2 nor DWARF3 nor DWARF4" );
1028    TRACE_D3("   Version:       %d\n", (Int)cc->version );
1029 
1030    /* debug_abbrev_offset */
1031    debug_abbrev_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1032    if (debug_abbrev_offset >= escn_debug_abbv.szB)
1033       cc->barf( "parse_CU_Header: invalid debug_abbrev_offset" );
1034    TRACE_D3("   Abbrev Offset: %llu\n", debug_abbrev_offset );
1035 
1036    /* address size.  If this isn't equal to the host word size, just
1037       give up.  This makes it safe to assume elsewhere that
1038       DW_FORM_addr and DW_FORM_ref_addr can be treated as a host
1039       word. */
1040    address_size = get_UChar( c );
1041    if (address_size != sizeof(void*))
1042       cc->barf( "parse_CU_Header: invalid address_size" );
1043    TRACE_D3("   Pointer Size:  %d\n", (Int)address_size );
1044 
1045    cc->is_type_unit = type_unit;
1046    cc->is_alt_info = alt_info;
1047 
1048    if (type_unit) {
1049       cc->type_signature = get_ULong( c );
1050       cc->type_offset = get_Dwarfish_UWord( c, cc->is_dw64 );
1051    }
1052 
1053    /* Set up cc->debug_abbv to point to the relevant table for this
1054       CU.  Set its .szB so that at least we can't read off the end of
1055       the debug_abbrev section -- potentially (and quite likely) too
1056       big, if this isn't the last table in the section, but at least
1057       it's safe.
1058 
1059       This amounts to taking debug_abbv_escn and moving the start
1060       position along by debug_abbrev_offset bytes, hence forming a
1061       smaller DiSlice which has the same end point.  Since we checked
1062       just above that debug_abbrev_offset is less than the size of
1063       debug_abbv_escn, this should leave us with a nonempty slice. */
1064    vg_assert(debug_abbrev_offset < escn_debug_abbv.szB);
1065    cc->debug_abbv      = escn_debug_abbv;
1066    cc->debug_abbv.ioff += debug_abbrev_offset;
1067    cc->debug_abbv.szB  -= debug_abbrev_offset;
1068 
1069    init_ht_abbvs(cc, td3);
1070 }
1071 
1072 /* This represents a single signatured type.  It maps a type signature
1073    (a ULong) to a cooked DIE offset.  Objects of this type are stored
1074    in the type signature hash table.  */
1075 typedef
1076    struct D3SignatureType {
1077       struct D3SignatureType *next;
1078       UWord data;
1079       ULong type_signature;
1080       UWord die;
1081    }
1082    D3SignatureType;
1083 
1084 /* Record a signatured type in the hash table.  */
record_signatured_type(VgHashTable * tab,ULong type_signature,UWord die)1085 static void record_signatured_type ( VgHashTable *tab,
1086                                      ULong type_signature,
1087                                      UWord die )
1088 {
1089    D3SignatureType *dstype = ML_(dinfo_zalloc) ( "di.readdwarf3.sigtype",
1090                                                  sizeof(D3SignatureType) );
1091    dstype->data = (UWord) type_signature;
1092    dstype->type_signature = type_signature;
1093    dstype->die = die;
1094    VG_(HT_add_node) ( tab, dstype );
1095 }
1096 
1097 /* Given a type signature hash table and a type signature, return the
1098    cooked DIE offset of the type.  If the type cannot be found, call
1099    BARF.  */
lookup_signatured_type(const VgHashTable * tab,ULong type_signature,void (* barf)(const HChar *))1100 static UWord lookup_signatured_type ( const VgHashTable *tab,
1101                                       ULong type_signature,
1102                                       void (*barf)( const HChar* ) __attribute__((noreturn)) )
1103 {
1104    D3SignatureType *dstype = VG_(HT_lookup) ( tab, (UWord) type_signature );
1105    /* This may be unwarranted chumminess with the hash table
1106       implementation.  */
1107    while ( dstype != NULL && dstype->type_signature != type_signature)
1108       dstype = dstype->next;
1109    if (dstype == NULL) {
1110       barf("lookup_signatured_type: could not find signatured type");
1111       /*NOTREACHED*/
1112       vg_assert(0);
1113    }
1114    return dstype->die;
1115 }
1116 
1117 
1118 /* Represents Form data.  If szB is 1/2/4/8 then the result is in the
1119    lowest 1/2/4/8 bytes of u.val.  If szB is zero or negative then the
1120    result is an image section beginning at u.cur and with size -szB.
1121    No other szB values are allowed. */
1122 typedef
1123    struct {
1124       Long szB; // 1, 2, 4, 8 or non-positive values only.
1125       union { ULong val; DiCursor cur; } u;
1126    }
1127    FormContents;
1128 
1129 /* From 'c', get the Form data into 'cts'.  Either it gets a 1/2/4/8
1130    byte scalar value, or (a reference to) zero or more bytes starting
1131    at a DiCursor.*/
1132 static
get_Form_contents(FormContents * cts,const CUConst * cc,Cursor * c,Bool td3,DW_FORM form)1133 void get_Form_contents ( /*OUT*/FormContents* cts,
1134                          const CUConst* cc, Cursor* c,
1135                          Bool td3, DW_FORM form )
1136 {
1137    VG_(bzero_inline)(cts, sizeof(*cts));
1138    // !!! keep switch in sync with get_Form_szB. The nr of characters read below
1139    // must be computed similarly in get_Form_szB.
1140    // The consistency is verified in trace_DIE.
1141    switch (form) {
1142       case DW_FORM_data1:
1143          cts->u.val = (ULong)(UChar)get_UChar(c);
1144          cts->szB   = 1;
1145          TRACE_D3("%u", (UInt)cts->u.val);
1146          break;
1147       case DW_FORM_data2:
1148          cts->u.val = (ULong)(UShort)get_UShort(c);
1149          cts->szB   = 2;
1150          TRACE_D3("%u", (UInt)cts->u.val);
1151          break;
1152       case DW_FORM_data4:
1153          cts->u.val = (ULong)(UInt)get_UInt(c);
1154          cts->szB   = 4;
1155          TRACE_D3("%u", (UInt)cts->u.val);
1156          break;
1157       case DW_FORM_data8:
1158          cts->u.val = get_ULong(c);
1159          cts->szB   = 8;
1160          TRACE_D3("%llu", cts->u.val);
1161          break;
1162       case DW_FORM_sec_offset:
1163          cts->u.val = (ULong)get_Dwarfish_UWord( c, cc->is_dw64 );
1164          cts->szB   = cc->is_dw64 ? 8 : 4;
1165          TRACE_D3("%llu", cts->u.val);
1166          break;
1167       case DW_FORM_sdata:
1168          cts->u.val = (ULong)(Long)get_SLEB128(c);
1169          cts->szB   = 8;
1170          TRACE_D3("%llu", cts->u.val);
1171          break;
1172       case DW_FORM_udata:
1173          cts->u.val = (ULong)(Long)get_ULEB128(c);
1174          cts->szB   = 8;
1175          TRACE_D3("%llu", cts->u.val);
1176          break;
1177       case DW_FORM_addr:
1178          /* note, this is a hack.  DW_FORM_addr is defined as getting
1179             a word the size of the target machine as defined by the
1180             address_size field in the CU Header.  However,
1181             parse_CU_Header() rejects all inputs except those for
1182             which address_size == sizeof(Word), hence we can just
1183             treat it as a (host) Word.  */
1184          cts->u.val = (ULong)(UWord)get_UWord(c);
1185          cts->szB   = sizeof(UWord);
1186          TRACE_D3("0x%lx", (UWord)cts->u.val);
1187          break;
1188 
1189       case DW_FORM_ref_addr:
1190          /* We make the same word-size assumption as DW_FORM_addr. */
1191          /* What does this really mean?  From D3 Sec 7.5.4,
1192             description of "reference", it would appear to reference
1193             some other DIE, by specifying the offset from the
1194             beginning of a .debug_info section.  The D3 spec mentions
1195             that this might be in some other shared object and
1196             executable.  But I don't see how the name of the other
1197             object/exe is specified.
1198 
1199             At least for the DW_FORM_ref_addrs created by icc11, the
1200             references seem to be within the same object/executable.
1201             So for the moment we merely range-check, to see that they
1202             actually do specify a plausible offset within this
1203             object's .debug_info, and return the value unchanged.
1204 
1205             In DWARF 2, DW_FORM_ref_addr is address-sized, but in
1206             DWARF 3 and later, it is offset-sized.
1207          */
1208          if (cc->version == 2) {
1209             cts->u.val = (ULong)(UWord)get_UWord(c);
1210             cts->szB   = sizeof(UWord);
1211          } else {
1212             cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1213             cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1214          }
1215          TRACE_D3("0x%lx", (UWord)cts->u.val);
1216          if (0) VG_(printf)("DW_FORM_ref_addr 0x%lx\n", (UWord)cts->u.val);
1217          if (/* the following is surely impossible, but ... */
1218              !ML_(sli_is_valid)(cc->escn_debug_info)
1219              || cts->u.val >= (ULong)cc->escn_debug_info.szB) {
1220             /* Hmm.  Offset is nonsensical for this object's .debug_info
1221                section.  Be safe and reject it. */
1222             cc->barf("get_Form_contents: DW_FORM_ref_addr points "
1223                      "outside .debug_info");
1224          }
1225          break;
1226 
1227       case DW_FORM_strp: {
1228          /* this is an offset into .debug_str */
1229          UWord uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1230          if (!ML_(sli_is_valid)(cc->escn_debug_str)
1231              || uw >= cc->escn_debug_str.szB)
1232             cc->barf("get_Form_contents: DW_FORM_strp "
1233                      "points outside .debug_str");
1234          /* FIXME: check the entire string lies inside debug_str,
1235             not just the first byte of it. */
1236          DiCursor str
1237             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str), uw );
1238          if (TD3) {
1239             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.1");
1240             TRACE_D3("(indirect string, offset: 0x%lx): %s", uw, tmp);
1241             ML_(dinfo_free)(tmp);
1242          }
1243          cts->u.cur = str;
1244          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1245          break;
1246       }
1247       case DW_FORM_string: {
1248          DiCursor str = get_AsciiZ(c);
1249          if (TD3) {
1250             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.2");
1251             TRACE_D3("%s", tmp);
1252             ML_(dinfo_free)(tmp);
1253          }
1254          cts->u.cur = str;
1255          /* strlen is safe because get_AsciiZ already 'vetted' the
1256             entire string */
1257          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1258          break;
1259       }
1260       case DW_FORM_ref1: {
1261          UChar u8   = get_UChar(c);
1262          UWord res  = cc->cu_start_offset + (UWord)u8;
1263          cts->u.val = (ULong)res;
1264          cts->szB   = sizeof(UWord);
1265          TRACE_D3("<%lx>", res);
1266          break;
1267       }
1268       case DW_FORM_ref2: {
1269          UShort u16 = get_UShort(c);
1270          UWord  res = cc->cu_start_offset + (UWord)u16;
1271          cts->u.val = (ULong)res;
1272          cts->szB   = sizeof(UWord);
1273          TRACE_D3("<%lx>", res);
1274          break;
1275       }
1276       case DW_FORM_ref4: {
1277          UInt  u32  = get_UInt(c);
1278          UWord res  = cc->cu_start_offset + (UWord)u32;
1279          cts->u.val = (ULong)res;
1280          cts->szB   = sizeof(UWord);
1281          TRACE_D3("<%lx>", res);
1282          break;
1283       }
1284       case DW_FORM_ref8: {
1285          ULong u64  = get_ULong(c);
1286          UWord res  = cc->cu_start_offset + (UWord)u64;
1287          cts->u.val = (ULong)res;
1288          cts->szB   = sizeof(UWord);
1289          TRACE_D3("<%lx>", res);
1290          break;
1291       }
1292       case DW_FORM_ref_udata: {
1293          ULong u64  = get_ULEB128(c);
1294          UWord res  = cc->cu_start_offset + (UWord)u64;
1295          cts->u.val = (ULong)res;
1296          cts->szB   = sizeof(UWord);
1297          TRACE_D3("<%lx>", res);
1298          break;
1299       }
1300       case DW_FORM_flag: {
1301          UChar u8 = get_UChar(c);
1302          TRACE_D3("%u", (UInt)u8);
1303          cts->u.val = (ULong)u8;
1304          cts->szB   = 1;
1305          break;
1306       }
1307       case DW_FORM_flag_present:
1308          TRACE_D3("1");
1309          cts->u.val = 1;
1310          cts->szB   = 1;
1311          break;
1312       case DW_FORM_block1: {
1313          ULong    u64b;
1314          ULong    u64   = (ULong)get_UChar(c);
1315          DiCursor block = get_DiCursor_from_Cursor(c);
1316          TRACE_D3("%llu byte block: ", u64);
1317          for (u64b = u64; u64b > 0; u64b--) {
1318             UChar u8 = get_UChar(c);
1319             TRACE_D3("%x ", (UInt)u8);
1320          }
1321          cts->u.cur = block;
1322          cts->szB   = - (Long)u64;
1323          break;
1324       }
1325       case DW_FORM_block2: {
1326          ULong    u64b;
1327          ULong    u64   = (ULong)get_UShort(c);
1328          DiCursor block = get_DiCursor_from_Cursor(c);
1329          TRACE_D3("%llu byte block: ", u64);
1330          for (u64b = u64; u64b > 0; u64b--) {
1331             UChar u8 = get_UChar(c);
1332             TRACE_D3("%x ", (UInt)u8);
1333          }
1334          cts->u.cur = block;
1335          cts->szB   = - (Long)u64;
1336          break;
1337       }
1338       case DW_FORM_block4: {
1339          ULong    u64b;
1340          ULong    u64   = (ULong)get_UInt(c);
1341          DiCursor block = get_DiCursor_from_Cursor(c);
1342          TRACE_D3("%llu byte block: ", u64);
1343          for (u64b = u64; u64b > 0; u64b--) {
1344             UChar u8 = get_UChar(c);
1345             TRACE_D3("%x ", (UInt)u8);
1346          }
1347          cts->u.cur = block;
1348          cts->szB   = - (Long)u64;
1349          break;
1350       }
1351       case DW_FORM_exprloc:
1352       case DW_FORM_block: {
1353          ULong    u64b;
1354          ULong    u64   = (ULong)get_ULEB128(c);
1355          DiCursor block = get_DiCursor_from_Cursor(c);
1356          TRACE_D3("%llu byte block: ", u64);
1357          for (u64b = u64; u64b > 0; u64b--) {
1358             UChar u8 = get_UChar(c);
1359             TRACE_D3("%x ", (UInt)u8);
1360          }
1361          cts->u.cur = block;
1362          cts->szB   = - (Long)u64;
1363          break;
1364       }
1365       case DW_FORM_ref_sig8: {
1366          ULong  u64b;
1367          ULong  signature = get_ULong (c);
1368          ULong  work = signature;
1369          TRACE_D3("8 byte signature: ");
1370          for (u64b = 8; u64b > 0; u64b--) {
1371             UChar u8 = work & 0xff;
1372             TRACE_D3("%x ", (UInt)u8);
1373             work >>= 8;
1374          }
1375 
1376          /* cc->signature_types is only built/initialised when
1377             VG_(clo_read_var_info) is set. In this case,
1378             the DW_FORM_ref_sig8 can be looked up.
1379             But we can also arrive here when only reading inline info
1380             and VG_(clo_trace_symtab) is set. In such a case,
1381             we cannot lookup the DW_FORM_ref_sig8, we rather assign
1382             a dummy value. This is a kludge, but otherwise,
1383             the 'dwarf inline info reader' tracing would have to
1384             do type processing/reading. It is better to avoid
1385             adding significant 'real' processing only due to tracing. */
1386          if (VG_(clo_read_var_info)) {
1387             /* Due to the way that the hash table is constructed, the
1388                resulting DIE offset here is already "cooked".  See
1389                cook_die_using_form.  */
1390             cts->u.val = lookup_signatured_type (cc->signature_types, signature,
1391                                                  c->barf);
1392          } else {
1393             vg_assert (td3);
1394             vg_assert (VG_(clo_read_inline_info));
1395             TRACE_D3("<not dereferencing signature type>");
1396             cts->u.val = 0; /* Assign a dummy/rubbish value */
1397          }
1398          cts->szB   = sizeof(UWord);
1399          break;
1400       }
1401       case DW_FORM_indirect:
1402          get_Form_contents (cts, cc, c, td3, (DW_FORM)get_ULEB128(c));
1403          return;
1404 
1405       case DW_FORM_GNU_ref_alt:
1406          cts->u.val = get_Dwarfish_UWord(c, cc->is_dw64);
1407          cts->szB   = cc->is_dw64 ? sizeof(ULong) : sizeof(UInt);
1408          TRACE_D3("0x%lx", (UWord)cts->u.val);
1409          if (0) VG_(printf)("DW_FORM_GNU_ref_alt 0x%lx\n", (UWord)cts->u.val);
1410          if (/* the following is surely impossible, but ... */
1411              !ML_(sli_is_valid)(cc->escn_debug_info_alt))
1412             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr used, "
1413                      "but no alternate .debug_info");
1414          else if (cts->u.val >= (ULong)cc->escn_debug_info_alt.szB) {
1415             /* Hmm.  Offset is nonsensical for this object's .debug_info
1416                section.  Be safe and reject it. */
1417             cc->barf("get_Form_contents: DW_FORM_GNU_ref_addr points "
1418                      "outside alternate .debug_info");
1419          }
1420          break;
1421 
1422       case DW_FORM_GNU_strp_alt: {
1423          /* this is an offset into alternate .debug_str */
1424          SizeT uw = (UWord)get_Dwarfish_UWord( c, cc->is_dw64 );
1425          if (!ML_(sli_is_valid)(cc->escn_debug_str_alt))
1426             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt used, "
1427                      "but no alternate .debug_str");
1428          else if (uw >= cc->escn_debug_str_alt.szB)
1429             cc->barf("get_Form_contents: DW_FORM_GNU_strp_alt "
1430                      "points outside alternate .debug_str");
1431          /* FIXME: check the entire string lies inside debug_str,
1432             not just the first byte of it. */
1433          DiCursor str
1434             = ML_(cur_plus)( ML_(cur_from_sli)(cc->escn_debug_str_alt), uw);
1435          if (TD3) {
1436             HChar* tmp = ML_(cur_read_strdup)(str, "di.getFC.3");
1437             TRACE_D3("(indirect alt string, offset: 0x%lx): %s", uw, tmp);
1438             ML_(dinfo_free)(tmp);
1439          }
1440          cts->u.cur = str;
1441          cts->szB   = - (Long)(1 + (ULong)ML_(cur_strlen)(str));
1442          break;
1443       }
1444 
1445       default:
1446          VG_(printf)(
1447             "get_Form_contents: unhandled %u (%s) at <%llx>\n",
1448             form, ML_(pp_DW_FORM)(form), get_position_of_Cursor(c));
1449          c->barf("get_Form_contents: unhandled DW_FORM");
1450    }
1451 }
1452 
sizeof_Dwarfish_UWord(Bool is_dw64)1453 static inline UInt sizeof_Dwarfish_UWord (Bool is_dw64)
1454 {
1455    if (is_dw64)
1456       return sizeof(ULong);
1457    else
1458       return sizeof(UInt);
1459 }
1460 
1461 #define VARSZ_FORM 0xffffffff
1462 /* If the form is a fixed length form, return the nr of bytes for this form.
1463    If the form is a variable length form, return VARSZ_FORM. */
1464 static
get_Form_szB(const CUConst * cc,DW_FORM form)1465 UInt get_Form_szB (const CUConst* cc, DW_FORM form )
1466 {
1467    // !!! keep switch in sync with get_Form_contents : the nr of bytes
1468    // read from a cursor by get_Form_contents must be returned by
1469    // the below switch.
1470    // The consistency is verified in trace_DIE.
1471    switch (form) {
1472       case DW_FORM_data1: return 1;
1473       case DW_FORM_data2: return 2;
1474       case DW_FORM_data4: return 4;
1475       case DW_FORM_data8: return 8;
1476       case DW_FORM_sec_offset:
1477          if (cc->is_dw64)
1478             return 8;
1479          else
1480             return 4;
1481       case DW_FORM_sdata:
1482          return VARSZ_FORM;
1483       case DW_FORM_udata:
1484          return VARSZ_FORM;
1485       case DW_FORM_addr: // See hack in get_Form_contents
1486          return sizeof(UWord);
1487       case DW_FORM_ref_addr: // See hack in get_Form_contents
1488          if (cc->version == 2)
1489             return sizeof(UWord);
1490          else
1491             return sizeof_Dwarfish_UWord (cc->is_dw64);
1492       case DW_FORM_strp:
1493          return sizeof_Dwarfish_UWord (cc->is_dw64);
1494       case DW_FORM_string:
1495          return VARSZ_FORM;
1496       case DW_FORM_ref1:
1497          return 1;
1498       case DW_FORM_ref2:
1499          return 2;
1500       case DW_FORM_ref4:
1501          return 4;
1502       case DW_FORM_ref8:
1503          return 8;
1504       case DW_FORM_ref_udata:
1505          return VARSZ_FORM;
1506       case DW_FORM_flag:
1507          return 1;
1508       case DW_FORM_flag_present:
1509          return 0; // !!! special case, no data.
1510       case DW_FORM_block1:
1511          return VARSZ_FORM;
1512       case DW_FORM_block2:
1513          return VARSZ_FORM;
1514       case DW_FORM_block4:
1515          return VARSZ_FORM;
1516       case DW_FORM_exprloc:
1517       case DW_FORM_block:
1518          return VARSZ_FORM;
1519       case DW_FORM_ref_sig8:
1520          return 8;
1521       case DW_FORM_indirect:
1522          return VARSZ_FORM;
1523       case DW_FORM_GNU_ref_alt:
1524          return sizeof_Dwarfish_UWord(cc->is_dw64);
1525       case DW_FORM_GNU_strp_alt:
1526          return sizeof_Dwarfish_UWord(cc->is_dw64);
1527       default:
1528          VG_(printf)(
1529             "get_Form_szB: unhandled %u (%s)\n",
1530             form, ML_(pp_DW_FORM)(form));
1531          cc->barf("get_Form_contents: unhandled DW_FORM");
1532    }
1533 }
1534 
1535 /* Skip a DIE as described by abbv.
1536    If the DIE has a sibling, *sibling is set to the skipped DIE sibling value. */
1537 static
skip_DIE(UWord * sibling,Cursor * c_die,const g_abbv * abbv,const CUConst * cc)1538 void skip_DIE (UWord  *sibling,
1539                Cursor* c_die,
1540                const g_abbv *abbv,
1541                const CUConst* cc)
1542 {
1543    UInt nf_i;
1544    FormContents cts;
1545    nf_i = 0;
1546    while (True) {
1547       if (abbv->nf[nf_i].at_name == DW_AT_sibling) {
1548          get_Form_contents( &cts, cc, c_die, False /*td3*/,
1549                             (DW_FORM)abbv->nf[nf_i].at_form );
1550          if ( cts.szB > 0 )
1551             *sibling = cts.u.val;
1552          nf_i++;
1553       } else if (abbv->nf[nf_i].skip_szB == VARSZ_FORM) {
1554          get_Form_contents( &cts, cc, c_die, False /*td3*/,
1555                             (DW_FORM)abbv->nf[nf_i].at_form );
1556          nf_i++;
1557       } else {
1558          advance_position_of_Cursor (c_die, (ULong)abbv->nf[nf_i].skip_szB);
1559          nf_i = abbv->nf[nf_i].next_nf;
1560       }
1561       if (nf_i == 0)
1562          break;
1563    }
1564 }
1565 
1566 
1567 /*------------------------------------------------------------*/
1568 /*---                                                      ---*/
1569 /*--- Parsing of variable-related DIEs                     ---*/
1570 /*---                                                      ---*/
1571 /*------------------------------------------------------------*/
1572 
1573 typedef
1574    struct _TempVar {
1575       const HChar*  name; /* in DebugInfo's .strpool */
1576       /* Represent ranges economically.  nRanges is the number of
1577          ranges.  Cases:
1578          0: .rngOneMin .rngOneMax .manyRanges are all zero
1579          1: .rngOneMin .rngOneMax hold the range; .rngMany is NULL
1580          2: .rngOneMin .rngOneMax are zero; .rngMany holds the ranges.
1581          This is merely an optimisation to avoid having to allocate
1582          and free the XArray in the common (98%) of cases where there
1583          is zero or one address ranges. */
1584       UWord   nRanges;
1585       Addr    rngOneMin;
1586       Addr    rngOneMax;
1587       XArray* rngMany; /* of AddrRange.  NON-UNIQUE PTR in AR_DINFO. */
1588       /* Do not free .rngMany, since many TempVars will have the same
1589          value.  Instead the associated storage is to be freed by
1590          deleting 'rangetree', which stores a single copy of each
1591          range. */
1592       /* --- */
1593       Int     level;
1594       UWord   typeR; /* a cuOff */
1595       GExpr*  gexpr; /* for this variable */
1596       GExpr*  fbGX;  /* to find the frame base of the enclosing fn, if
1597                         any */
1598       UInt    fndn_ix; /* declaring file/dirname index in fndnpool, or 0 */
1599       Int     fLine; /* declaring file line number, or zero */
1600       /* offset in .debug_info, so that abstract instances can be
1601          found to satisfy references from concrete instances. */
1602       UWord   dioff;
1603       UWord   absOri; /* so the absOri fields refer to dioff fields
1604                          in some other, related TempVar. */
1605    }
1606    TempVar;
1607 
1608 typedef
1609    struct {
1610       /* Contains the range stack: a stack of address ranges, one
1611          stack entry for each nested scope.
1612 
1613          Some scope entries are created by function definitions
1614          (DW_AT_subprogram), and for those, we also note the GExpr
1615          derived from its DW_AT_frame_base attribute, if any.
1616          Consequently it should be possible to find, for any
1617          variable's DIE, the GExpr for the containing function's
1618          DW_AT_frame_base by scanning back through the stack to find
1619          the nearest entry associated with a function.  This somewhat
1620          elaborate scheme is provided so as to make it possible to
1621          obtain the correct DW_AT_frame_base expression even in the
1622          presence of nested functions (or to be more precise, in the
1623          presence of nested DW_AT_subprogram DIEs).
1624       */
1625       Int     sp; /* [sp] is innermost active entry; sp==-1 for empty
1626                      stack */
1627       Int     stack_size;
1628       XArray **ranges; /* XArray of AddrRange */
1629       Int     *level;  /* D3 DIE levels */
1630       Bool    *isFunc; /* from DW_AT_subprogram? */
1631       GExpr  **fbGX;   /* if isFunc, contains the FB expr, else NULL */
1632       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
1633          integer index to the index in di->fndnpool. */
1634       XArray* /* of UInt* */ fndn_ix_Table;
1635    }
1636    D3VarParser;
1637 
1638 /* Completely initialise a variable parser object */
1639 static void
var_parser_init(D3VarParser * parser)1640 var_parser_init ( D3VarParser *parser )
1641 {
1642    parser->sp = -1;
1643    parser->stack_size = 0;
1644    parser->ranges = NULL;
1645    parser->level  = NULL;
1646    parser->isFunc = NULL;
1647    parser->fbGX = NULL;
1648    parser->fndn_ix_Table = NULL;
1649 }
1650 
1651 /* Release any memory hanging off a variable parser object */
1652 static void
var_parser_release(D3VarParser * parser)1653 var_parser_release ( D3VarParser *parser )
1654 {
1655    ML_(dinfo_free)( parser->ranges );
1656    ML_(dinfo_free)( parser->level );
1657    ML_(dinfo_free)( parser->isFunc );
1658    ML_(dinfo_free)( parser->fbGX );
1659 }
1660 
varstack_show(const D3VarParser * parser,const HChar * str)1661 static void varstack_show ( const D3VarParser* parser, const HChar* str )
1662 {
1663    Word i, j;
1664    VG_(printf)("  varstack (%s) {\n", str);
1665    for (i = 0; i <= parser->sp; i++) {
1666       XArray* xa = parser->ranges[i];
1667       vg_assert(xa);
1668       VG_(printf)("    [%ld] (level %d)", i, parser->level[i]);
1669       if (parser->isFunc[i]) {
1670          VG_(printf)(" (fbGX=%p)", parser->fbGX[i]);
1671       } else {
1672          vg_assert(parser->fbGX[i] == NULL);
1673       }
1674       VG_(printf)(": ");
1675       if (VG_(sizeXA)( xa ) == 0) {
1676          VG_(printf)("** empty PC range array **");
1677       } else {
1678          for (j = 0; j < VG_(sizeXA)( xa ); j++) {
1679             AddrRange* range = (AddrRange*) VG_(indexXA)( xa, j );
1680             vg_assert(range);
1681             VG_(printf)("[%#lx,%#lx] ", range->aMin, range->aMax);
1682          }
1683       }
1684       VG_(printf)("\n");
1685    }
1686    VG_(printf)("  }\n");
1687 }
1688 
1689 /* Remove from the stack, all entries with .level > 'level' */
1690 static
varstack_preen(D3VarParser * parser,Bool td3,Int level)1691 void varstack_preen ( D3VarParser* parser, Bool td3, Int level )
1692 {
1693    Bool changed = False;
1694    vg_assert(parser->sp < parser->stack_size);
1695    while (True) {
1696       vg_assert(parser->sp >= -1);
1697       if (parser->sp == -1) break;
1698       if (parser->level[parser->sp] <= level) break;
1699       if (0)
1700          TRACE_D3("BBBBAAAA varstack_pop [newsp=%d]\n", parser->sp-1);
1701       vg_assert(parser->ranges[parser->sp]);
1702       /* Who allocated this xa?  get_range_list() or
1703          unitary_range_list(). */
1704       VG_(deleteXA)( parser->ranges[parser->sp] );
1705       parser->sp--;
1706       changed = True;
1707    }
1708    if (changed && td3)
1709       varstack_show( parser, "after preen" );
1710 }
1711 
/* Push a new scope entry (ranges, level, isFunc, fbGX) on the range
   stack.

   Any existing entries at 'level' or deeper are removed first, so
   the new entry always ends up strictly deeper than the one below
   it.  The four parallel arrays are grown by 48 slots whenever they
   are full.  'ranges' must be non-NULL, and fbGX must be NULL unless
   isFunc is set.  If td3 is set the stack is printed afterwards. */
static void varstack_push ( const CUConst* cc,
                            D3VarParser* parser,
                            Bool td3,
                            XArray* ranges, Int level,
                            Bool    isFunc, GExpr* fbGX )
{
   if (0) {
      TRACE_D3("BBBBAAAA varstack_push[newsp=%d]: %d  %p\n",
               parser->sp+1, level, ranges);
   }

   /* We are about to install the entry for 'level', so everything
      currently at >= 'level' has to go. */
   varstack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < parser->stack_size);

   /* Out of room?  Then enlarge all four arrays in step. */
   if (parser->sp + 1 == parser->stack_size) {
      parser->stack_size += 48;
      parser->ranges
         = ML_(dinfo_realloc)("di.readdwarf3.varpush.1", parser->ranges,
                              parser->stack_size * sizeof parser->ranges[0]);
      parser->level
         = ML_(dinfo_realloc)("di.readdwarf3.varpush.2", parser->level,
                              parser->stack_size * sizeof parser->level[0]);
      parser->isFunc
         = ML_(dinfo_realloc)("di.readdwarf3.varpush.3", parser->isFunc,
                              parser->stack_size * sizeof parser->isFunc[0]);
      parser->fbGX
         = ML_(dinfo_realloc)("di.readdwarf3.varpush.4", parser->fbGX,
                              parser->stack_size * sizeof parser->fbGX[0]);
   }

   /* Levels must strictly increase towards the top of the stack. */
   if (parser->sp >= 0) {
      vg_assert(parser->level[parser->sp] < level);
   }

   parser->sp++;
   vg_assert(ranges != NULL);
   if (!isFunc) {
      vg_assert(fbGX == NULL);
   }

   parser->fbGX[parser->sp]   = fbGX;
   parser->isFunc[parser->sp] = isFunc;
   parser->level[parser->sp]  = level;
   parser->ranges[parser->sp] = ranges;

   if (TD3) {
      varstack_show( parser, "after push" );
   }
}
1754 
1755 
1756 /* cts is derived from a DW_AT_location and so refers either to a
1757    location expression or to a location list.  Figure out which, and
1758    in both cases bundle the expression or location list into a
1759    so-called GExpr (guarded expression). */
1760 __attribute__((noinline))
get_GX(const CUConst * cc,Bool td3,const FormContents * cts)1761 static GExpr* get_GX ( const CUConst* cc, Bool td3, const FormContents* cts )
1762 {
1763    GExpr* gexpr = NULL;
1764    if (cts->szB < 0) {
1765       /* represents a non-empty in-line location expression, and
1766          cts->u.cur points at the image bytes */
1767       gexpr = make_singleton_GX( cts->u.cur, (ULong)(- cts->szB) );
1768    }
1769    else
1770    if (cts->szB > 0) {
1771       /* represents a location list.  cts->u.val is the offset of it
1772          in .debug_loc. */
1773       if (!cc->cu_svma_known)
1774          cc->barf("get_GX: location list, but CU svma is unknown");
1775       gexpr = make_general_GX( cc, td3, cts->u.val, cc->cu_svma );
1776    }
1777    else {
1778       vg_assert(0); /* else caller is bogus */
1779    }
1780    return gexpr;
1781 }
1782 
1783 /* Returns an xarray* of directory names (indexed by the dwarf dirname
1784    integer).
1785    If 'compdir' is NULL, entry [0] will be set to "."
1786    otherwise entry [0] is set to compdir.
1787    Entry [0] basically means "the current directory of the compilation",
1788    whatever that means, according to the DWARF3 spec.
1789    FIXME??? readdwarf3.c/readdwarf.c have a lot of duplicated code */
1790 static
read_dirname_xa(DebugInfo * di,const HChar * compdir,Cursor * c,Bool td3)1791 XArray* read_dirname_xa (DebugInfo* di, const HChar *compdir,
1792                          Cursor *c,
1793                          Bool td3 )
1794 {
1795    XArray*        dirname_xa;   /* xarray of HChar* dirname */
1796    const HChar*   dirname;
1797    UInt           compdir_len;
1798 
1799    dirname_xa = VG_(newXA) (ML_(dinfo_zalloc), "di.rdxa.1", ML_(dinfo_free),
1800                             sizeof(HChar*) );
1801 
1802    if (compdir == NULL) {
1803       dirname = ".";
1804       compdir_len = 1;
1805    } else {
1806       dirname = compdir;
1807       compdir_len = VG_(strlen)(compdir);
1808    }
1809    VG_(addToXA) (dirname_xa, &dirname);
1810 
1811    TRACE_D3(" The Directory Table%s\n",
1812             peek_UChar(c) == 0 ? " is empty." : ":" );
1813 
1814    while (peek_UChar(c) != 0) {
1815 
1816       DiCursor cur = get_AsciiZ(c);
1817       HChar* data_str = ML_(cur_read_strdup)( cur, "dirname_xa.1" );
1818       TRACE_D3("  %s\n", data_str);
1819 
1820       /* If data_str[0] is '/', then 'data' is an absolute path and we
1821          don't mess with it.  Otherwise, construct the
1822          path 'compdir' ++ "/" ++ 'data'. */
1823 
1824       if (data_str[0] != '/'
1825           /* not an absolute path */
1826           && compdir
1827           /* actually got something sensible for compdir */
1828           && compdir_len)
1829       {
1830          SizeT  len = compdir_len + 1 + VG_(strlen)(data_str);
1831          HChar *buf = ML_(dinfo_zalloc)("dirname_xa.2", len + 1);
1832 
1833          VG_(strcpy)(buf, compdir);
1834          VG_(strcat)(buf, "/");
1835          VG_(strcat)(buf, data_str);
1836 
1837          dirname = ML_(addStr)(di, buf, len);
1838          VG_(addToXA) (dirname_xa, &dirname);
1839          if (0) VG_(printf)("rel path  %s\n", buf);
1840          ML_(dinfo_free)(buf);
1841       } else {
1842          /* just use 'data'. */
1843          dirname = ML_(addStr)(di,data_str,-1);
1844          VG_(addToXA) (dirname_xa, &dirname);
1845          if (0) VG_(printf)("abs path  %s\n", data_str);
1846       }
1847 
1848       ML_(dinfo_free)(data_str);
1849    }
1850 
1851    TRACE_D3 ("\n");
1852 
1853    if (get_UChar (c) != 0) {
1854       ML_(symerr)(NULL, True,
1855                   "could not get NUL at end of DWARF directory table");
1856       VG_(deleteXA)(dirname_xa);
1857       return NULL;
1858    }
1859 
1860    return dirname_xa;
1861 }
1862 
1863 static
read_filename_table(XArray * fndn_ix_Table,const HChar * compdir,const CUConst * cc,ULong debug_line_offset,Bool td3)1864 void read_filename_table( /*MOD*/XArray* /* of UInt* */ fndn_ix_Table,
1865                           const HChar* compdir,
1866                           const CUConst* cc, ULong debug_line_offset,
1867                           Bool td3 )
1868 {
1869    Bool   is_dw64;
1870    Cursor c;
1871    Word   i;
1872    UShort version;
1873    UChar  opcode_base;
1874    const HChar* str;
1875    XArray* dirname_xa;   /* xarray of HChar* dirname */
1876    ULong  dir_xa_ix;     /* Index in dirname_xa, as read from dwarf info. */
1877    const HChar* dirname;
1878    UInt   fndn_ix;
1879 
1880    vg_assert(fndn_ix_Table && cc && cc->barf);
1881    if (!ML_(sli_is_valid)(cc->escn_debug_line)
1882        || cc->escn_debug_line.szB <= debug_line_offset) {
1883       cc->barf("read_filename_table: .debug_line is missing?");
1884    }
1885 
1886    init_Cursor( &c, cc->escn_debug_line, debug_line_offset, cc->barf,
1887                 "Overrun whilst reading .debug_line section(1)" );
1888 
1889    /* unit_length = */
1890    get_Initial_Length( &is_dw64, &c,
1891                        "read_filename_table: invalid initial-length field" );
1892    version = get_UShort( &c );
1893    if (version != 2 && version != 3 && version != 4)
1894      cc->barf("read_filename_table: Only DWARF version 2, 3 and 4 line info "
1895               "is currently supported.");
1896    /*header_length              = (ULong)*/ get_Dwarfish_UWord( &c, is_dw64 );
1897    /*minimum_instruction_length = */ get_UChar( &c );
1898    if (version >= 4)
1899       /*maximum_operations_per_insn = */ get_UChar( &c );
1900    /*default_is_stmt            = */ get_UChar( &c );
1901    /*line_base                  = (Char)*/ get_UChar( &c );
1902    /*line_range                 = */ get_UChar( &c );
1903    opcode_base                = get_UChar( &c );
1904    /* skip over "standard_opcode_lengths" */
1905    for (i = 1; i < (Word)opcode_base; i++)
1906      (void)get_UChar( &c );
1907 
1908    dirname_xa = read_dirname_xa(cc->di, compdir, &c, td3);
1909 
1910    /* Read and record the file names table */
1911    vg_assert( VG_(sizeXA)( fndn_ix_Table ) == 0 );
1912    /* Add a dummy index-zero entry.  DWARF3 numbers its files
1913       from 1, for some reason. */
1914    fndn_ix = ML_(addFnDn) ( cc->di, "<unknown_file>", NULL );
1915    VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1916    while (peek_UChar(&c) != 0) {
1917       DiCursor cur = get_AsciiZ(&c);
1918       str = ML_(addStrFromCursor)( cc->di, cur );
1919       dir_xa_ix = get_ULEB128( &c );
1920       if (dirname_xa != NULL
1921           && dir_xa_ix >= 0 && dir_xa_ix < VG_(sizeXA) (dirname_xa))
1922          dirname = *(HChar**)VG_(indexXA) ( dirname_xa, dir_xa_ix );
1923       else
1924          dirname = NULL;
1925       fndn_ix = ML_(addFnDn)( cc->di, str, dirname);
1926       TRACE_D3("  read_filename_table: %ld fndn_ix %u %s %s\n",
1927                VG_(sizeXA)(fndn_ix_Table), fndn_ix,
1928                dirname, str);
1929       VG_(addToXA)( fndn_ix_Table, &fndn_ix );
1930       (void)get_ULEB128( &c ); /* skip last mod time */
1931       (void)get_ULEB128( &c ); /* file size */
1932    }
1933    /* We're done!  The rest of it is not interesting. */
1934    if (dirname_xa != NULL)
1935       VG_(deleteXA)(dirname_xa);
1936 }
1937 
1938 /* setup_cu_svma to be called when a cu is found at level 0,
1939    to establish the cu_svma. */
setup_cu_svma(CUConst * cc,Bool have_lo,Addr ip_lo,Bool td3)1940 static void setup_cu_svma(CUConst* cc, Bool have_lo, Addr ip_lo, Bool td3)
1941 {
1942    Addr cu_svma;
1943    /* We have potentially more than one type of parser parsing the
1944       dwarf information. At least currently, each parser establishes
1945       the cu_svma. So, in case cu_svma_known, we check that the same
1946       result is obtained by the 2nd parsing of the cu.
1947 
1948       Alternatively, we could reset cu_svma_known after each parsing
1949       and then check that we only see a single DW_TAG_compile_unit DIE
1950       at level 0, DWARF3 only allows exactly one top level DIE per
1951       CU. */
1952 
1953    if (have_lo)
1954       cu_svma = ip_lo;
1955    else {
1956       /* Now, it may be that this DIE doesn't tell us the CU's
1957          SVMA, by way of not having a DW_AT_low_pc.  That's OK --
1958          the CU doesn't *have* to have its SVMA specified.
1959 
1960          But as per last para D3 spec sec 3.1.1 ("Normal and
1961          Partial Compilation Unit Entries", "If the base address
1962          (viz, the SVMA) is undefined, then any DWARF entry of
1963          structure defined interms of the base address of that
1964          compilation unit is not valid.".  So that means, if whilst
1965          processing the children of this top level DIE (or their
1966          children, etc) we see a DW_AT_range, and cu_svma_known is
1967          False, then the DIE that contains it is (per the spec)
1968          invalid, and we can legitimately stop and complain. */
1969       /* .. whereas The Reality is, simply assume the SVMA is zero
1970          if it isn't specified. */
1971       cu_svma = 0;
1972    }
1973 
1974    if (cc->cu_svma_known) {
1975       vg_assert (cu_svma == cc->cu_svma);
1976    } else {
1977       cc->cu_svma_known = True;
1978       cc->cu_svma = cu_svma;
1979       if (0)
1980          TRACE_D3("setup_cu_svma: acquire CU_SVMA of %p\n", (void*) cc->cu_svma);
1981    }
1982 }
1983 
/* Print one DIE: a header line (level, uncooked position, abbrev
   code, tag name, originating section), followed by one line per
   attribute, produced by reading each form with tracing on.

   The DIE is read through a private cursor placed at
   saved_die_c_offset in whichever section 'posn' turns out to refer
   to (.debug_types, the alternate .debug_info, or .debug_info).

   As a self-check the same DIE is afterwards skipped with skip_DIE;
   both ways must agree on the end position and on the sibling
   value. */
static void trace_DIE(
   DW_TAG dtag,
   UWord posn,
   Int level,
   UWord saved_die_c_offset,
   const g_abbv *abbv,
   const CUConst* cc)
{
   Cursor       c;             /* used for the traced read */
   Cursor       check_skip;    /* used for the skip_DIE cross-check */
   FormContents form_val;
   UWord        sibling       = 0;
   UWord        check_sibling = 0;
   Bool         debug_types_flag;
   Bool         alt_flag;
   UInt         nf_i;

   posn = uncook_die( cc, posn, &debug_types_flag, &alt_flag );
   init_Cursor (&c,
                debug_types_flag ? cc->escn_debug_types :
                alt_flag ? cc->escn_debug_info_alt : cc->escn_debug_info,
                saved_die_c_offset, cc->barf,
                "Overrun trace_DIE");
   /* Both cursors start out at the same place. */
   check_skip = c;

   VG_(printf)(" <%d><%lx>: Abbrev Number: %llu (%s)%s%s\n",
               level, posn, (ULong) abbv->abbv_code, ML_(pp_DW_TAG)( dtag ),
               debug_types_flag ? " (in .debug_types)" : "",
               alt_flag ? " (in alternate .debug_info)" : "");

   /* Go through the (name, form) pairs until the 0/0 terminator. */
   for (nf_i = 0; ; nf_i++) {
      DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
      DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
      if (attr == 0 && form == 0)
         break;
      VG_(printf)("     %-18s: ", ML_(pp_DW_AT)(attr));
      /* Get the form contents, so as to print them */
      get_Form_contents( &form_val, cc, &c, True, form );
      if (attr == DW_AT_sibling && form_val.szB > 0)
         sibling = form_val.u.val;
      VG_(printf)("\t\n");
   }

   /* Skipping a DIE has to give the same displacement as tracing
      (i.e. reading) it.  Should get_Form_contents and get_Form_szB
      disagree on the number of bytes read, that shows up here.
      Running with --trace-symtab=yes --read-var-info=yes makes sure
      all DIEs are systematically verified. */
   skip_DIE (&check_sibling, &check_skip, abbv, cc);
   vg_assert (check_sibling == sibling);
   vg_assert (get_position_of_Cursor (&check_skip)
              == get_position_of_Cursor (&c));
}
2038 
2039 __attribute__((noreturn))
dump_bad_die_and_barf(const HChar * whichparser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,UWord saved_die_c_offset,const g_abbv * abbv,const CUConst * cc)2040 static void dump_bad_die_and_barf(
2041    const HChar *whichparser,
2042    DW_TAG dtag,
2043    UWord posn,
2044    Int level,
2045    Cursor* c_die,
2046    UWord saved_die_c_offset,
2047    const g_abbv *abbv,
2048    const CUConst* cc)
2049 {
2050    trace_DIE (dtag, posn, level, saved_die_c_offset, abbv, cc);
2051    VG_(printf)("%s:\n", whichparser);
2052    cc->barf("confused by the above DIE");
2053 }
2054 
2055 __attribute__((noinline))
bad_DIE_confusion(int linenr)2056 static void bad_DIE_confusion(int linenr)
2057 {
2058    VG_(printf)("\nparse DIE(readdwarf3.c:%d): confused by:\n", linenr);
2059 }
2060 #define goto_bad_DIE do {bad_DIE_confusion(__LINE__); goto bad_DIE;} while (0)
2061 
2062 __attribute__((noinline))
parse_var_DIE(WordFM * rangestree,XArray * tempvars,XArray * gexprs,D3VarParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,const g_abbv * abbv,CUConst * cc,Bool td3)2063 static void parse_var_DIE (
2064    /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
2065    /*MOD*/XArray* /* of TempVar* */ tempvars,
2066    /*MOD*/XArray* /* of GExpr* */ gexprs,
2067    /*MOD*/D3VarParser* parser,
2068    DW_TAG dtag,
2069    UWord posn,
2070    Int level,
2071    Cursor* c_die,
2072    const g_abbv *abbv,
2073    CUConst* cc,
2074    Bool td3
2075 )
2076 {
2077    FormContents cts;
2078    UInt nf_i;
2079 
2080    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2081 
2082    varstack_preen( parser, td3, level-1 );
2083 
2084    if (dtag == DW_TAG_compile_unit
2085        || dtag == DW_TAG_type_unit
2086        || dtag == DW_TAG_partial_unit) {
2087       Bool have_lo    = False;
2088       Bool have_hi1   = False;
2089       Bool hiIsRelative = False;
2090       Bool have_range = False;
2091       Addr ip_lo    = 0;
2092       Addr ip_hi1   = 0;
2093       Addr rangeoff = 0;
2094       const HChar *compdir = NULL;
2095       nf_i = 0;
2096       while (True) {
2097          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2098          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2099          nf_i++;
2100          if (attr == 0 && form == 0) break;
2101          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2102          if (attr == DW_AT_low_pc && cts.szB > 0) {
2103             ip_lo   = cts.u.val;
2104             have_lo = True;
2105          }
2106          if (attr == DW_AT_high_pc && cts.szB > 0) {
2107             ip_hi1   = cts.u.val;
2108             have_hi1 = True;
2109             if (form != DW_FORM_addr)
2110                hiIsRelative = True;
2111          }
2112          if (attr == DW_AT_ranges && cts.szB > 0) {
2113             rangeoff   = cts.u.val;
2114             have_range = True;
2115          }
2116          if (attr == DW_AT_comp_dir) {
2117             if (cts.szB >= 0)
2118                cc->barf("parse_var_DIE compdir: expecting indirect string");
2119             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2120                                                "parse_var_DIE.compdir" );
2121             compdir = ML_(addStr)(cc->di, str, -1);
2122             ML_(dinfo_free) (str);
2123          }
2124          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2125             read_filename_table( parser->fndn_ix_Table, compdir,
2126                                  cc, cts.u.val, td3 );
2127          }
2128       }
2129       if (have_lo && have_hi1 && hiIsRelative)
2130          ip_hi1 += ip_lo;
2131 
2132       /* Now, does this give us an opportunity to find this
2133          CU's svma? */
2134       if (level == 0)
2135          setup_cu_svma(cc, have_lo, ip_lo, td3);
2136 
2137       /* Do we have something that looks sane? */
2138       if (have_lo && have_hi1 && (!have_range)) {
2139          if (ip_lo < ip_hi1)
2140             varstack_push( cc, parser, td3,
2141                            unitary_range_list(ip_lo, ip_hi1 - 1),
2142                            level,
2143                            False/*isFunc*/, NULL/*fbGX*/ );
2144          else if (ip_lo == 0 && ip_hi1 == 0)
2145             /* CU has no code, presumably?
2146                Such situations have been encountered for code
2147                compiled with -ffunction-sections -fdata-sections
2148                and linked with --gc-sections. Completely
2149                eliminated CU gives such 0 lo/hi pc. Similarly
2150                to a CU which has no lo/hi/range pc, we push
2151                an empty range list. */
2152             varstack_push( cc, parser, td3,
2153                            empty_range_list(),
2154                            level,
2155                            False/*isFunc*/, NULL/*fbGX*/ );
2156       } else
2157       if ((!have_lo) && (!have_hi1) && have_range) {
2158          varstack_push( cc, parser, td3,
2159                         get_range_list( cc, td3,
2160                                         rangeoff, cc->cu_svma ),
2161                         level,
2162                         False/*isFunc*/, NULL/*fbGX*/ );
2163       } else
2164       if ((!have_lo) && (!have_hi1) && (!have_range)) {
2165          /* CU has no code, presumably? */
2166          varstack_push( cc, parser, td3,
2167                         empty_range_list(),
2168                         level,
2169                         False/*isFunc*/, NULL/*fbGX*/ );
2170       } else
2171       if (have_lo && (!have_hi1) && have_range && ip_lo == 0) {
2172          /* broken DIE created by gcc-4.3.X ?  Ignore the
2173             apparently-redundant DW_AT_low_pc and use the DW_AT_ranges
2174             instead. */
2175          varstack_push( cc, parser, td3,
2176                         get_range_list( cc, td3,
2177                                         rangeoff, cc->cu_svma ),
2178                         level,
2179                         False/*isFunc*/, NULL/*fbGX*/ );
2180       } else {
2181          if (0) VG_(printf)("I got hlo %d hhi1 %d hrange %d\n",
2182                             (Int)have_lo, (Int)have_hi1, (Int)have_range);
2183          goto_bad_DIE;
2184       }
2185    }
2186 
2187    if (dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram) {
2188       Bool   have_lo    = False;
2189       Bool   have_hi1   = False;
2190       Bool   have_range = False;
2191       Bool   hiIsRelative = False;
2192       Addr   ip_lo      = 0;
2193       Addr   ip_hi1     = 0;
2194       Addr   rangeoff   = 0;
2195       Bool   isFunc     = dtag == DW_TAG_subprogram;
2196       GExpr* fbGX       = NULL;
2197       nf_i = 0;
2198       while (True) {
2199          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2200          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2201          nf_i++;
2202          if (attr == 0 && form == 0) break;
2203          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2204          if (attr == DW_AT_low_pc && cts.szB > 0) {
2205             ip_lo   = cts.u.val;
2206             have_lo = True;
2207          }
2208          if (attr == DW_AT_high_pc && cts.szB > 0) {
2209             ip_hi1   = cts.u.val;
2210             have_hi1 = True;
2211             if (form != DW_FORM_addr)
2212                hiIsRelative = True;
2213          }
2214          if (attr == DW_AT_ranges && cts.szB > 0) {
2215             rangeoff   = cts.u.val;
2216             have_range = True;
2217          }
2218          if (isFunc
2219              && attr == DW_AT_frame_base
2220              && cts.szB != 0 /* either scalar or nonempty block */) {
2221             fbGX = get_GX( cc, False/*td3*/, &cts );
2222             vg_assert(fbGX);
2223             VG_(addToXA)(gexprs, &fbGX);
2224          }
2225       }
2226       if (have_lo && have_hi1 && hiIsRelative)
2227          ip_hi1 += ip_lo;
2228       /* Do we have something that looks sane? */
2229       if (dtag == DW_TAG_subprogram
2230           && (!have_lo) && (!have_hi1) && (!have_range)) {
2231          /* This is legit - ignore it. Sec 3.3.3: "A subroutine entry
2232             representing a subroutine declaration that is not also a
2233             definition does not have code address or range
2234             attributes." */
2235       } else
2236       if (dtag == DW_TAG_lexical_block
2237           && (!have_lo) && (!have_hi1) && (!have_range)) {
2238          /* I believe this is legit, and means the lexical block
2239             contains no insns (whatever that might mean).  Ignore. */
2240       } else
2241       if (have_lo && have_hi1 && (!have_range)) {
2242          /* This scope supplies just a single address range. */
2243          if (ip_lo < ip_hi1)
2244             varstack_push( cc, parser, td3,
2245                            unitary_range_list(ip_lo, ip_hi1 - 1),
2246                            level, isFunc, fbGX );
2247       } else
2248       if ((!have_lo) && (!have_hi1) && have_range) {
2249          /* This scope supplies multiple address ranges via the use of
2250             a range list. */
2251          varstack_push( cc, parser, td3,
2252                         get_range_list( cc, td3,
2253                                         rangeoff, cc->cu_svma ),
2254                         level, isFunc, fbGX );
2255       } else
2256       if (have_lo && (!have_hi1) && (!have_range)) {
2257          /* This scope is bogus.  The D3 spec sec 3.4 (Lexical Block
2258             Entries) says fairly clearly that a scope must have either
2259             _range or (_low_pc and _high_pc). */
2260          /* The spec is a bit ambiguous though.  Perhaps a single byte
2261             range is intended?  See sec 2.17 (Code Addresses And Ranges) */
2262          /* This case is here because icc9 produced this:
2263          <2><13bd>: DW_TAG_lexical_block
2264             DW_AT_decl_line   : 5229
2265             DW_AT_decl_column : 37
2266             DW_AT_decl_file   : 1
2267             DW_AT_low_pc      : 0x401b03
2268          */
2269          /* Ignore (seems safe than pushing a single byte range) */
2270       } else
2271          goto_bad_DIE;
2272    }
2273 
2274    if (dtag == DW_TAG_variable || dtag == DW_TAG_formal_parameter) {
2275       const  HChar* name = NULL;
2276       UWord  typeR       = D3_INVALID_CUOFF;
2277       Bool   global      = False;
2278       GExpr* gexpr       = NULL;
2279       Int    n_attrs     = 0;
2280       UWord  abs_ori     = (UWord)D3_INVALID_CUOFF;
2281       Int    lineNo      = 0;
2282       UInt   fndn_ix     = 0;
2283       nf_i = 0;
2284       while (True) {
2285          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2286          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2287          nf_i++;
2288          if (attr == 0 && form == 0) break;
2289          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2290          n_attrs++;
2291          if (attr == DW_AT_name && cts.szB < 0) {
2292             name = ML_(addStrFromCursor)( cc->di, cts.u.cur );
2293          }
2294          if (attr == DW_AT_location
2295              && cts.szB != 0 /* either scalar or nonempty block */) {
2296             gexpr = get_GX( cc, False/*td3*/, &cts );
2297             vg_assert(gexpr);
2298             VG_(addToXA)(gexprs, &gexpr);
2299          }
2300          if (attr == DW_AT_type && cts.szB > 0) {
2301             typeR = cook_die_using_form( cc, cts.u.val, form );
2302          }
2303          if (attr == DW_AT_external && cts.szB > 0 && cts.u.val > 0) {
2304             global = True;
2305          }
2306          if (attr == DW_AT_abstract_origin && cts.szB > 0) {
2307             abs_ori = (UWord)cts.u.val;
2308          }
2309          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
2310             /*declaration = True;*/
2311          }
2312          if (attr == DW_AT_decl_line && cts.szB > 0) {
2313             lineNo = (Int)cts.u.val;
2314          }
2315          if (attr == DW_AT_decl_file && cts.szB > 0) {
2316             Int ftabIx = (Int)cts.u.val;
2317             if (ftabIx >= 1
2318                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2319                fndn_ix = *(UInt*)VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2320             }
2321             if (0) VG_(printf)("XXX filename fndn_ix = %u %s\n", fndn_ix,
2322                                ML_(fndn_ix2filename) (cc->di, fndn_ix));
2323          }
2324       }
2325       if (!global && dtag == DW_TAG_variable && level == 1) {
2326          /* Case of a static variable. It is better to declare
2327             it global as the variable is not really related to
2328             a PC range, as its address can be used by program
2329             counters outside of the ranges where it is visible . */
2330          global = True;
2331       }
2332 
2333       /* We'll collect it under if one of the following three
2334          conditions holds:
2335          (1) has location and type    -> completed
2336          (2) has type only            -> is an abstract instance
2337          (3) has location and abs_ori -> is a concrete instance
2338          Name, fndn_ix and line number are all optional frills.
2339       */
2340       if ( /* 1 */ (gexpr && typeR != D3_INVALID_CUOFF)
2341            /* 2 */ || (typeR != D3_INVALID_CUOFF)
2342            /* 3 */ || (gexpr && abs_ori != (UWord)D3_INVALID_CUOFF) ) {
2343 
2344          /* Add this variable to the list of interesting looking
2345             variables.  Crucially, note along with it the address
2346             range(s) associated with the variable, which for locals
2347             will be the address ranges at the top of the varparser's
2348             stack. */
2349          GExpr*   fbGX = NULL;
2350          Word     i, nRanges;
2351          const XArray*  /* of AddrRange */ xa;
2352          TempVar* tv;
2353          /* Stack can't be empty; we put a dummy entry on it for the
2354             entire address range before starting with the DIEs for
2355             this CU. */
2356          vg_assert(parser->sp >= 0);
2357 
2358          /* If this is a local variable (non-global), try to find
2359             the GExpr for the DW_AT_frame_base of the containing
2360             function.  It should have been pushed on the stack at the
2361             time we encountered its DW_TAG_subprogram DIE, so the way
2362             to find it is to scan back down the stack looking for it.
2363             If there isn't an enclosing stack entry marked 'isFunc'
2364             then we must be seeing variable or formal param DIEs
2365             outside of a function, so we deem the Dwarf to be
2366             malformed if that happens.  Note that the fbGX may be NULL
2367             if the containing DT_TAG_subprogram didn't supply a
2368             DW_AT_frame_base -- that's OK, but there must actually be
2369             a containing DW_TAG_subprogram. */
2370          if (!global) {
2371             Bool found = False;
2372             for (i = parser->sp; i >= 0; i--) {
2373                if (parser->isFunc[i]) {
2374                   fbGX = parser->fbGX[i];
2375                   found = True;
2376                   break;
2377                }
2378             }
2379             if (!found) {
2380                if (0 && VG_(clo_verbosity) >= 0) {
2381                   VG_(message)(Vg_DebugMsg,
2382                      "warning: parse_var_DIE: non-global variable "
2383                      "outside DW_TAG_subprogram\n");
2384                }
2385                /* goto_bad_DIE; */
2386                /* This seems to happen a lot.  Just ignore it -- if,
2387                   when we come to evaluation of the location (guarded)
2388                   expression, it requires a frame base value, and
2389                   there's no expression for that, then evaluation as a
2390                   whole will fail.  Harmless - a bit of a waste of
2391                   cycles but nothing more. */
2392             }
2393          }
2394 
2395          /* re "global ? 0 : parser->sp" (twice), if the var is
2396             marked 'global' then we must put it at the global scope,
2397             as only the global scope (level 0) covers the entire PC
2398             address space.  It is asserted elsewhere that level 0
2399             always covers the entire address space. */
2400          xa = parser->ranges[global ? 0 : parser->sp];
2401          nRanges = VG_(sizeXA)(xa);
2402          vg_assert(nRanges >= 0);
2403 
2404          tv = ML_(dinfo_zalloc)( "di.readdwarf3.pvD.1", sizeof(TempVar) );
2405          tv->name   = name;
2406          tv->level  = global ? 0 : parser->sp;
2407          tv->typeR  = typeR;
2408          tv->gexpr  = gexpr;
2409          tv->fbGX   = fbGX;
2410          tv->fndn_ix= fndn_ix;
2411          tv->fLine  = lineNo;
2412          tv->dioff  = posn;
2413          tv->absOri = abs_ori;
2414 
2415          /* See explanation on definition of type TempVar for the
2416             reason for this elaboration. */
2417          tv->nRanges = nRanges;
2418          tv->rngOneMin = 0;
2419          tv->rngOneMax = 0;
2420          tv->rngMany = NULL;
2421          if (nRanges == 1) {
2422             AddrRange* range = VG_(indexXA)(xa, 0);
2423             tv->rngOneMin = range->aMin;
2424             tv->rngOneMax = range->aMax;
2425          }
2426          else if (nRanges > 1) {
2427             /* See if we already have a range list which is
2428                structurally identical.  If so, use that; if not, clone
2429                this one, and add it to our collection. */
2430             UWord keyW, valW;
2431             if (VG_(lookupFM)( rangestree, &keyW, &valW, (UWord)xa )) {
2432                XArray* old = (XArray*)keyW;
2433                vg_assert(valW == 0);
2434                vg_assert(old != xa);
2435                tv->rngMany = old;
2436             } else {
2437                XArray* cloned = VG_(cloneXA)( "di.readdwarf3.pvD.2", xa );
2438                tv->rngMany = cloned;
2439                VG_(addToFM)( rangestree, (UWord)cloned, 0 );
2440             }
2441          }
2442 
2443          VG_(addToXA)( tempvars, &tv );
2444 
2445          TRACE_D3("  Recording this variable, with %ld PC range(s)\n",
2446                   VG_(sizeXA)(xa) );
2447          /* collect stats on how effective the ->ranges special
2448             casing is */
2449          if (0) {
2450             static Int ntot=0, ngt=0;
2451             ntot++;
2452             if (tv->rngMany) ngt++;
2453             if (0 == (ntot % 100000))
2454                VG_(printf)("XXXX %d tot, %d cloned\n", ntot, ngt);
2455          }
2456 
2457       }
2458 
2459       /* Here are some other weird cases seen in the wild:
2460 
2461             We have a variable with a name and a type, but no
2462             location.  I guess that's a sign that it has been
2463             optimised away.  Ignore it.  Here's an example:
2464 
2465             static Int lc_compar(void* n1, void* n2) {
2466                MC_Chunk* mc1 = *(MC_Chunk**)n1;
2467                MC_Chunk* mc2 = *(MC_Chunk**)n2;
2468                return (mc1->data < mc2->data ? -1 : 1);
2469             }
2470 
2471             Both mc1 and mc2 are like this
2472             <2><5bc>: Abbrev Number: 21 (DW_TAG_variable)
2473                 DW_AT_name        : mc1
2474                 DW_AT_decl_file   : 1
2475                 DW_AT_decl_line   : 216
2476                 DW_AT_type        : <5d3>
2477 
2478             whereas n1 and n2 do have locations specified.
2479 
2480             ---------------------------------------------
2481 
2482             We see a DW_TAG_formal_parameter with a type, but
2483             no name and no location.  It's probably part of a function type
2484             construction, thusly, hence ignore it:
2485          <1><2b4>: Abbrev Number: 12 (DW_TAG_subroutine_type)
2486              DW_AT_sibling     : <2c9>
2487              DW_AT_prototyped  : 1
2488              DW_AT_type        : <114>
2489          <2><2be>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2490              DW_AT_type        : <13e>
2491          <2><2c3>: Abbrev Number: 13 (DW_TAG_formal_parameter)
2492              DW_AT_type        : <133>
2493 
2494             ---------------------------------------------
2495 
2496             Is very minimal, like this:
2497             <4><81d>: Abbrev Number: 44 (DW_TAG_variable)
2498                 DW_AT_abstract_origin: <7ba>
2499             What that signifies I have no idea.  Ignore.
2500 
2501             ----------------------------------------------
2502 
2503             Is very minimal, like this:
2504             <200f>: DW_TAG_formal_parameter
2505                 DW_AT_abstract_ori: <1f4c>
2506                 DW_AT_location    : 13440
2507             What that signifies I have no idea.  Ignore.
2508             It might be significant, though: the variable at least
2509             has a location and so might exist somewhere.
2510             Maybe we should handle this.
2511 
2512             ---------------------------------------------
2513 
2514             <22407>: DW_TAG_variable
2515               DW_AT_name        : (indirect string, offset: 0x6579):
2516                                   vgPlain_trampoline_stuff_start
2517               DW_AT_decl_file   : 29
2518               DW_AT_decl_line   : 56
2519               DW_AT_external    : 1
2520               DW_AT_declaration : 1
2521 
2522             Nameless and typeless variable that has a location?  Who
2523             knows.  Not me.
2524             <2><3d178>: Abbrev Number: 22 (DW_TAG_variable)
2525                  DW_AT_location    : 9 byte block: 3 c0 c7 13 38 0 0 0 0
2526                                      (DW_OP_addr: 3813c7c0)
2527 
2528             No, really.  Check it out.  gcc is quite simply borked.
2529             <3><168cc>: Abbrev Number: 141 (DW_TAG_variable)
2530             // followed by no attributes, and the next DIE is a sibling,
2531             // not a child
2532             */
2533    }
2534    return;
2535 
2536   bad_DIE:
2537    dump_bad_die_and_barf("parse_var_DIE", dtag, posn, level,
2538                          c_die, saved_die_c_offset,
2539                          abbv,
2540                          cc);
2541    /*NOTREACHED*/
2542 }
2543 
2544 typedef
2545    struct {
2546       /* The fndn_ix file name/dirname table.  Is a mapping from dwarf
2547          integer index to the index in di->fndnpool. */
2548       XArray* /* of UInt* */ fndn_ix_Table;
2549       UWord sibling; // sibling of the last read DIE (if it has a sibling).
2550    }
2551    D3InlParser;
2552 
2553 /* Return the function name corresponding to absori.
2554 
2555    absori is a 'cooked' reference to a DIE, i.e. absori can be either
2556    in cc->escn_debug_info or in cc->escn_debug_info_alt.
2557    get_inlFnName will uncook absori.
2558 
2559    The returned value is a (permanent) string in DebugInfo's .strchunks.
2560 
2561    LIMITATION: absori must point in the CU of cc. If absori points
2562    in another CU, returns "UnknownInlinedFun".
2563 
2564    Here are the problems to retrieve the fun name if absori is in
2565    another CU:  the DIE reading code cannot properly extract data from
2566    another CU, as the abbv code retrieved in the other CU cannot be
2567    translated in an abbreviation. Reading data from the alternate debug
2568    info also gives problems as the string reference is also in the alternate
2569    file, but when reading the alt DIE, the string form is a 'local' string,
2570    but cannot be read in the current CU, but must be read in the alt CU.
2571    See bug 338803 comment#3 and attachment for a failed attempt to handle
2572    these problems (failed because with the patch, only one alt abbrev hash
2573    table is kept, while we must handle all abbreviations in all CUs
2574    referenced by an absori (being a reference to an alt CU, or a previous
2575    or following CU). */
get_inlFnName(Int absori,const CUConst * cc,Bool td3)2576 static const HChar* get_inlFnName (Int absori, const CUConst* cc, Bool td3)
2577 {
2578    Cursor c;
2579    const g_abbv *abbv;
2580    ULong  atag, abbv_code;
2581    UInt   has_children;
2582    UWord  posn;
2583    Bool type_flag, alt_flag;
2584    const HChar *ret = NULL;
2585    FormContents cts;
2586    UInt nf_i;
2587 
2588    posn = uncook_die( cc, absori, &type_flag, &alt_flag);
2589    if (type_flag)
2590       cc->barf("get_inlFnName: uncooked absori in type debug info");
2591 
2592    /* LIMITATION: check we are in the same CU.
2593       If not, return unknown inlined function name. */
2594    /* if crossing between alt debug info<>normal info
2595           or posn not in the cu range,
2596       then it is in another CU. */
2597    if (alt_flag != cc->is_alt_info
2598        || posn < cc->cu_start_offset
2599        || posn >= cc->cu_start_offset + cc->unit_length) {
2600       static Bool reported = False;
2601       if (!reported && VG_(clo_verbosity) > 1) {
2602          VG_(message)(Vg_DebugMsg,
2603                       "Warning: cross-CU LIMITATION: some inlined fn names\n"
2604                       "might be shown as UnknownInlinedFun\n");
2605          reported = True;
2606       }
2607       TRACE_D3(" <get_inlFnName><%lx>: cross-CU LIMITATION", posn);
2608       return ML_(addStr)(cc->di, "UnknownInlinedFun", -1);
2609    }
2610 
2611    init_Cursor (&c, cc->escn_debug_info, posn, cc->barf,
2612                 "Overrun get_inlFnName absori");
2613 
2614    abbv_code = get_ULEB128( &c );
2615    abbv      = get_abbv ( cc, abbv_code);
2616    atag      = abbv->atag;
2617    TRACE_D3(" <get_inlFnName><%lx>: Abbrev Number: %llu (%s)\n",
2618             posn, abbv_code, ML_(pp_DW_TAG)( atag ) );
2619 
2620    if (atag == 0)
2621       cc->barf("get_inlFnName: invalid zero tag on DIE");
2622 
2623    has_children = abbv->has_children;
2624    if (has_children != DW_children_no && has_children != DW_children_yes)
2625       cc->barf("get_inlFnName: invalid has_children value");
2626 
2627    if (atag != DW_TAG_subprogram)
2628       cc->barf("get_inlFnName: absori not a subprogram");
2629 
2630    nf_i = 0;
2631    while (True) {
2632       DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2633       DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2634       nf_i++;
2635       if (attr == 0 && form == 0) break;
2636       get_Form_contents( &cts, cc, &c, False/*td3*/, form );
2637       if (attr == DW_AT_name) {
2638          HChar *fnname;
2639          if (cts.szB >= 0)
2640             cc->barf("get_inlFnName: expecting indirect string");
2641          fnname = ML_(cur_read_strdup)( cts.u.cur,
2642                                         "get_inlFnName.1" );
2643          ret = ML_(addStr)(cc->di, fnname, -1);
2644          ML_(dinfo_free) (fnname);
2645          break; /* Name found, get out of the loop, as this has priority over
2646                  DW_AT_specification. */
2647       }
2648       if (attr == DW_AT_specification) {
2649          UWord cdie;
2650 
2651          if (cts.szB == 0)
2652             cc->barf("get_inlFnName: AT specification missing");
2653 
2654          /* The recursive call to get_inlFnName will uncook its arg.
2655             So, we need to cook it here, so as to reference the
2656             correct section (e.g. the alt info). */
2657          cdie = cook_die_using_form(cc, (UWord)cts.u.val, form);
2658 
2659          /* hoping that there is no loop */
2660          ret = get_inlFnName (cdie, cc, td3);
2661          /* Unclear if having both DW_AT_specification and DW_AT_name is
2662             possible but in any case, we do not break here.
2663             If we find later on a DW_AT_name, it will override the name found
2664             in the DW_AT_specification.*/
2665       }
2666    }
2667 
2668    if (ret)
2669       return ret;
2670    else {
2671       TRACE_D3("AbsOriFnNameNotFound");
2672       return ML_(addStr)(cc->di, "AbsOriFnNameNotFound", -1);
2673    }
2674 }
2675 
2676 /* Returns True if the (possibly) childrens of the current DIE are interesting
2677    to parse. Returns False otherwise.
2678    If the current DIE has a sibling, the non interesting children can
2679    maybe be skipped (if the DIE has a DW_AT_sibling).  */
2680 __attribute__((noinline))
parse_inl_DIE(D3InlParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,const g_abbv * abbv,CUConst * cc,Bool td3)2681 static Bool parse_inl_DIE (
2682    /*MOD*/D3InlParser* parser,
2683    DW_TAG dtag,
2684    UWord posn,
2685    Int level,
2686    Cursor* c_die,
2687    const g_abbv *abbv,
2688    CUConst* cc,
2689    Bool td3
2690 )
2691 {
2692    FormContents cts;
2693    UInt nf_i;
2694 
2695    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
2696 
2697    /* Get info about DW_TAG_compile_unit and DW_TAG_partial_unit 'which
2698       in theory could also contain inlined fn calls).  */
2699    if (dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit) {
2700       Bool have_lo    = False;
2701       Addr ip_lo    = 0;
2702       const HChar *compdir = NULL;
2703 
2704       nf_i = 0;
2705       while (True) {
2706          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2707          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2708          nf_i++;
2709          if (attr == 0 && form == 0) break;
2710          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2711          if (attr == DW_AT_low_pc && cts.szB > 0) {
2712             ip_lo   = cts.u.val;
2713             have_lo = True;
2714          }
2715          if (attr == DW_AT_comp_dir) {
2716             if (cts.szB >= 0)
2717                cc->barf("parse_inl_DIE compdir: expecting indirect string");
2718             HChar *str = ML_(cur_read_strdup)( cts.u.cur,
2719                                                "parse_inl_DIE.compdir" );
2720             compdir = ML_(addStr)(cc->di, str, -1);
2721             ML_(dinfo_free) (str);
2722          }
2723          if (attr == DW_AT_stmt_list && cts.szB > 0) {
2724             read_filename_table( parser->fndn_ix_Table, compdir,
2725                                  cc, cts.u.val, td3 );
2726          }
2727          if (attr == DW_AT_sibling && cts.szB > 0) {
2728             parser->sibling = cts.u.val;
2729          }
2730       }
2731       if (level == 0)
2732          setup_cu_svma (cc, have_lo, ip_lo, td3);
2733    }
2734 
2735    if (dtag == DW_TAG_inlined_subroutine) {
2736       Bool   have_lo    = False;
2737       Bool   have_hi1   = False;
2738       Bool   have_range = False;
2739       Bool   hiIsRelative = False;
2740       Addr   ip_lo      = 0;
2741       Addr   ip_hi1     = 0;
2742       Addr   rangeoff   = 0;
2743       UInt   caller_fndn_ix = 0;
2744       Int caller_lineno = 0;
2745       Int inlinedfn_abstract_origin = 0;
2746 
2747       nf_i = 0;
2748       while (True) {
2749          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
2750          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
2751          nf_i++;
2752          if (attr == 0 && form == 0) break;
2753          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
2754          if (attr == DW_AT_call_file && cts.szB > 0) {
2755             Int ftabIx = (Int)cts.u.val;
2756             if (ftabIx >= 1
2757                 && ftabIx < VG_(sizeXA)( parser->fndn_ix_Table )) {
2758                caller_fndn_ix = *(UInt*)
2759                           VG_(indexXA)( parser->fndn_ix_Table, ftabIx );
2760             }
2761             if (0) VG_(printf)("XXX caller_fndn_ix = %u %s\n", caller_fndn_ix,
2762                                ML_(fndn_ix2filename) (cc->di, caller_fndn_ix));
2763          }
2764          if (attr == DW_AT_call_line && cts.szB > 0) {
2765             caller_lineno = cts.u.val;
2766          }
2767 
2768          if (attr == DW_AT_abstract_origin  && cts.szB > 0) {
2769             inlinedfn_abstract_origin
2770                = cook_die_using_form (cc, (UWord)cts.u.val, form);
2771          }
2772 
2773          if (attr == DW_AT_low_pc && cts.szB > 0) {
2774             ip_lo   = cts.u.val;
2775             have_lo = True;
2776          }
2777          if (attr == DW_AT_high_pc && cts.szB > 0) {
2778             ip_hi1   = cts.u.val;
2779             have_hi1 = True;
2780             if (form != DW_FORM_addr)
2781                hiIsRelative = True;
2782          }
2783          if (attr == DW_AT_ranges && cts.szB > 0) {
2784             rangeoff   = cts.u.val;
2785             have_range = True;
2786          }
2787          if (attr == DW_AT_sibling && cts.szB > 0) {
2788             parser->sibling = cts.u.val;
2789          }
2790       }
2791       if (have_lo && have_hi1 && hiIsRelative)
2792          ip_hi1 += ip_lo;
2793       /* Do we have something that looks sane? */
2794       if (dtag == DW_TAG_inlined_subroutine
2795           && (!have_lo) && (!have_hi1) && (!have_range)) {
2796          /* Seems strange. How can an inlined subroutine have
2797             no code ? */
2798          goto_bad_DIE;
2799       } else
2800       if (have_lo && have_hi1 && (!have_range)) {
2801          /* This inlined call is just a single address range. */
2802          if (ip_lo < ip_hi1) {
2803             /* Apply text debug biasing */
2804             ip_lo += cc->di->text_debug_bias;
2805             ip_hi1 += cc->di->text_debug_bias;
2806             ML_(addInlInfo) (cc->di,
2807                              ip_lo, ip_hi1,
2808                              get_inlFnName (inlinedfn_abstract_origin, cc, td3),
2809                              caller_fndn_ix,
2810                              caller_lineno, level);
2811          }
2812       } else if (have_range) {
2813          /* This inlined call is several address ranges. */
2814          XArray *ranges;
2815          Word j;
2816          const HChar *inlfnname =
2817             get_inlFnName (inlinedfn_abstract_origin, cc, td3);
2818 
2819          /* Ranges are biased for the inline info using the same logic
2820             as what is used for biasing ranges for the var info, for which
2821             ranges are read using cc->cu_svma (see parse_var_DIE).
2822             Then text_debug_bias is added when a (non global) var
2823             is recorded (see just before the call to ML_(addVar)) */
2824          ranges = get_range_list( cc, td3,
2825                                   rangeoff, cc->cu_svma );
2826          for (j = 0; j < VG_(sizeXA)( ranges ); j++) {
2827             AddrRange* range = (AddrRange*) VG_(indexXA)( ranges, j );
2828             ML_(addInlInfo) (cc->di,
2829                              range->aMin   + cc->di->text_debug_bias,
2830                              range->aMax+1 + cc->di->text_debug_bias,
2831                              // aMax+1 as range has its last bound included
2832                              // while ML_(addInlInfo) expects last bound not
2833                              // included.
2834                              inlfnname,
2835                              caller_fndn_ix,
2836                              caller_lineno, level);
2837          }
2838          VG_(deleteXA)( ranges );
2839       } else
2840          goto_bad_DIE;
2841    }
2842 
2843    // Only recursively parse the (possible) children for the DIE which
2844    // might maybe contain a DW_TAG_inlined_subroutine:
2845    return dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
2846       || dtag == DW_TAG_inlined_subroutine
2847       || dtag == DW_TAG_compile_unit || dtag == DW_TAG_partial_unit;
2848 
2849   bad_DIE:
2850    dump_bad_die_and_barf("parse_inl_DIE", dtag, posn, level,
2851                          c_die, saved_die_c_offset,
2852                          abbv,
2853                          cc);
2854    /*NOTREACHED*/
2855 }
2856 
2857 
2858 /*------------------------------------------------------------*/
2859 /*---                                                      ---*/
2860 /*--- Parsing of type-related DIEs                         ---*/
2861 /*---                                                      ---*/
2862 /*------------------------------------------------------------*/
2863 
2864 typedef
2865    struct {
2866       /* What source language?  'A'=Ada83/95,
2867                                 'C'=C/C++,
2868                                 'F'=Fortran,
2869                                 '?'=other
2870          Established once per compilation unit. */
2871       UChar language;
2872       /* A stack of types which are currently under construction */
2873       Int   sp; /* [sp] is innermost active entry; sp==-1 for empty
2874                    stack */
2875       Int   stack_size;
2876       /* Note that the TyEnts in qparentE are temporary copies of the
2877          ones accumulating in the main tyent array.  So it is not safe
2878          to free up anything on them when popping them off the stack
2879          (iow, it isn't safe to use TyEnt__make_EMPTY on them).  Just
2880          memset them to zero when done. */
2881       TyEnt *qparentE; /* parent TyEnts */
2882       Int   *qlevel;
2883    }
2884    D3TypeParser;
2885 
2886 /* Completely initialise a type parser object */
2887 static void
type_parser_init(D3TypeParser * parser)2888 type_parser_init ( D3TypeParser *parser )
2889 {
2890    parser->sp = -1;
2891    parser->language = '?';
2892    parser->stack_size = 0;
2893    parser->qparentE = NULL;
2894    parser->qlevel   = NULL;
2895 }
2896 
2897 /* Release any memory hanging off a type parser object */
2898 static void
type_parser_release(D3TypeParser * parser)2899 type_parser_release ( D3TypeParser *parser )
2900 {
2901    ML_(dinfo_free)( parser->qparentE );
2902    ML_(dinfo_free)( parser->qlevel );
2903 }
2904 
typestack_show(const D3TypeParser * parser,const HChar * str)2905 static void typestack_show ( const D3TypeParser* parser, const HChar* str )
2906 {
2907    Word i;
2908    VG_(printf)("  typestack (%s) {\n", str);
2909    for (i = 0; i <= parser->sp; i++) {
2910       VG_(printf)("    [%ld] (level %d): ", i, parser->qlevel[i]);
2911       ML_(pp_TyEnt)( &parser->qparentE[i] );
2912       VG_(printf)("\n");
2913    }
2914    VG_(printf)("  }\n");
2915 }
2916 
2917 /* Remove from the stack, all entries with .level > 'level' */
2918 static
typestack_preen(D3TypeParser * parser,Bool td3,Int level)2919 void typestack_preen ( D3TypeParser* parser, Bool td3, Int level )
2920 {
2921    Bool changed = False;
2922    vg_assert(parser->sp < parser->stack_size);
2923    while (True) {
2924       vg_assert(parser->sp >= -1);
2925       if (parser->sp == -1) break;
2926       if (parser->qlevel[parser->sp] <= level) break;
2927       if (0)
2928          TRACE_D3("BBBBAAAA typestack_pop [newsp=%d]\n", parser->sp-1);
2929       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
2930       parser->sp--;
2931       changed = True;
2932    }
2933    if (changed && td3)
2934       typestack_show( parser, "after preen" );
2935 }
2936 
typestack_is_empty(const D3TypeParser * parser)2937 static Bool typestack_is_empty ( const D3TypeParser* parser )
2938 {
2939    vg_assert(parser->sp >= -1 && parser->sp < parser->stack_size);
2940    return parser->sp == -1;
2941 }
2942 
/* Push a copy of '*parentE' on the type stack as the entry for
   'level'.  Any existing entries at 'level' or deeper are removed
   first.  The stack arrays grow by 16 slots whenever they are full.
   'parentE' must be a type entity with a valid cuOff. */
static void typestack_push ( const CUConst* cc,
                             D3TypeParser* parser,
                             Bool td3,
                             const TyEnt* parentE, Int level )
{
   if (0)
   TRACE_D3("BBBBAAAA typestack_push[newsp=%d]: %d  %05lx\n",
            parser->sp+1, level, parentE->cuOff);

   /* We are about to replace any previous entry at 'level', so first
      zap everything >= 'level'. */
   typestack_preen(parser, /*td3*/False, level-1);

   vg_assert(parser->sp >= -1);
   vg_assert(parser->sp < parser->stack_size);

   /* Stack full?  Then enlarge both parallel arrays. */
   if (parser->sp + 1 == parser->stack_size) {
      parser->stack_size += 16;
      parser->qparentE =
         ML_(dinfo_realloc)("di.readdwarf3.typush.1", parser->qparentE,
                            parser->stack_size * sizeof parser->qparentE[0]);
      parser->qlevel =
         ML_(dinfo_realloc)("di.readdwarf3.typush.2", parser->qlevel,
                            parser->stack_size * sizeof parser->qlevel[0]);
   }

   /* After preening, whatever remains on top must be shallower than
      the entry being pushed. */
   if (parser->sp >= 0) {
      vg_assert(parser->qlevel[parser->sp] < level);
   }

   parser->sp++;
   vg_assert(parentE);
   vg_assert(ML_(TyEnt__is_type)(parentE));
   vg_assert(parentE->cuOff != D3_INVALID_CUOFF);
   parser->qlevel[parser->sp]   = level;
   parser->qparentE[parser->sp] = *parentE;

   if (TD3) {
      typestack_show( parser, "after push" );
   }
}
2978 
2979 /* True if the subrange type being parsed gives the bounds of an array. */
subrange_type_denotes_array_bounds(const D3TypeParser * parser,DW_TAG dtag)2980 static Bool subrange_type_denotes_array_bounds ( const D3TypeParser* parser,
2981                                                  DW_TAG dtag ) {
2982    vg_assert(dtag == DW_TAG_subrange_type);
2983    /* For most languages, a subrange_type dtag always gives the
2984       bounds of an array.
2985       For Ada, there are additional conditions as a subrange_type
2986       is also used for other purposes. */
2987    if (parser->language != 'A')
2988       /* not Ada, so it definitely denotes an array bound. */
2989       return True;
2990    else
2991       /* Extra constraints for Ada: it only denotes an array bound if .. */
2992       return (! typestack_is_empty(parser)
2993               && parser->qparentE[parser->sp].tag == Te_TyArray);
2994 }
2995 
/* Parse a type-related DIE.  'parser' holds the current parser state.
   'admin' is where the completed types are dumped.  'dtag' is the tag
   for this DIE.  'c_die' points to the start of the data fields (FORM
   stuff) for the DIE.  'abbv' is the parsed abbreviation which
   describes the DIE.

   We may find the DIE uninteresting, in which case we should ignore
   it.

   What happens: the DIE is examined.  If uninteresting, it is ignored.
   Otherwise, the DIE gives rise to two things:

   (1) the offset of this DIE in the CU -- the cuOffset, a UWord
   (2) a TyAdmin structure, which holds the type, or related stuff

   (2) is added at the end of 'tyadmins', at some index, say 'i'.

   A pair (cuOffset, i) is added to 'tydict'.

   Hence 'tyadmins' holds the actual type entities, and 'tydict' holds
   a mapping from cuOffset to the index of the corresponding entry in
   'tyadmins'.

   When resolving a cuOffset to a TyAdmin, first look up the cuOffset
   in the tydict (by binary search).  This gives an index into
   tyadmins, and the required entity lives in tyadmins at that index.
*/
3023 __attribute__((noinline))
parse_type_DIE(XArray * tyents,D3TypeParser * parser,DW_TAG dtag,UWord posn,Int level,Cursor * c_die,const g_abbv * abbv,const CUConst * cc,Bool td3)3024 static void parse_type_DIE ( /*MOD*/XArray* /* of TyEnt */ tyents,
3025                              /*MOD*/D3TypeParser* parser,
3026                              DW_TAG dtag,
3027                              UWord posn,
3028                              Int level,
3029                              Cursor* c_die,
3030                              const g_abbv *abbv,
3031                              const CUConst* cc,
3032                              Bool td3 )
3033 {
3034    FormContents cts;
3035    UInt nf_i;
3036    TyEnt typeE;
3037    TyEnt atomE;
3038    TyEnt fieldE;
3039    TyEnt boundE;
3040 
3041    UWord saved_die_c_offset  = get_position_of_Cursor( c_die );
3042 
3043    VG_(memset)( &typeE,  0xAA, sizeof(typeE) );
3044    VG_(memset)( &atomE,  0xAA, sizeof(atomE) );
3045    VG_(memset)( &fieldE, 0xAA, sizeof(fieldE) );
3046    VG_(memset)( &boundE, 0xAA, sizeof(boundE) );
3047 
3048    /* If we've returned to a level at or above any previously noted
3049       parent, un-note it, so we don't believe we're still collecting
3050       its children. */
3051    typestack_preen( parser, td3, level-1 );
3052 
3053    if (dtag == DW_TAG_compile_unit
3054        || dtag == DW_TAG_type_unit
3055        || dtag == DW_TAG_partial_unit) {
3056       /* See if we can find DW_AT_language, since it is important for
3057          establishing array bounds (see DW_TAG_subrange_type below in
3058          this fn) */
3059       nf_i = 0;
3060       while (True) {
3061          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3062          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3063          nf_i++;
3064          if (attr == 0 && form == 0) break;
3065          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3066          if (attr != DW_AT_language)
3067             continue;
3068          if (cts.szB <= 0)
3069            goto_bad_DIE;
3070          switch (cts.u.val) {
3071             case DW_LANG_C89: case DW_LANG_C:
3072             case DW_LANG_C_plus_plus: case DW_LANG_ObjC:
3073             case DW_LANG_ObjC_plus_plus: case DW_LANG_UPC:
3074             case DW_LANG_Upc: case DW_LANG_C99: case DW_LANG_C11:
3075             case DW_LANG_C_plus_plus_11: case DW_LANG_C_plus_plus_14:
3076                parser->language = 'C'; break;
3077             case DW_LANG_Fortran77: case DW_LANG_Fortran90:
3078             case DW_LANG_Fortran95: case DW_LANG_Fortran03:
3079             case DW_LANG_Fortran08:
3080                parser->language = 'F'; break;
3081             case DW_LANG_Ada83: case DW_LANG_Ada95:
3082                parser->language = 'A'; break;
3083             case DW_LANG_Cobol74:
3084             case DW_LANG_Cobol85: case DW_LANG_Pascal83:
3085             case DW_LANG_Modula2: case DW_LANG_Java:
3086             case DW_LANG_PLI:
3087             case DW_LANG_D: case DW_LANG_Python: case DW_LANG_Go:
3088             case DW_LANG_Mips_Assembler:
3089                parser->language = '?'; break;
3090             default:
3091                goto_bad_DIE;
3092          }
3093       }
3094    }
3095 
3096    if (dtag == DW_TAG_base_type) {
3097       /* We can pick up a new base type any time. */
3098       VG_(memset)(&typeE, 0, sizeof(typeE));
3099       typeE.cuOff = D3_INVALID_CUOFF;
3100       typeE.tag   = Te_TyBase;
3101       nf_i = 0;
3102       while (True) {
3103          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3104          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3105          nf_i++;
3106          if (attr == 0 && form == 0) break;
3107          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3108          if (attr == DW_AT_name && cts.szB < 0) {
3109             typeE.Te.TyBase.name
3110                = ML_(cur_read_strdup)( cts.u.cur,
3111                                        "di.readdwarf3.ptD.base_type.1" );
3112          }
3113          if (attr == DW_AT_byte_size && cts.szB > 0) {
3114             typeE.Te.TyBase.szB = cts.u.val;
3115          }
3116          if (attr == DW_AT_encoding && cts.szB > 0) {
3117             switch (cts.u.val) {
3118                case DW_ATE_unsigned: case DW_ATE_unsigned_char:
3119                case DW_ATE_UTF: /* since DWARF4, e.g. char16_t from C++ */
3120                case DW_ATE_boolean:/* FIXME - is this correct? */
3121                case DW_ATE_unsigned_fixed:
3122                   typeE.Te.TyBase.enc = 'U'; break;
3123                case DW_ATE_signed: case DW_ATE_signed_char:
3124                case DW_ATE_signed_fixed:
3125                   typeE.Te.TyBase.enc = 'S'; break;
3126                case DW_ATE_float:
3127                   typeE.Te.TyBase.enc = 'F'; break;
3128                case DW_ATE_complex_float:
3129                   typeE.Te.TyBase.enc = 'C'; break;
3130                default:
3131                   goto_bad_DIE;
3132             }
3133          }
3134       }
3135 
3136       /* Invent a name if it doesn't have one.  gcc-4.3
3137          -ftree-vectorize is observed to emit nameless base types. */
3138       if (!typeE.Te.TyBase.name)
3139          typeE.Te.TyBase.name
3140             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.base_type.2",
3141                                  "<anon_base_type>" );
3142 
3143       /* Do we have something that looks sane? */
3144       if (/* must have a name */
3145           typeE.Te.TyBase.name == NULL
3146           /* and a plausible size.  Yes, really 32: "complex long
3147              double" apparently has size=32 */
3148           || typeE.Te.TyBase.szB < 0 || typeE.Te.TyBase.szB > 32
3149           /* and a plausible encoding */
3150           || (typeE.Te.TyBase.enc != 'U'
3151               && typeE.Te.TyBase.enc != 'S'
3152               && typeE.Te.TyBase.enc != 'F'
3153               && typeE.Te.TyBase.enc != 'C'))
3154          goto_bad_DIE;
3155       /* Last minute hack: if we see this
3156          <1><515>: DW_TAG_base_type
3157              DW_AT_byte_size   : 0
3158              DW_AT_encoding    : 5
3159              DW_AT_name        : void
3160          convert it into a real Void type. */
3161       if (typeE.Te.TyBase.szB == 0
3162           && 0 == VG_(strcmp)("void", typeE.Te.TyBase.name)) {
3163          ML_(TyEnt__make_EMPTY)(&typeE);
3164          typeE.tag = Te_TyVoid;
3165          typeE.Te.TyVoid.isFake = False; /* it's a real one! */
3166       }
3167 
3168       goto acquire_Type;
3169    }
3170 
3171    /*
3172     * An example of DW_TAG_rvalue_reference_type:
3173     *
3174     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3175     *  <1><1014>: Abbrev Number: 55 (DW_TAG_rvalue_reference_type)
3176     *     <1015>   DW_AT_byte_size   : 4
3177     *     <1016>   DW_AT_type        : <0xe52>
3178     */
3179    if (dtag == DW_TAG_pointer_type || dtag == DW_TAG_reference_type
3180        || dtag == DW_TAG_ptr_to_member_type
3181        || dtag == DW_TAG_rvalue_reference_type) {
3182       /* This seems legit for _pointer_type and _reference_type.  I
3183          don't know if rolling _ptr_to_member_type in here really is
3184          legit, but it's better than not handling it at all. */
3185       VG_(memset)(&typeE, 0, sizeof(typeE));
3186       typeE.cuOff = D3_INVALID_CUOFF;
3187       switch (dtag) {
3188       case DW_TAG_pointer_type:
3189          typeE.tag = Te_TyPtr;
3190          break;
3191       case DW_TAG_reference_type:
3192          typeE.tag = Te_TyRef;
3193          break;
3194       case DW_TAG_ptr_to_member_type:
3195          typeE.tag = Te_TyPtrMbr;
3196          break;
3197       case DW_TAG_rvalue_reference_type:
3198          typeE.tag = Te_TyRvalRef;
3199          break;
3200       default:
3201          vg_assert(False);
3202       }
3203       /* target type defaults to void */
3204       typeE.Te.TyPorR.typeR = D3_FAKEVOID_CUOFF;
3205       /* These four type kinds don't *have* to specify their size, in
3206          which case we assume it's a machine word.  But if they do
3207          specify it, it must be a machine word :-)  This probably
3208          assumes that the word size of the Dwarf3 we're reading is the
3209          same size as that on the machine.  gcc appears to give a size
3210          whereas icc9 doesn't. */
3211       typeE.Te.TyPorR.szB = sizeof(UWord);
3212       nf_i = 0;
3213       while (True) {
3214          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3215          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3216          nf_i++;
3217          if (attr == 0 && form == 0) break;
3218          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3219          if (attr == DW_AT_byte_size && cts.szB > 0) {
3220             typeE.Te.TyPorR.szB = cts.u.val;
3221          }
3222          if (attr == DW_AT_type && cts.szB > 0) {
3223             typeE.Te.TyPorR.typeR
3224                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3225          }
3226       }
3227       /* Do we have something that looks sane? */
3228       if (typeE.Te.TyPorR.szB != sizeof(UWord))
3229          goto_bad_DIE;
3230       else
3231          goto acquire_Type;
3232    }
3233 
3234    if (dtag == DW_TAG_enumeration_type) {
3235       /* Create a new Type to hold the results. */
3236       VG_(memset)(&typeE, 0, sizeof(typeE));
3237       typeE.cuOff = posn;
3238       typeE.tag   = Te_TyEnum;
3239       Bool is_decl = False;
3240       typeE.Te.TyEnum.atomRs
3241          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.enum_type.1",
3242                        ML_(dinfo_free),
3243                        sizeof(UWord) );
3244       nf_i=0;
3245       while (True) {
3246          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3247          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3248          nf_i++;
3249          if (attr == 0 && form == 0) break;
3250          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3251          if (attr == DW_AT_name && cts.szB < 0) {
3252             typeE.Te.TyEnum.name
3253                = ML_(cur_read_strdup)( cts.u.cur,
3254                                        "di.readdwarf3.pTD.enum_type.2" );
3255          }
3256          if (attr == DW_AT_byte_size && cts.szB > 0) {
3257             typeE.Te.TyEnum.szB = cts.u.val;
3258          }
3259          if (attr == DW_AT_declaration) {
3260             is_decl = True;
3261          }
3262       }
3263 
3264       if (!typeE.Te.TyEnum.name)
3265          typeE.Te.TyEnum.name
3266             = ML_(dinfo_strdup)( "di.readdwarf3.pTD.enum_type.3",
3267                                  "<anon_enum_type>" );
3268 
3269       /* Do we have something that looks sane? */
3270       if (typeE.Te.TyEnum.szB == 0
3271           /* we must know the size */
3272           /* but not for Ada, which uses such dummy
3273              enumerations as helper for gdb ada mode.
3274              Also GCC allows incomplete enums as GNU extension.
3275              http://gcc.gnu.org/onlinedocs/gcc/Incomplete-Enums.html
3276              These are marked as DW_AT_declaration and won't have
3277              a size. They can only be used in declaration or as
3278              pointer types.  You can't allocate variables or storage
3279              using such an enum type. (Also GCC seems to have a bug
3280              that will put such an enumeration_type into a .debug_types
3281              unit which should only contain complete types.) */
3282           && (parser->language != 'A' && !is_decl)) {
3283          goto_bad_DIE;
3284       }
3285 
3286       /* On't stack! */
3287       typestack_push( cc, parser, td3, &typeE, level );
3288       goto acquire_Type;
3289    }
3290 
3291    /* gcc (GCC) 4.4.0 20081017 (experimental) occasionally produces
3292       DW_TAG_enumerator with only a DW_AT_name but no
3293       DW_AT_const_value.  This is in violation of the Dwarf3 standard,
3294       and appears to be a new "feature" of gcc - versions 4.3.x and
3295       earlier do not appear to do this.  So accept DW_TAG_enumerator
3296       which only have a name but no value.  An example:
3297 
3298       <1><180>: Abbrev Number: 6 (DW_TAG_enumeration_type)
3299          <181>   DW_AT_name        : (indirect string, offset: 0xda70):
3300                                      QtMsgType
3301          <185>   DW_AT_byte_size   : 4
3302          <186>   DW_AT_decl_file   : 14
3303          <187>   DW_AT_decl_line   : 1480
3304          <189>   DW_AT_sibling     : <0x1a7>
3305       <2><18d>: Abbrev Number: 7 (DW_TAG_enumerator)
3306          <18e>   DW_AT_name        : (indirect string, offset: 0x9e18):
3307                                      QtDebugMsg
3308       <2><192>: Abbrev Number: 7 (DW_TAG_enumerator)
3309          <193>   DW_AT_name        : (indirect string, offset: 0x1505f):
3310                                      QtWarningMsg
3311       <2><197>: Abbrev Number: 7 (DW_TAG_enumerator)
3312          <198>   DW_AT_name        : (indirect string, offset: 0x16f4a):
3313                                      QtCriticalMsg
3314       <2><19c>: Abbrev Number: 7 (DW_TAG_enumerator)
3315          <19d>   DW_AT_name        : (indirect string, offset: 0x156dd):
3316                                      QtFatalMsg
3317       <2><1a1>: Abbrev Number: 7 (DW_TAG_enumerator)
3318          <1a2>   DW_AT_name        : (indirect string, offset: 0x13660):
3319                                      QtSystemMsg
3320    */
3321    if (dtag == DW_TAG_enumerator) {
3322       VG_(memset)( &atomE, 0, sizeof(atomE) );
3323       atomE.cuOff = posn;
3324       atomE.tag   = Te_Atom;
3325       nf_i = 0;
3326       while (True) {
3327          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3328          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3329          nf_i++;
3330          if (attr == 0 && form == 0) break;
3331          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3332          if (attr == DW_AT_name && cts.szB < 0) {
3333             atomE.Te.Atom.name
3334               = ML_(cur_read_strdup)( cts.u.cur,
3335                                       "di.readdwarf3.pTD.enumerator.1" );
3336          }
3337          if (attr == DW_AT_const_value && cts.szB > 0) {
3338             atomE.Te.Atom.value      = cts.u.val;
3339             atomE.Te.Atom.valueKnown = True;
3340          }
3341       }
3342       /* Do we have something that looks sane? */
3343       if (atomE.Te.Atom.name == NULL)
3344          goto_bad_DIE;
3345       /* Do we have a plausible parent? */
3346       if (typestack_is_empty(parser)) goto_bad_DIE;
3347       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3348       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3349       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3350       if (parser->qparentE[parser->sp].tag != Te_TyEnum) goto_bad_DIE;
3351       /* Record this child in the parent */
3352       vg_assert(parser->qparentE[parser->sp].Te.TyEnum.atomRs);
3353       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyEnum.atomRs,
3354                     &atomE );
3355       /* And record the child itself */
3356       goto acquire_Atom;
3357    }
3358 
3359    /* Treat DW_TAG_class_type as if it was a DW_TAG_structure_type.  I
3360       don't know if this is correct, but it at least makes this reader
3361       usable for gcc-4.3 produced Dwarf3. */
3362    if (dtag == DW_TAG_structure_type || dtag == DW_TAG_class_type
3363        || dtag == DW_TAG_union_type) {
3364       Bool have_szB = False;
3365       Bool is_decl  = False;
3366       Bool is_spec  = False;
3367       /* Create a new Type to hold the results. */
3368       VG_(memset)(&typeE, 0, sizeof(typeE));
3369       typeE.cuOff = posn;
3370       typeE.tag   = Te_TyStOrUn;
3371       typeE.Te.TyStOrUn.name = NULL;
3372       typeE.Te.TyStOrUn.typeR = D3_INVALID_CUOFF;
3373       typeE.Te.TyStOrUn.fieldRs
3374          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.pTD.struct_type.1",
3375                        ML_(dinfo_free),
3376                        sizeof(UWord) );
3377       typeE.Te.TyStOrUn.complete = True;
3378       typeE.Te.TyStOrUn.isStruct = dtag == DW_TAG_structure_type
3379                                    || dtag == DW_TAG_class_type;
3380       nf_i = 0;
3381       while (True) {
3382          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3383          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3384          nf_i++;
3385          if (attr == 0 && form == 0) break;
3386          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3387          if (attr == DW_AT_name && cts.szB < 0) {
3388             typeE.Te.TyStOrUn.name
3389                = ML_(cur_read_strdup)( cts.u.cur,
3390                                        "di.readdwarf3.ptD.struct_type.2" );
3391          }
3392          if (attr == DW_AT_byte_size && cts.szB >= 0) {
3393             typeE.Te.TyStOrUn.szB = cts.u.val;
3394             have_szB = True;
3395          }
3396          if (attr == DW_AT_declaration && cts.szB > 0 && cts.u.val > 0) {
3397             is_decl = True;
3398          }
3399          if (attr == DW_AT_specification && cts.szB > 0 && cts.u.val > 0) {
3400             is_spec = True;
3401          }
3402          if (attr == DW_AT_signature && form == DW_FORM_ref_sig8
3403              && cts.szB > 0) {
3404             have_szB = True;
3405             typeE.Te.TyStOrUn.szB = 8;
3406             typeE.Te.TyStOrUn.typeR
3407                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3408          }
3409       }
3410       /* Do we have something that looks sane? */
3411       if (is_decl && (!is_spec)) {
3412          /* It's a DW_AT_declaration.  We require the name but
3413             nothing else. */
3414          /* JRS 2012-06-28: following discussion w/ tromey, if the
3415             type doesn't have name, just make one up, and accept it.
3416             It might be referred to by other DIEs, so ignoring it
3417             doesn't seem like a safe option. */
3418          if (typeE.Te.TyStOrUn.name == NULL)
3419             typeE.Te.TyStOrUn.name
3420                = ML_(dinfo_strdup)( "di.readdwarf3.ptD.struct_type.3",
3421                                     "<anon_struct_type>" );
3422          typeE.Te.TyStOrUn.complete = False;
3423          /* JRS 2009 Aug 10: <possible kludge>? */
3424          /* Push this tyent on the stack, even though it's incomplete.
3425             It appears that gcc-4.4 on Fedora 11 will sometimes create
3426             DW_TAG_member entries for it, and so we need to have a
3427             plausible parent present in order for that to work.  See
3428             #200029 comments 8 and 9. */
3429          typestack_push( cc, parser, td3, &typeE, level );
3430          /* </possible kludge> */
3431          goto acquire_Type;
3432       }
3433       if ((!is_decl) /* && (!is_spec) */) {
3434          /* this is the common, ordinary case */
3435          /* The name can be present, or not */
3436          if (!have_szB) {
3437             /* We must know the size.
3438                But in Ada, record with discriminants might have no size.
3439                But in C, VLA in the middle of a struct (gcc extension)
3440                might have no size.
3441                Instead, some GNAT dwarf extensions and/or dwarf entries
3442                allow to calculate the struct size at runtime.
3443                We cannot do that (yet?) so, the temporary kludge is to use
3444                a small size. */
3445             typeE.Te.TyStOrUn.szB = 1;
3446          }
3447          /* On't stack! */
3448          typestack_push( cc, parser, td3, &typeE, level );
3449          goto acquire_Type;
3450       }
3451       else {
3452          /* don't know how to handle any other variants just now */
3453          goto_bad_DIE;
3454       }
3455    }
3456 
3457    if (dtag == DW_TAG_member) {
3458       /* Acquire member entries for both DW_TAG_structure_type and
3459          DW_TAG_union_type.  They differ minorly, in that struct
3460          members must have a DW_AT_data_member_location expression
3461          whereas union members must not. */
3462       Bool parent_is_struct;
3463       VG_(memset)( &fieldE, 0, sizeof(fieldE) );
3464       fieldE.cuOff = posn;
3465       fieldE.tag   = Te_Field;
3466       fieldE.Te.Field.typeR = D3_INVALID_CUOFF;
3467       nf_i = 0;
3468       while (True) {
3469          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3470          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3471          nf_i++;
3472          if (attr == 0 && form == 0) break;
3473          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3474          if (attr == DW_AT_name && cts.szB < 0) {
3475             fieldE.Te.Field.name
3476                = ML_(cur_read_strdup)( cts.u.cur,
3477                                        "di.readdwarf3.ptD.member.1" );
3478          }
3479          if (attr == DW_AT_type && cts.szB > 0) {
3480             fieldE.Te.Field.typeR
3481                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3482          }
3483          /* There are 2 different cases for DW_AT_data_member_location.
3484             If it is a constant class attribute, it contains byte offset
3485             from the beginning of the containing entity.
3486             Otherwise it is a location expression.  */
3487          if (attr == DW_AT_data_member_location && cts.szB > 0) {
3488             fieldE.Te.Field.nLoc = -1;
3489             fieldE.Te.Field.pos.offset = cts.u.val;
3490          }
3491          if (attr == DW_AT_data_member_location && cts.szB <= 0) {
3492             fieldE.Te.Field.nLoc = (UWord)(-cts.szB);
3493             fieldE.Te.Field.pos.loc
3494                = ML_(cur_read_memdup)( cts.u.cur,
3495                                        (SizeT)fieldE.Te.Field.nLoc,
3496                                        "di.readdwarf3.ptD.member.2" );
3497          }
3498       }
3499       /* Do we have a plausible parent? */
3500       if (typestack_is_empty(parser)) goto_bad_DIE;
3501       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3502       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3503       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3504       if (parser->qparentE[parser->sp].tag != Te_TyStOrUn) goto_bad_DIE;
3505       /* Do we have something that looks sane?  If this a member of a
3506          struct, we must have a location expression; but if a member
3507          of a union that is irrelevant (D3 spec sec 5.6.6).  We ought
3508          to reject in the latter case, but some compilers have been
3509          observed to emit constant-zero expressions.  So just ignore
3510          them. */
3511       parent_is_struct
3512          = parser->qparentE[parser->sp].Te.TyStOrUn.isStruct;
3513       if (!fieldE.Te.Field.name)
3514          fieldE.Te.Field.name
3515             = ML_(dinfo_strdup)( "di.readdwarf3.ptD.member.3",
3516                                  "<anon_field>" );
3517       if (fieldE.Te.Field.typeR == D3_INVALID_CUOFF)
3518          goto_bad_DIE;
3519       if (fieldE.Te.Field.nLoc) {
3520          if (!parent_is_struct) {
3521             /* If this is a union type, pretend we haven't seen the data
3522                member location expression, as it is by definition
3523                redundant (it must be zero). */
3524             if (fieldE.Te.Field.nLoc > 0)
3525                ML_(dinfo_free)(fieldE.Te.Field.pos.loc);
3526             fieldE.Te.Field.pos.loc = NULL;
3527             fieldE.Te.Field.nLoc = 0;
3528          }
3529          /* Record this child in the parent */
3530          fieldE.Te.Field.isStruct = parent_is_struct;
3531          vg_assert(parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs);
3532          VG_(addToXA)( parser->qparentE[parser->sp].Te.TyStOrUn.fieldRs,
3533                        &posn );
3534          /* And record the child itself */
3535          goto acquire_Field;
3536       } else {
3537          /* Member with no location - this can happen with static
3538             const members in C++ code which are compile time constants
3539             that do no exist in the class. They're not of any interest
3540             to us so we ignore them. */
3541          ML_(TyEnt__make_EMPTY)(&fieldE);
3542       }
3543    }
3544 
3545    if (dtag == DW_TAG_array_type) {
3546       VG_(memset)(&typeE, 0, sizeof(typeE));
3547       typeE.cuOff = posn;
3548       typeE.tag   = Te_TyArray;
3549       typeE.Te.TyArray.typeR = D3_INVALID_CUOFF;
3550       typeE.Te.TyArray.boundRs
3551          = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ptD.array_type.1",
3552                        ML_(dinfo_free),
3553                        sizeof(UWord) );
3554       nf_i = 0;
3555       while (True) {
3556          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3557          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3558          nf_i++;
3559          if (attr == 0 && form == 0) break;
3560          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3561          if (attr == DW_AT_type && cts.szB > 0) {
3562             typeE.Te.TyArray.typeR
3563                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3564          }
3565       }
3566       if (typeE.Te.TyArray.typeR == D3_INVALID_CUOFF)
3567          goto_bad_DIE;
3568       /* On't stack! */
3569       typestack_push( cc, parser, td3, &typeE, level );
3570       goto acquire_Type;
3571    }
3572 
3573    /* this is a subrange type defining the bounds of an array. */
3574    if (dtag == DW_TAG_subrange_type
3575        && subrange_type_denotes_array_bounds(parser, dtag)) {
3576       Bool have_lower = False;
3577       Bool have_upper = False;
3578       Bool have_count = False;
3579       Long lower = 0;
3580       Long upper = 0;
3581 
3582       switch (parser->language) {
3583          case 'C': have_lower = True;  lower = 0; break;
3584          case 'F': have_lower = True;  lower = 1; break;
3585          case '?': have_lower = False; break;
3586          case 'A': have_lower = False; break;
3587          default:  vg_assert(0); /* assured us by handling of
3588                                     DW_TAG_compile_unit in this fn */
3589       }
3590 
3591       VG_(memset)( &boundE, 0, sizeof(boundE) );
3592       boundE.cuOff = D3_INVALID_CUOFF;
3593       boundE.tag   = Te_Bound;
3594       nf_i = 0;
3595       while (True) {
3596          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3597          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3598          nf_i++;
3599          if (attr == 0 && form == 0) break;
3600          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3601          if (attr == DW_AT_lower_bound && cts.szB > 0) {
3602             lower      = (Long)cts.u.val;
3603             have_lower = True;
3604          }
3605          if (attr == DW_AT_upper_bound && cts.szB > 0) {
3606             upper      = (Long)cts.u.val;
3607             have_upper = True;
3608          }
3609          if (attr == DW_AT_count && cts.szB > 0) {
3610             /*count    = (Long)cts.u.val;*/
3611             have_count = True;
3612          }
3613       }
3614       /* FIXME: potentially skip the rest if no parent present, since
3615          it could be the case that this subrange type is free-standing
3616          (not being used to describe the bounds of a containing array
3617          type) */
3618       /* Do we have a plausible parent? */
3619       if (typestack_is_empty(parser)) goto_bad_DIE;
3620       vg_assert(ML_(TyEnt__is_type)(&parser->qparentE[parser->sp]));
3621       vg_assert(parser->qparentE[parser->sp].cuOff != D3_INVALID_CUOFF);
3622       if (level != parser->qlevel[parser->sp]+1) goto_bad_DIE;
3623       if (parser->qparentE[parser->sp].tag != Te_TyArray) goto_bad_DIE;
3624 
3625       /* Figure out if we have a definite range or not */
3626       if (have_lower && have_upper && (!have_count)) {
3627          boundE.Te.Bound.knownL = True;
3628          boundE.Te.Bound.knownU = True;
3629          boundE.Te.Bound.boundL = lower;
3630          boundE.Te.Bound.boundU = upper;
3631       }
3632       else if (have_lower && (!have_upper) && (!have_count)) {
3633          boundE.Te.Bound.knownL = True;
3634          boundE.Te.Bound.knownU = False;
3635          boundE.Te.Bound.boundL = lower;
3636          boundE.Te.Bound.boundU = 0;
3637       }
3638       else if ((!have_lower) && have_upper && (!have_count)) {
3639          boundE.Te.Bound.knownL = False;
3640          boundE.Te.Bound.knownU = True;
3641          boundE.Te.Bound.boundL = 0;
3642          boundE.Te.Bound.boundU = upper;
3643       }
3644       else if ((!have_lower) && (!have_upper) && (!have_count)) {
3645          boundE.Te.Bound.knownL = False;
3646          boundE.Te.Bound.knownU = False;
3647          boundE.Te.Bound.boundL = 0;
3648          boundE.Te.Bound.boundU = 0;
3649       } else {
3650          /* FIXME: handle more cases */
3651          goto_bad_DIE;
3652       }
3653 
3654       /* Record this bound in the parent */
3655       boundE.cuOff = posn;
3656       vg_assert(parser->qparentE[parser->sp].Te.TyArray.boundRs);
3657       VG_(addToXA)( parser->qparentE[parser->sp].Te.TyArray.boundRs,
3658                     &boundE.cuOff );
3659       /* And record the child itself */
3660       goto acquire_Bound;
3661    }
3662 
3663    /* typedef or subrange_type other than array bounds. */
3664    if (dtag == DW_TAG_typedef
3665        || (dtag == DW_TAG_subrange_type
3666            && !subrange_type_denotes_array_bounds(parser, dtag))) {
3667       /* subrange_type other than array bound is only for Ada. */
3668       vg_assert (dtag == DW_TAG_typedef || parser->language == 'A');
3669       /* We can pick up a new typedef/subrange_type any time. */
3670       VG_(memset)(&typeE, 0, sizeof(typeE));
3671       typeE.cuOff = D3_INVALID_CUOFF;
3672       typeE.tag   = Te_TyTyDef;
3673       typeE.Te.TyTyDef.name = NULL;
3674       typeE.Te.TyTyDef.typeR = D3_INVALID_CUOFF;
3675       nf_i = 0;
3676       while (True) {
3677          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3678          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3679          nf_i++;
3680          if (attr == 0 && form == 0) break;
3681          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3682          if (attr == DW_AT_name && cts.szB < 0) {
3683             typeE.Te.TyTyDef.name
3684                = ML_(cur_read_strdup)( cts.u.cur,
3685                                        "di.readdwarf3.ptD.typedef.1" );
3686          }
3687          if (attr == DW_AT_type && cts.szB > 0) {
3688             typeE.Te.TyTyDef.typeR
3689                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3690          }
3691       }
3692       /* Do we have something that looks sane?
3693          gcc gnat Ada generates minimal typedef
3694          such as the below
3695          <6><91cc>: DW_TAG_typedef
3696             DW_AT_abstract_ori: <9066>
3697          g++ for OMP can generate artificial functions that have
3698          parameters that refer to pointers to unnamed typedefs.
3699          See https://bugs.kde.org/show_bug.cgi?id=273475
3700          So we cannot require a name for a DW_TAG_typedef.
3701       */
3702       goto acquire_Type;
3703    }
3704 
3705    if (dtag == DW_TAG_subroutine_type) {
3706       /* function type? just record that one fact and ask no
3707          further questions. */
3708       VG_(memset)(&typeE, 0, sizeof(typeE));
3709       typeE.cuOff = D3_INVALID_CUOFF;
3710       typeE.tag   = Te_TyFn;
3711       goto acquire_Type;
3712    }
3713 
3714    if (dtag == DW_TAG_volatile_type || dtag == DW_TAG_const_type
3715        || dtag == DW_TAG_restrict_type) {
3716       Int have_ty = 0;
3717       VG_(memset)(&typeE, 0, sizeof(typeE));
3718       typeE.cuOff = D3_INVALID_CUOFF;
3719       typeE.tag   = Te_TyQual;
3720       typeE.Te.TyQual.qual
3721          = (dtag == DW_TAG_volatile_type ? 'V'
3722             : (dtag == DW_TAG_const_type ? 'C' : 'R'));
3723       /* target type defaults to 'void' */
3724       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3725       nf_i = 0;
3726       while (True) {
3727          DW_AT   attr = (DW_AT)  abbv->nf[nf_i].at_name;
3728          DW_FORM form = (DW_FORM)abbv->nf[nf_i].at_form;
3729          nf_i++;
3730          if (attr == 0 && form == 0) break;
3731          get_Form_contents( &cts, cc, c_die, False/*td3*/, form );
3732          if (attr == DW_AT_type && cts.szB > 0) {
3733             typeE.Te.TyQual.typeR
3734                = cook_die_using_form( cc, (UWord)cts.u.val, form );
3735             have_ty++;
3736          }
3737       }
3738       /* gcc sometimes generates DW_TAG_const/volatile_type without
3739          DW_AT_type and GDB appears to interpret the type as 'const
3740          void' (resp. 'volatile void').  So just allow it .. */
3741       if (have_ty == 1 || have_ty == 0)
3742          goto acquire_Type;
3743       else
3744          goto_bad_DIE;
3745    }
3746 
3747    /*
3748     * Treat DW_TAG_unspecified_type as type void. An example of DW_TAG_unspecified_type:
3749     *
3750     * $ readelf --debug-dump /usr/lib/debug/usr/lib/libstdc++.so.6.0.16.debug
3751     *  <1><10d4>: Abbrev Number: 53 (DW_TAG_unspecified_type)
3752     *     <10d5>   DW_AT_name        : (indirect string, offset: 0xdb7): decltype(nullptr)
3753     */
3754    if (dtag == DW_TAG_unspecified_type) {
3755       VG_(memset)(&typeE, 0, sizeof(typeE));
3756       typeE.cuOff           = D3_INVALID_CUOFF;
3757       typeE.tag             = Te_TyQual;
3758       typeE.Te.TyQual.typeR = D3_FAKEVOID_CUOFF;
3759       goto acquire_Type;
3760    }
3761 
3762    /* else ignore this DIE */
3763    return;
3764    /*NOTREACHED*/
3765 
3766   acquire_Type:
3767    if (0) VG_(printf)("YYYY Acquire Type\n");
3768    vg_assert(ML_(TyEnt__is_type)( &typeE ));
3769    vg_assert(typeE.cuOff == D3_INVALID_CUOFF || typeE.cuOff == posn);
3770    typeE.cuOff = posn;
3771    VG_(addToXA)( tyents, &typeE );
3772    return;
3773    /*NOTREACHED*/
3774 
3775   acquire_Atom:
3776    if (0) VG_(printf)("YYYY Acquire Atom\n");
3777    vg_assert(atomE.tag == Te_Atom);
3778    vg_assert(atomE.cuOff == D3_INVALID_CUOFF || atomE.cuOff == posn);
3779    atomE.cuOff = posn;
3780    VG_(addToXA)( tyents, &atomE );
3781    return;
3782    /*NOTREACHED*/
3783 
3784   acquire_Field:
3785    /* For union members, Expr should be absent */
3786    if (0) VG_(printf)("YYYY Acquire Field\n");
3787    vg_assert(fieldE.tag == Te_Field);
3788    vg_assert(fieldE.Te.Field.nLoc <= 0 || fieldE.Te.Field.pos.loc != NULL);
3789    vg_assert(fieldE.Te.Field.nLoc != 0 || fieldE.Te.Field.pos.loc == NULL);
3790    if (fieldE.Te.Field.isStruct) {
3791       vg_assert(fieldE.Te.Field.nLoc != 0);
3792    } else {
3793       vg_assert(fieldE.Te.Field.nLoc == 0);
3794    }
3795    vg_assert(fieldE.cuOff == D3_INVALID_CUOFF || fieldE.cuOff == posn);
3796    fieldE.cuOff = posn;
3797    VG_(addToXA)( tyents, &fieldE );
3798    return;
3799    /*NOTREACHED*/
3800 
3801   acquire_Bound:
3802    if (0) VG_(printf)("YYYY Acquire Bound\n");
3803    vg_assert(boundE.tag == Te_Bound);
3804    vg_assert(boundE.cuOff == D3_INVALID_CUOFF || boundE.cuOff == posn);
3805    boundE.cuOff = posn;
3806    VG_(addToXA)( tyents, &boundE );
3807    return;
3808    /*NOTREACHED*/
3809 
3810   bad_DIE:
3811    dump_bad_die_and_barf("parse_type_DIE", dtag, posn, level,
3812                          c_die, saved_die_c_offset,
3813                          abbv,
3814                          cc);
3815    /*NOTREACHED*/
3816 }
3817 
3818 
3819 /*------------------------------------------------------------*/
3820 /*---                                                      ---*/
3821 /*--- Compression of type DIE information                  ---*/
3822 /*---                                                      ---*/
3823 /*------------------------------------------------------------*/
3824 
/* Follow at most one level of indirection starting from 'cuOff'.

   The entry for 'cuOff' is looked up in 'ents' (via 'ents_cache').
   - If no entry is found, a diagnostic is printed, *changed is set to
     False and 'cuOff' is returned unmodified.
   - If the entry is a Te_INDIR, *changed is set to True and the
     indirection's target offset is returned.  The target is asserted
     to be strictly lower than 'cuOff'.
   - Otherwise *changed is set to False and 'cuOff' is returned.

   *changed is written on every path. */
static UWord chase_cuOff ( Bool* changed,
                           const XArray* /* of TyEnt */ ents,
                           TyEntIndexCache* ents_cache,
                           UWord cuOff )
{
   TyEnt* target = ML_(TyEnts__index_by_cuOff)( ents, ents_cache, cuOff );

   if (target == NULL) {
      VG_(printf)("chase_cuOff: no entry for 0x%05lx\n", cuOff);
      *changed = False;
      return cuOff;
   }

   vg_assert(target->tag != Te_EMPTY);

   if (target->tag == Te_INDIR) {
      /* Indirections must always point "backwards". */
      vg_assert(target->Te.INDIR.indR < cuOff);
      *changed = True;
      return target->Te.INDIR.indR;
   }

   /* Not an indirection: nothing to chase. */
   *changed = False;
   return cuOff;
}
3849 
3850 static
chase_cuOffs_in_XArray(Bool * changed,const XArray * ents,TyEntIndexCache * ents_cache,XArray * cuOffs)3851 void chase_cuOffs_in_XArray ( Bool* changed,
3852                               const XArray* /* of TyEnt */ ents,
3853                               TyEntIndexCache* ents_cache,
3854                               /*MOD*/XArray* /* of UWord */ cuOffs )
3855 {
3856    Bool b2 = False;
3857    Word i, n = VG_(sizeXA)( cuOffs );
3858    for (i = 0; i < n; i++) {
3859       Bool   b = False;
3860       UWord* p = VG_(indexXA)( cuOffs, i );
3861       *p = chase_cuOff( &b, ents, ents_cache, *p );
3862       if (b)
3863          b2 = True;
3864    }
3865    *changed = b2;
3866 }
3867 
TyEnt__subst_R_fields(const XArray * ents,TyEntIndexCache * ents_cache,TyEnt * te)3868 static Bool TyEnt__subst_R_fields ( const XArray* /* of TyEnt */ ents,
3869                                     TyEntIndexCache* ents_cache,
3870                                     /*MOD*/TyEnt* te )
3871 {
3872    Bool b, changed = False;
3873    switch (te->tag) {
3874       case Te_EMPTY:
3875          break;
3876       case Te_INDIR:
3877          te->Te.INDIR.indR
3878             = chase_cuOff( &b, ents, ents_cache, te->Te.INDIR.indR );
3879          if (b) changed = True;
3880          break;
3881       case Te_UNKNOWN:
3882          break;
3883       case Te_Atom:
3884          break;
3885       case Te_Field:
3886          te->Te.Field.typeR
3887             = chase_cuOff( &b, ents, ents_cache, te->Te.Field.typeR );
3888          if (b) changed = True;
3889          break;
3890       case Te_Bound:
3891          break;
3892       case Te_TyBase:
3893          break;
3894       case Te_TyPtr:
3895       case Te_TyRef:
3896       case Te_TyPtrMbr:
3897       case Te_TyRvalRef:
3898          te->Te.TyPorR.typeR
3899             = chase_cuOff( &b, ents, ents_cache, te->Te.TyPorR.typeR );
3900          if (b) changed = True;
3901          break;
3902       case Te_TyTyDef:
3903          te->Te.TyTyDef.typeR
3904             = chase_cuOff( &b, ents, ents_cache, te->Te.TyTyDef.typeR );
3905          if (b) changed = True;
3906          break;
3907       case Te_TyStOrUn:
3908          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyStOrUn.fieldRs );
3909          if (b) changed = True;
3910          break;
3911       case Te_TyEnum:
3912          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyEnum.atomRs );
3913          if (b) changed = True;
3914          break;
3915       case Te_TyArray:
3916          te->Te.TyArray.typeR
3917             = chase_cuOff( &b, ents, ents_cache, te->Te.TyArray.typeR );
3918          if (b) changed = True;
3919          chase_cuOffs_in_XArray( &b, ents, ents_cache, te->Te.TyArray.boundRs );
3920          if (b) changed = True;
3921          break;
3922       case Te_TyFn:
3923          break;
3924       case Te_TyQual:
3925          te->Te.TyQual.typeR
3926             = chase_cuOff( &b, ents, ents_cache, te->Te.TyQual.typeR );
3927          if (b) changed = True;
3928          break;
3929       case Te_TyVoid:
3930          break;
3931       default:
3932          ML_(pp_TyEnt)(te);
3933          vg_assert(0);
3934    }
3935    return changed;
3936 }
3937 
3938 /* Make a pass over 'ents'.  For each tyent, inspect the target of any
3939    'R' or 'Rs' fields (those which refer to other tyents), and replace
3940    any which point to INDIR nodes with the target of the indirection
3941    (which should not itself be an indirection).  In summary, this
3942    routine shorts out all references to indirection nodes. */
3943 static
dedup_types_substitution_pass(XArray * ents,TyEntIndexCache * ents_cache)3944 Word dedup_types_substitution_pass ( /*MOD*/XArray* /* of TyEnt */ ents,
3945                                      TyEntIndexCache* ents_cache )
3946 {
3947    Word i, n, nChanged = 0;
3948    Bool b;
3949    n = VG_(sizeXA)( ents );
3950    for (i = 0; i < n; i++) {
3951       TyEnt* ent = VG_(indexXA)( ents, i );
3952       vg_assert(ent->tag != Te_EMPTY);
3953       /* We have to substitute everything, even indirections, so as to
3954          ensure that chains of indirections don't build up. */
3955       b = TyEnt__subst_R_fields( ents, ents_cache, ent );
3956       if (b)
3957          nChanged++;
3958    }
3959 
3960    return nChanged;
3961 }
3962 
3963 
3964 /* Make a pass over 'ents', building a dictionary of TyEnts as we go.
3965    Look up each new tyent in the dictionary in turn.  If it is already
3966    in the dictionary, replace this tyent with an indirection to the
3967    existing one, and delete any malloc'd stuff hanging off this one.
3968    In summary, this routine commons up all tyents that are identical
3969    as defined by TyEnt__cmp_by_all_except_cuOff. */
3970 static
dedup_types_commoning_pass(XArray * ents)3971 Word dedup_types_commoning_pass ( /*MOD*/XArray* /* of TyEnt */ ents )
3972 {
3973    Word    n, i, nDeleted;
3974    WordFM* dict; /* TyEnt* -> void */
3975    TyEnt*  ent;
3976    UWord   keyW, valW;
3977 
3978    dict = VG_(newFM)(
3979              ML_(dinfo_zalloc), "di.readdwarf3.dtcp.1",
3980              ML_(dinfo_free),
3981              (Word(*)(UWord,UWord)) ML_(TyEnt__cmp_by_all_except_cuOff)
3982           );
3983 
3984    nDeleted = 0;
3985    n = VG_(sizeXA)( ents );
3986    for (i = 0; i < n; i++) {
3987       ent = VG_(indexXA)( ents, i );
3988       vg_assert(ent->tag != Te_EMPTY);
3989 
3990       /* Ignore indirections, although check that they are
3991          not forming a cycle. */
3992       if (ent->tag == Te_INDIR) {
3993          vg_assert(ent->Te.INDIR.indR < ent->cuOff);
3994          continue;
3995       }
3996 
3997       keyW = valW = 0;
3998       if (VG_(lookupFM)( dict, &keyW, &valW, (UWord)ent )) {
3999          /* it's already in the dictionary. */
4000          TyEnt* old = (TyEnt*)keyW;
4001          vg_assert(valW == 0);
4002          vg_assert(old != ent);
4003          vg_assert(old->tag != Te_INDIR);
4004          /* since we are traversing the array in increasing order of
4005             cuOff: */
4006          vg_assert(old->cuOff < ent->cuOff);
4007          /* So anyway, dump this entry and replace it with an
4008             indirection to the one in the dictionary.  Note that the
4009             assertion above guarantees that we cannot create cycles of
4010             indirections, since we are always creating an indirection
4011             to a tyent with a cuOff lower than this one. */
4012          ML_(TyEnt__make_EMPTY)( ent );
4013          ent->tag = Te_INDIR;
4014          ent->Te.INDIR.indR = old->cuOff;
4015          nDeleted++;
4016       } else {
4017          /* not in dictionary; add it and keep going. */
4018          VG_(addToFM)( dict, (UWord)ent, 0 );
4019       }
4020    }
4021 
4022    VG_(deleteFM)( dict, NULL, NULL );
4023 
4024    return nDeleted;
4025 }
4026 
4027 
4028 static
dedup_types(Bool td3,XArray * ents,TyEntIndexCache * ents_cache)4029 void dedup_types ( Bool td3,
4030                    /*MOD*/XArray* /* of TyEnt */ ents,
4031                    TyEntIndexCache* ents_cache )
4032 {
4033    Word m, n, i, nDel, nSubst, nThresh;
4034    if (0) td3 = True;
4035 
4036    n = VG_(sizeXA)( ents );
4037 
4038    /* If a commoning pass and a substitution pass both make fewer than
4039       this many changes, just stop.  It's pointless to burn up CPU
4040       time trying to compress the last 1% or so out of the array. */
4041    nThresh = n / 200;
4042 
4043    /* First we must sort .ents by its .cuOff fields, so we
4044       can index into it. */
4045    VG_(setCmpFnXA)( ents, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
4046    VG_(sortXA)( ents );
4047 
4048    /* Now repeatedly do commoning and substitution passes over
4049       the array, until there are no more changes. */
4050    do {
4051       nDel   = dedup_types_commoning_pass ( ents );
4052       nSubst = dedup_types_substitution_pass ( ents, ents_cache );
4053       vg_assert(nDel >= 0 && nSubst >= 0);
4054       TRACE_D3("   %ld deletions, %ld substitutions\n", nDel, nSubst);
4055    } while (nDel > nThresh || nSubst > nThresh);
4056 
4057    /* Sanity check: all INDIR nodes should point at a non-INDIR thing.
4058       In fact this should be true at the end of every loop iteration
4059       above (a commoning pass followed by a substitution pass), but
4060       checking it on every iteration is excessively expensive.  Note,
4061       this loop also computes 'm' for the stats printing below it. */
4062    m = 0;
4063    n = VG_(sizeXA)( ents );
4064    for (i = 0; i < n; i++) {
4065       TyEnt *ent, *ind;
4066       ent = VG_(indexXA)( ents, i );
4067       if (ent->tag != Te_INDIR) continue;
4068       m++;
4069       ind = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4070                                          ent->Te.INDIR.indR );
4071       vg_assert(ind);
4072       vg_assert(ind->tag != Te_INDIR);
4073    }
4074 
4075    TRACE_D3("Overall: %ld before, %ld after\n", n, n-m);
4076 }
4077 
4078 
4079 /*------------------------------------------------------------*/
4080 /*---                                                      ---*/
4081 /*--- Resolution of references to type DIEs                ---*/
4082 /*---                                                      ---*/
4083 /*------------------------------------------------------------*/
4084 
4085 /* Make a pass through the (temporary) variables array.  Examine the
4086    type of each variable, check is it found, and chase any Te_INDIRs.
4087    Postcondition is: each variable has a typeR field that refers to a
4088    valid type in tyents, or a Te_UNKNOWN, and is certainly guaranteed
4089    not to refer to a Te_INDIR.  (This is so that we can throw all the
4090    Te_INDIRs away later). */
4091 
4092 __attribute__((noinline))
resolve_variable_types(void (* barf)(const HChar *),XArray * ents,TyEntIndexCache * ents_cache,XArray * vars)4093 static void resolve_variable_types (
4094                void (*barf)( const HChar* ) __attribute__((noreturn)),
4095                /*R-O*/XArray* /* of TyEnt */ ents,
4096                /*MOD*/TyEntIndexCache* ents_cache,
4097                /*MOD*/XArray* /* of TempVar* */ vars
4098             )
4099 {
4100    Word i, n;
4101    n = VG_(sizeXA)( vars );
4102    for (i = 0; i < n; i++) {
4103       TempVar* var = *(TempVar**)VG_(indexXA)( vars, i );
4104       /* This is the stated type of the variable.  But it might be
4105          an indirection, so be careful. */
4106       TyEnt* ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4107                                                 var->typeR );
4108       if (ent && ent->tag == Te_INDIR) {
4109          ent = ML_(TyEnts__index_by_cuOff)( ents, ents_cache,
4110                                             ent->Te.INDIR.indR );
4111          vg_assert(ent);
4112          vg_assert(ent->tag != Te_INDIR);
4113       }
4114 
4115       /* Deal first with "normal" cases */
4116       if (ent && ML_(TyEnt__is_type)(ent)) {
4117          var->typeR = ent->cuOff;
4118          continue;
4119       }
4120 
4121       /* If there's no ent, it probably we did not manage to read a
4122          type at the cuOffset which is stated as being this variable's
4123          type.  Maybe a deficiency in parse_type_DIE.  Complain. */
4124       if (ent == NULL) {
4125          VG_(printf)("\n: Invalid cuOff = 0x%05lx\n", var->typeR );
4126          barf("resolve_variable_types: "
4127               "cuOff does not refer to a known type");
4128       }
4129       vg_assert(ent);
4130       /* If ent has any other tag, something bad happened, along the
4131          lines of var->typeR not referring to a type at all. */
4132       vg_assert(ent->tag == Te_UNKNOWN);
4133       /* Just accept it; the type will be useless, but at least keep
4134          going. */
4135       var->typeR = ent->cuOff;
4136    }
4137 }
4138 
4139 
4140 /*------------------------------------------------------------*/
4141 /*---                                                      ---*/
4142 /*--- Parsing of Compilation Units                         ---*/
4143 /*---                                                      ---*/
4144 /*------------------------------------------------------------*/
4145 
cmp_TempVar_by_dioff(const void * v1,const void * v2)4146 static Int cmp_TempVar_by_dioff ( const void* v1, const void* v2 ) {
4147    const TempVar* t1 = *(const TempVar *const *)v1;
4148    const TempVar* t2 = *(const TempVar *const *)v2;
4149    if (t1->dioff < t2->dioff) return -1;
4150    if (t1->dioff > t2->dioff) return 1;
4151    return 0;
4152 }
4153 
/* Read the DIE at the current position of cursor 'c', then (depending
   on what the parsers ask for) either recurse into its children or
   jump straight to its sibling.

   The DIE is offered to each enabled parser in turn:
   - if VG_(clo_read_var_info) is set: parse_type_DIE, then
     parse_var_DIE;
   - if VG_(clo_read_inline_info) is set: parse_inl_DIE.
   Each parser sees the cursor at the same place (just after the
   abbreviation code), so the cursor is rewound between parsers.  If
   no parser moved the cursor, the DIE is skipped with skip_DIE, which
   may also report the DIE's sibling.

   rangestree, tyents, tempvars, gexprs, typarser, varparser and
   inlparser are handed to the parsers and to recursive calls.
   'level' is the nesting depth of this DIE; children are read with
   level+1.  'td3' is passed on to the parsers and is presumably what
   the TD3/TRACE_D3 macros test.

   Calls cc->barf if the DIE has a zero tag or an invalid has_children
   value.

   On return the cursor is positioned after this DIE and, if it has
   children, after its children and their terminating zero (or at the
   sibling, when the children were skipped). */
static void read_DIE (
   /*MOD*/WordFM* /* of (XArray* of AddrRange, void) */ rangestree,
   /*MOD*/XArray* /* of TyEnt */ tyents,
   /*MOD*/XArray* /* of TempVar* */ tempvars,
   /*MOD*/XArray* /* of GExpr* */ gexprs,
   /*MOD*/D3TypeParser* typarser,
   /*MOD*/D3VarParser* varparser,
   /*MOD*/D3InlParser* inlparser,
   Cursor* c, Bool td3, CUConst* cc, Int level
)
{
   const g_abbv *abbv;
   ULong  atag, abbv_code;
   UWord  posn;
   UInt   has_children;
   UWord  start_die_c_offset;
   UWord  after_die_c_offset;
   // If the DIE we are about to parse has a sibling and no parser
   // asks for the children to be parsed, the children are skipped by
   // jumping directly to that sibling.  0 means "no sibling known".
   UWord  sibling = 0;
   Bool   parse_children = False;

   /* --- Deal with this DIE --- */
   posn      = cook_die( cc, get_position_of_Cursor( c ) );
   abbv_code = get_ULEB128( c );
   abbv = get_abbv(cc, abbv_code);
   atag      = abbv->atag;

   if (TD3) {
      TRACE_D3("\n");
      trace_DIE ((DW_TAG)atag, posn, level,
                 get_position_of_Cursor( c ), abbv, cc);
   }

   if (atag == 0)
      cc->barf("read_DIE: invalid zero tag on DIE");

   has_children = abbv->has_children;
   if (has_children != DW_children_no && has_children != DW_children_yes)
      cc->barf("read_DIE: invalid has_children value");

   /* We're set up to look at the fields of this DIE.  Hand it off to
      any parser(s) that want to see it.  Since they will in general
      advance the DIE cursor, remember the current position so that we
      can back up before calling the next parser. */
   start_die_c_offset  = get_position_of_Cursor( c );
   after_die_c_offset  = 0; // set to c position if a parser has read the DIE.

   if (VG_(clo_read_var_info)) {
      parse_type_DIE( tyents,
                      typarser,
                      (DW_TAG)atag,
                      posn,
                      level,
                      c,     /* DIE cursor */
                      abbv,  /* abbrev */
                      cc,
                      td3 );
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         /* The parser consumed the DIE: note where it ends, rewind. */
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_var_DIE( rangestree,
                     tempvars,
                     gexprs,
                     varparser,
                     (DW_TAG)atag,
                     posn,
                     level,
                     c,     /* DIE cursor */
                     abbv,  /* abbrev */
                     cc,
                     td3 );
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         set_position_of_Cursor( c, start_die_c_offset );
      }

      parse_children = True;
      // The type and var parsers have no logic to skip children or to
      // establish the value of sibling, so children are always parsed.
   }

   if (VG_(clo_read_inline_info)) {
      inlparser->sibling = 0;
      parse_children =
         parse_inl_DIE( inlparser,
                        (DW_TAG)atag,
                        posn,
                        level,
                        c,     /* DIE cursor */
                        abbv, /* abbrev */
                        cc,
                        td3 )
         || parse_children;
      if (get_position_of_Cursor( c ) != start_die_c_offset) {
         after_die_c_offset = get_position_of_Cursor( c );
         // Last parser, no need to reset the cursor to start_die_c_offset.
      }
      if (sibling == 0)
         sibling = inlparser->sibling;
      vg_assert (inlparser->sibling == 0 || inlparser->sibling == sibling);
   }

   if (after_die_c_offset > 0) {
      // DIE was read by a parser above, so we know where the DIE ends.
      set_position_of_Cursor( c, after_die_c_offset );
   } else {
      /* No parser has parsed this DIE. So, we need to skip the DIE,
         in order to read the next DIE.
         At the same time, establish sibling value if the DIE has one. */
      TRACE_D3("    uninteresting DIE -> skipping ...\n");
      skip_DIE (&sibling, c, abbv, cc);
   }

   /* --- Now recurse into its children, if any
      and the parsing of the children is requested by a parser --- */
   if (has_children == DW_children_yes) {
      if (parse_children || sibling == 0) {
         /* Either a parser wants the children, or we have no sibling
            to jump to and so must walk through them anyway. */
         if (0) TRACE_D3("BEGIN children of level %d\n", level);
         while (True) {
            atag = peek_ULEB128( c );
            if (atag == 0) break;
            read_DIE( rangestree, tyents, tempvars, gexprs,
                      typarser, varparser, inlparser,
                      c, td3, cc, level+1 );
         }
         /* Now we need to eat the terminating zero */
         atag = get_ULEB128( c );
         vg_assert(atag == 0);
         if (0) TRACE_D3("END children of level %d\n", level);
      } else {
         // We can skip the children, by jumping to the sibling
         TRACE_D3("    SKIPPING DIE's children, "
                  "jumping to sibling <%d><%lx>\n",
                  level, sibling);
         set_position_of_Cursor( c, sibling );
      }
   }

}
4298 
/* Intended to dump the contents of .debug_loc for tracing purposes.
   NOTE: the entire body is currently compiled out with '#if 0' (see
   the explanation inside), so as it stands this function does nothing
   and none of its parameters are used. */
static void trace_debug_loc (const DebugInfo* di,
                             __attribute__((noreturn)) void (*barf)( const HChar* ),
                             DiSlice escn_debug_loc)
{
#if 0
   /* This doesn't work properly because it assumes all entries are
      packed end to end, with no holes.  But that doesn't always
      appear to be the case, so it loses sync.  And the D3 spec
      doesn't appear to require a no-hole situation either. */
   /* Display .debug_loc */
   Addr  dl_base;
   UWord dl_offset;
   Cursor loc; /* for showing .debug_loc */
   Bool td3 = di->trace_symtab;

   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_loc ------\n");
   TRACE_SYMTAB("    Offset   Begin    End      Expression\n");
   if (ML_(sli_is_valid)(escn_debug_loc)) {
      init_Cursor( &loc, escn_debug_loc, 0, barf,
                   "Overrun whilst reading .debug_loc section(1)" );
      dl_base = 0;
      dl_offset = 0;
      while (True) {
         UWord  w1, w2;
         UWord  len;
         if (is_at_end_Cursor( &loc ))
            break;

         /* Read a (host-)word pair.  This is something of a hack since
            the word size to read is really dictated by the ELF file;
            however, we assume we're reading a file with the same
            word-sizeness as the host.  Reasonably enough. */
         w1 = get_UWord( &loc );
         w2 = get_UWord( &loc );

         if (w1 == 0 && w2 == 0) {
            /* end of list.  reset 'base' */
            TRACE_D3("    %08lx <End of list>\n", dl_offset);
            dl_base = 0;
            dl_offset = get_position_of_Cursor( &loc );
            continue;
         }

         if (w1 == -1UL) {
            /* new value for 'base' */
            TRACE_D3("    %08lx %16lx %08lx (base address)\n",
                     dl_offset, w1, w2);
            dl_base = w2;
            continue;
         }

         /* else a location expression follows */
         TRACE_D3("    %08lx %08lx %08lx ",
                  dl_offset, w1 + dl_base, w2 + dl_base);
         len = (UWord)get_UShort( &loc );
         while (len > 0) {
            UChar byte = get_UChar( &loc );
            TRACE_D3("%02x", (UInt)byte);
            len--;
         }
         TRACE_SYMTAB("\n");
      }
   }
#endif
}
4365 
/* Dump the contents of .debug_ranges via the tracing macros.

   The section is read as a sequence of host-word pairs (w1, w2):
   - (0, 0) ends the current list and resets the base address to 0;
   - w1 == all-ones makes w2 the new base address;
   - anything else is printed as the range w1+base .. w2+base.
   The offset shown on each line is the section offset at which the
   current list started.  If the slice is not valid only the headings
   are printed.  'barf' is installed as the cursor's overrun handler. */
static void trace_debug_ranges (const DebugInfo* di,
                                __attribute__((noreturn)) void (*barf)( const HChar* ),
                                DiSlice escn_debug_ranges)
{
   Cursor ranges; /* for showing .debug_ranges */
   Addr  dr_base;
   UWord dr_offset;
   Bool td3 = di->trace_symtab;

   /* Headings are printed whether or not the section exists. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_ranges ------\n");
   TRACE_SYMTAB("    Offset   Begin    End\n");

   if (!ML_(sli_is_valid)(escn_debug_ranges))
      return;

   init_Cursor( &ranges, escn_debug_ranges, 0, barf,
                "Overrun whilst reading .debug_ranges section(1)" );
   dr_base = 0;
   dr_offset = 0;

   while (!is_at_end_Cursor( &ranges )) {
      /* Read a (host-)word pair.  Strictly the word size is dictated
         by the ELF file, but we assume it matches the host's. */
      UWord w1 = get_UWord( &ranges );
      UWord w2 = get_UWord( &ranges );

      if (w1 == 0 && w2 == 0) {
         /* End of list: reset the base and start a new list here. */
         TRACE_D3("    %08lx <End of list>\n", dr_offset);
         dr_base = 0;
         dr_offset = get_position_of_Cursor( &ranges );
      } else if (w1 == -1UL) {
         /* Base address selection entry. */
         TRACE_D3("    %08lx %16lx %08lx (base address)\n",
                  dr_offset, w1, w2);
         dr_base = w2;
      } else {
         /* Ordinary entry: the range [w1+base, w2+base). */
         TRACE_D3("    %08lx %08lx %08lx\n",
                  dr_offset, w1 + dr_base, w2 + dr_base);
      }
   }
}
4419 
/* Dump the contents of .debug_abbrev via the tracing macros.

   The section is read as a sequence of abbreviation tables until the
   cursor reaches the end.  Each table is a list of entries ended by a
   zero abbreviation code; each entry is a code, a tag, a has_children
   byte and a list of (name, form) pairs ended by a (0, 0) pair.  If
   the slice is not valid only the heading is printed.  'barf' is
   installed as the cursor's overrun handler. */
static void trace_debug_abbrev (const DebugInfo* di,
                                __attribute__((noreturn)) void (*barf)( const HChar* ),
                                DiSlice escn_debug_abbv)
{
   Cursor abbv; /* for showing .debug_abbrev */
   Bool td3 = di->trace_symtab;

   /* The heading is printed whether or not the section exists. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_abbrev ------\n");

   if (!ML_(sli_is_valid)(escn_debug_abbv))
      return;

   init_Cursor( &abbv, escn_debug_abbv, 0, barf,
                "Overrun whilst reading .debug_abbrev section" );

   /* One iteration per abbreviation table. */
   while (!is_at_end_Cursor( &abbv )) {
      ULong acode;

      TRACE_D3("  Number TAG\n");

      /* One iteration per entry; a zero code ends the table. */
      while ((acode = get_ULEB128( &abbv )) != 0) {
         ULong atag         = get_ULEB128( &abbv );
         UInt  has_children = get_UChar( &abbv );

         TRACE_D3("   %llu      %s    [%s]\n",
                  acode, ML_(pp_DW_TAG)(atag),
                         ML_(pp_DW_children)(has_children));

         /* Attribute specifications, ended by a (0, 0) pair. */
         for (;;) {
            ULong at_name = get_ULEB128( &abbv );
            ULong at_form = get_ULEB128( &abbv );
            if (at_name == 0 && at_form == 0) {
               break;
            }
            TRACE_D3("    %-18s %s\n",
                     ML_(pp_DW_AT)(at_name), ML_(pp_DW_FORM)(at_form));
         }
      }
   }
}
4459 
/* Worker for the DWARF3 reader.

   Reads the DIE trees found in the alternate .debug_info, the
   .debug_info and the .debug_types slices (in that order, one pass
   each; a pass is skipped if its section is not usable).  If
   VG_(clo_read_var_info) is set, it then post-processes what was
   harvested:
     - type entries are de-duplicated, and the non-INDIR ones are
       recorded in di->admin_tyents;
     - location expressions are biased and recorded in
       di->admin_gexprs;
     - each variable with a location and a resolved type is handed to
       ML_(addVar), once per address range in which it exists.

   'di' is the DebugInfo being filled in.  The escn_* arguments are the
   slices of the relevant debug sections; validity is checked here
   (with ML_(sli_is_valid)) only for the alternate .debug_info and the
   .debug_types slices.

   'barf' is the error-reporting function and is declared noreturn, so
   any failure reported through it does not come back to the call
   site.  NOTE(review): as barf does not return, the temporary
   structures allocated below appear not to be released on that
   path -- confirm. */
static
void new_dwarf3_reader_wrk (
   DebugInfo* di,
   __attribute__((noreturn)) void (*barf)( const HChar* ),
   DiSlice escn_debug_info,      DiSlice escn_debug_types,
   DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
   DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
   DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
   DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
   DiSlice escn_debug_str_alt
)
{
   XArray* /* of TyEnt */     tyents = NULL;
   XArray* /* of TyEnt */     tyents_to_keep = NULL;
   XArray* /* of GExpr* */    gexprs = NULL;
   XArray* /* of TempVar* */  tempvars = NULL;
   WordFM* /* of (XArray* of AddrRange, void) */ rangestree = NULL;
   TyEntIndexCache* tyents_cache = NULL;
   TyEntIndexCache* tyents_to_keep_cache = NULL;
   TempVar *varp, *varp2;
   GExpr* gexpr;
   Cursor info; /* primary cursor for parsing .debug_info */
   D3TypeParser typarser;
   D3VarParser varparser;
   D3InlParser inlparser;
   Word  i, j, n;
   Bool td3 = di->trace_symtab;
   XArray* /* of TempVar* */ dioff_lookup_tab;
   Int pass;
   VgHashTable *signature_types = NULL;

   /* Display/trace various information, if requested. */
   if (TD3) {
      trace_debug_loc    (di, barf, escn_debug_loc);
      trace_debug_ranges (di, barf, escn_debug_ranges);
      trace_debug_abbrev (di, barf, escn_debug_abbv);
      TRACE_SYMTAB("\n");
   }

   /* Zero out the inline parser.  The type and variable parsers are
      not touched here; they are initialised just below, and only if
      VG_(clo_read_var_info) is set. */
   VG_(memset)( &inlparser, 0, sizeof(inlparser) );

   if (VG_(clo_read_var_info)) {
      /* We'll park the harvested type information in here.  Also create
         a fake "void" entry with offset D3_FAKEVOID_CUOFF, so we always
         have at least one type entry to refer to.  D3_FAKEVOID_CUOFF is
         huge and presumably will not occur in any valid DWARF3 file --
         it would need to have a .debug_info section 4GB long for that to
         happen.  These type entries end up in the DebugInfo. */
      tyents = VG_(newXA)( ML_(dinfo_zalloc),
                           "di.readdwarf3.ndrw.1 (TyEnt temp array)",
                           ML_(dinfo_free), sizeof(TyEnt) );
      { TyEnt tyent;
        VG_(memset)(&tyent, 0, sizeof(tyent));
        tyent.tag   = Te_TyVoid;
        tyent.cuOff = D3_FAKEVOID_CUOFF;
        tyent.Te.TyVoid.isFake = True;
        VG_(addToXA)( tyents, &tyent );
      }
      /* Likewise add a Te_UNKNOWN entry at offset D3_INVALID_CUOFF. */
      { TyEnt tyent;
        VG_(memset)(&tyent, 0, sizeof(tyent));
        tyent.tag   = Te_UNKNOWN;
        tyent.cuOff = D3_INVALID_CUOFF;
        VG_(addToXA)( tyents, &tyent );
      }

      /* This is a tree used to unique-ify the range lists that are
         manufactured by parse_var_DIE.  References to the keys in the
         tree wind up in .rngMany fields in TempVars.  We'll need to
         delete this tree, and the XArrays attached to it, at the end of
         this function. */
      rangestree = VG_(newFM)( ML_(dinfo_zalloc),
                               "di.readdwarf3.ndrw.2 (rangestree)",
                               ML_(dinfo_free),
                               (Word(*)(UWord,UWord))cmp__XArrays_of_AddrRange );

      /* List of variables we're accumulating.  These don't end up in the
         DebugInfo; instead their contents are handed to ML_(addVar) and
         the list elements are then deleted. */
      tempvars = VG_(newXA)( ML_(dinfo_zalloc),
                             "di.readdwarf3.ndrw.3 (TempVar*s array)",
                             ML_(dinfo_free),
                             sizeof(TempVar*) );

      /* List of GExprs we're accumulating.  These wind up in the
         DebugInfo. */
      gexprs = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.4",
                           ML_(dinfo_free), sizeof(GExpr*) );

      /* We need a D3TypeParser to keep track of partially constructed
         types.  It'll be discarded as soon as we've completed the CU,
         since the resulting information is tipped in to 'tyents' as it
         is generated. */
      type_parser_init(&typarser);

      var_parser_init(&varparser);

      /* Table of signatured types; filled in by the .debug_types
         pre-pass below and destroyed at the end of this function. */
      signature_types = VG_(HT_construct) ("signature_types");
   }

   /* Do an initial pass to scan the .debug_types section, if any, and
      fill in the signatured types hash table.  This lets us handle
      mapping from a type signature to a (cooked) DIE offset directly
      in get_Form_contents.  */
   if (VG_(clo_read_var_info) && ML_(sli_is_valid)(escn_debug_types)) {
      init_Cursor( &info, escn_debug_types, 0, barf,
                   "Overrun whilst reading .debug_types section" );
      TRACE_D3("\n------ Collecting signatures from "
               ".debug_types section ------\n");

      while (True) {
         UWord   cu_start_offset, cu_offset_now;
         CUConst cc;

         cu_start_offset = get_position_of_Cursor( &info );
         TRACE_D3("\n");
         TRACE_D3("  Compilation Unit @ offset 0x%lx:\n", cu_start_offset);
         /* parse_CU_Header initialises the CU's abbv hash table.  */
         parse_CU_Header( &cc, td3, &info, escn_debug_abbv, True, False );

         /* Needed by cook_die.  */
         cc.types_cuOff_bias = escn_debug_info.szB;

         record_signatured_type( signature_types, cc.type_signature,
                                 cook_die( &cc, cc.type_offset ));

         /* Until proven otherwise we assume we don't need the icc9
            workaround in this case; see the DIE-reading loop below
            for details.  Hence the next unit is taken to start at the
            stated end of this one: its start, plus its unit_length,
            plus 12 or 4 bytes depending on cc.is_dw64. */
         cu_offset_now = (cu_start_offset + cc.unit_length
                          + (cc.is_dw64 ? 12 : 4));

         clear_CUConst ( &cc);

         if (cu_offset_now >= escn_debug_types.szB) {
            break;
         }

         set_position_of_Cursor ( &info, cu_offset_now );
      }
   }

   /* Perform three DIE-reading passes.  The first pass reads DIEs from
      alternate .debug_info (if any), the second pass reads DIEs from
      .debug_info, and the third pass reads DIEs from .debug_types.
      Moving the body of this loop into a separate function would
      require a large number of arguments to be passed in, so it is
      kept inline instead.  */
   for (pass = 0; pass < 3; ++pass) {
      ULong section_size;

      if (pass == 0) {
         if (!ML_(sli_is_valid)(escn_debug_info_alt))
	    continue;
         /* Now loop over the Compilation Units listed in the alternate
            .debug_info section (see D3SPEC sec 7.5) paras 1 and 2.
            Each compilation unit contains a Compilation Unit Header
            followed by precisely one DW_TAG_compile_unit or
            DW_TAG_partial_unit DIE. */
         init_Cursor( &info, escn_debug_info_alt, 0, barf,
                      "Overrun whilst reading alternate .debug_info section" );
         section_size = escn_debug_info_alt.szB;

         TRACE_D3("\n------ Parsing alternate .debug_info section ------\n");
      } else if (pass == 1) {
         /* Now loop over the Compilation Units listed in the .debug_info
            section (see D3SPEC sec 7.5) paras 1 and 2.  Each compilation
            unit contains a Compilation Unit Header followed by precisely
            one DW_TAG_compile_unit or DW_TAG_partial_unit DIE. */
         init_Cursor( &info, escn_debug_info, 0, barf,
                      "Overrun whilst reading .debug_info section" );
         section_size = escn_debug_info.szB;

         TRACE_D3("\n------ Parsing .debug_info section ------\n");
      } else {
         if (!ML_(sli_is_valid)(escn_debug_types))
            continue;
         if (!VG_(clo_read_var_info))
            continue; // Types not needed when only reading inline info.
         init_Cursor( &info, escn_debug_types, 0, barf,
                      "Overrun whilst reading .debug_types section" );
         section_size = escn_debug_types.szB;

         TRACE_D3("\n------ Parsing .debug_types section ------\n");
      }

      /* One iteration per unit in the section selected above. */
      while (True) {
         ULong   cu_start_offset, cu_offset_now;
         CUConst cc;
         /* It may be that the stated size of this CU is larger than the
            amount of stuff actually in it.  icc9 seems to generate CUs
            thusly.  We use these variables to figure out if this is
            indeed the case, and if so how many bytes we need to skip to
            get to the start of the next CU.  Not skipping those bytes
            causes us to misidentify the start of the next CU, and it all
            goes badly wrong after that (not surprisingly). */
         UWord cu_size_including_IniLen, cu_amount_used;

         /* It seems icc9 finishes the DIE info before debug_info_sz
            bytes have been used up.  So be flexible, and declare the
            sequence complete if there is not enough remaining bytes to
            hold even the smallest conceivable CU header.  (11 bytes I
            reckon). */
         /* JRS 23Jan09: I suspect this is no longer necessary now that
            the code below contains a 'while (cu_amount_used <
            cu_size_including_IniLen ...'  style loop, which skips over
            any leftover bytes at the end of a CU in the case where the
            CU's stated size is larger than its actual size (as
            determined by reading all its DIEs).  However, for prudence,
            I'll leave the following test in place.  I can't see that a
            CU header can be smaller than 11 bytes, so I don't think
            there's any harm possible through the test -- it just adds
            robustness. */
         Word avail = get_remaining_length_Cursor( &info );
         if (avail < 11) {
            if (avail > 0)
               TRACE_D3("new_dwarf3_reader_wrk: warning: "
                        "%ld unused bytes after end of DIEs\n", avail);
            break;
         }

         if (VG_(clo_read_var_info)) {
            /* Check the varparser's stack is in a sane state. */
            vg_assert(varparser.sp == -1);
            /* Check the typarser's stack is in a sane state. */
            vg_assert(typarser.sp == -1);
         }

         cu_start_offset = get_position_of_Cursor( &info );
         TRACE_D3("\n");
         TRACE_D3("  Compilation Unit @ offset 0x%llx:\n", cu_start_offset);
         /* parse_CU_Header initialises the CU's hashtable of abbvs ht_abbvs */
         if (pass == 0) {
            parse_CU_Header( &cc, td3, &info, escn_debug_abbv_alt,
                             False, True );
         } else {
            parse_CU_Header( &cc, td3, &info, escn_debug_abbv,
                             pass == 2, False );
         }
         /* Fill in the rest of the per-CU constants.  In pass 0 the
            string, line and info sections are the alternate ones. */
         cc.escn_debug_str      = pass == 0 ? escn_debug_str_alt
                                            : escn_debug_str;
         cc.escn_debug_ranges   = escn_debug_ranges;
         cc.escn_debug_loc      = escn_debug_loc;
         cc.escn_debug_line     = pass == 0 ? escn_debug_line_alt
                                            : escn_debug_line;
         cc.escn_debug_info     = pass == 0 ? escn_debug_info_alt
                                            : escn_debug_info;
         cc.escn_debug_types    = escn_debug_types;
         cc.escn_debug_info_alt = escn_debug_info_alt;
         cc.escn_debug_str_alt  = escn_debug_str_alt;
         cc.types_cuOff_bias    = escn_debug_info.szB;
         cc.alt_cuOff_bias      = escn_debug_info.szB + escn_debug_types.szB;
         cc.cu_start_offset     = cu_start_offset;
         cc.di = di;
         /* The CU's svma can be deduced by looking at the AT_low_pc
            value in the top level TAG_compile_unit, which is the topmost
            DIE.  We'll leave it for the 'varparser' to acquire that info
            and fill it in -- since it is the only party to want to know
            it. */
         cc.cu_svma_known = False;
         cc.cu_svma       = 0;

         if (VG_(clo_read_var_info)) {
            cc.signature_types = signature_types;

            /* Create a fake outermost-level range covering the entire
               address range.  So we always have *something* to catch all
               variable declarations. */
            varstack_push( &cc, &varparser, td3,
                           unitary_range_list(0UL, ~0UL),
                           -1, False/*isFunc*/, NULL/*fbGX*/ );

            /* And set up the fndn_ix_Table.  When we come across the top
               level DIE for this CU (which is what the next call to
               read_DIE should process) we will copy all the file names out
               of the .debug_line img area and use this table to look up the
               copies when we later see filename numbers in DW_TAG_variables
               etc. */
            vg_assert(!varparser.fndn_ix_Table );
            varparser.fndn_ix_Table
               = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5var",
                             ML_(dinfo_free),
                             sizeof(UInt) );
         }

         if (VG_(clo_read_inline_info)) {
            /* fndn_ix_Table for the inlined call parser */
            vg_assert(!inlparser.fndn_ix_Table );
            inlparser.fndn_ix_Table
               = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.5inl",
                             ML_(dinfo_free),
                             sizeof(UInt) );
         }

         /* Now read the one-and-only top-level DIE for this CU. */
         vg_assert(!VG_(clo_read_var_info) || varparser.sp == 0);
         read_DIE( rangestree,
                   tyents, tempvars, gexprs,
                   &typarser, &varparser, &inlparser,
                   &info, td3, &cc, 0 );

         cu_offset_now = get_position_of_Cursor( &info );

         if (0) VG_(printf)("Travelled: %llu  size %llu\n",
                            cu_offset_now - cc.cu_start_offset,
                            cc.unit_length + (cc.is_dw64 ? 12 : 4));

         /* How big the CU claims it is .. */
         cu_size_including_IniLen = cc.unit_length + (cc.is_dw64 ? 12 : 4);
         /* .. vs how big we have found it to be */
         cu_amount_used = cu_offset_now - cc.cu_start_offset;

         if (1) TRACE_D3("offset now %llu, d-i-size %llu\n",
                         cu_offset_now, section_size);
         if (cu_offset_now > section_size)
            barf("toplevel DIEs beyond end of CU");

         /* If the CU is bigger than it claims to be, we've got a serious
            problem. */
         if (cu_amount_used > cu_size_including_IniLen)
            barf("CU's actual size appears to be larger than it claims it is");

         /* If the CU is smaller than it claims to be, we need to skip some
            bytes.  Loop updates cu_offset_now and cu_amount_used, and
            stops early if the cursor runs out of bytes. */
         while (cu_amount_used < cu_size_including_IniLen
                && get_remaining_length_Cursor( &info ) > 0) {
            if (0) VG_(printf)("SKIP\n");
            (void)get_UChar( &info );
            cu_offset_now = get_position_of_Cursor( &info );
            cu_amount_used = cu_offset_now - cc.cu_start_offset;
         }

         if (VG_(clo_read_var_info)) {
            /* Preen to level -2.  DIEs have level >= 0 so -2 cannot occur
               anywhere else at all.  Our fake the-entire-address-space
               range is at level -1, so preening to -2 should completely
               empty the stack out. */
            TRACE_D3("\n");
            varstack_preen( &varparser, td3, -2 );
            /* Similarly, empty the type stack out. */
            typestack_preen( &typarser, td3, -2 );
         }

         /* Discard the per-CU filename tables created above, so that
            the "is NULL" assertions hold for the next CU. */
         if (VG_(clo_read_var_info)) {
            vg_assert(varparser.fndn_ix_Table );
            VG_(deleteXA)( varparser.fndn_ix_Table );
            varparser.fndn_ix_Table = NULL;
         }
         if (VG_(clo_read_inline_info)) {
            vg_assert(inlparser.fndn_ix_Table );
            VG_(deleteXA)( inlparser.fndn_ix_Table );
            inlparser.fndn_ix_Table = NULL;
         }
         clear_CUConst(&cc);

         if (cu_offset_now == section_size)
            break;
         /* else keep going */
      }
   }


   if (VG_(clo_read_var_info)) {
      /* From here on we're post-processing the stuff we got
         out of the .debug_info section. */
      if (TD3) {
         TRACE_D3("\n");
         ML_(pp_TyEnts)(tyents, "Initial type entity (TyEnt) array");
         TRACE_D3("\n");
         TRACE_D3("------ Compressing type entries ------\n");
      }

      tyents_cache = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.6",
                                        sizeof(TyEntIndexCache) );
      ML_(TyEntIndexCache__invalidate)( tyents_cache );
      dedup_types( td3, tyents, tyents_cache );
      if (TD3) {
         TRACE_D3("\n");
         ML_(pp_TyEnts)(tyents, "After type entity (TyEnt) compression");
      }

      TRACE_D3("\n");
      TRACE_D3("------ Resolving the types of variables ------\n" );
      resolve_variable_types( barf, tyents, tyents_cache, tempvars );

      /* Copy all the non-INDIR tyents into a new table.  For large
         .so's, about 90% of the tyents will by now have been resolved to
         INDIRs, and we no longer need them, and so don't need to store
         them. */
      tyents_to_keep
         = VG_(newXA)( ML_(dinfo_zalloc),
                       "di.readdwarf3.ndrw.7 (TyEnt to-keep array)",
                       ML_(dinfo_free), sizeof(TyEnt) );
      n = VG_(sizeXA)( tyents );
      for (i = 0; i < n; i++) {
         TyEnt* ent = VG_(indexXA)( tyents, i );
         if (ent->tag != Te_INDIR)
            VG_(addToXA)( tyents_to_keep, ent );
      }

      /* The full table and its cache are no longer needed. */
      VG_(deleteXA)( tyents );
      tyents = NULL;
      ML_(dinfo_free)( tyents_cache );
      tyents_cache = NULL;

      /* Sort tyents_to_keep so we can lookup in it.  A complete (if
         minor) waste of time, since tyents itself is sorted, but
         necessary since VG_(lookupXA) refuses to cooperate if we
         don't. */
      VG_(setCmpFnXA)( tyents_to_keep, (XACmpFn_t) ML_(TyEnt__cmp_by_cuOff_only) );
      VG_(sortXA)( tyents_to_keep );

      /* Enable caching on tyents_to_keep */
      tyents_to_keep_cache
         = ML_(dinfo_zalloc)( "di.readdwarf3.ndrw.8",
                              sizeof(TyEntIndexCache) );
      ML_(TyEntIndexCache__invalidate)( tyents_to_keep_cache );

      /* And record the tyents in the DebugInfo.  We do this before
         starting to hand variables to ML_(addVar), since if ML_(addVar)
         wants to do debug printing (of the types of said vars) then it
         will need the tyents.*/
      vg_assert(!di->admin_tyents);
      di->admin_tyents = tyents_to_keep;

      /* Bias all the location expressions. */
      TRACE_D3("\n");
      TRACE_D3("------ Biasing the location expressions ------\n" );

      n = VG_(sizeXA)( gexprs );
      for (i = 0; i < n; i++) {
         gexpr = *(GExpr**)VG_(indexXA)( gexprs, i );
         bias_GX( gexpr, di );
      }

      TRACE_D3("\n");
      TRACE_D3("------ Acquired the following variables: ------\n\n");

      /* Park (pointers to) all the vars in an XArray, so we can look up
         abstract origins quickly.  The array is sorted (hence, looked-up
         by) the .dioff fields.  Since the .dioffs should be in strictly
         ascending order, there is no need to sort the array after
         construction.  The ascendingness is however asserted for. */
      dioff_lookup_tab
         = VG_(newXA)( ML_(dinfo_zalloc), "di.readdwarf3.ndrw.9",
                       ML_(dinfo_free),
                       sizeof(TempVar*) );

      /* Find the index of the first variable whose .dioff lies below
         escn_debug_info.szB + escn_debug_types.szB (the same value that
         was used for cc.alt_cuOff_bias above).  The scan only happens
         when the alternate .debug_info slice has nonzero size;
         otherwise first_primary_var stays 0.  Presumably the entries
         skipped over are those read from the alternate .debug_info in
         pass 0 -- TODO confirm. */
      n = VG_(sizeXA)( tempvars );
      Word first_primary_var = 0;
      for (first_primary_var = 0;
           escn_debug_info_alt.szB/*really?*/ && first_primary_var < n;
           first_primary_var++) {
         varp = *(TempVar**)VG_(indexXA)( tempvars, first_primary_var );
         if (varp->dioff < escn_debug_info.szB + escn_debug_types.szB)
            break;
      }
      /* Add the variables to the lookup table, starting at index
         first_primary_var and wrapping around to the start of
         'tempvars'.  NOTE(review): the ordering assertion below is only
         applied for i > first_primary_var. */
      for (i = 0; i < n; i++) {
         varp = *(TempVar**)VG_(indexXA)( tempvars, (i + first_primary_var) % n );
         if (i > first_primary_var) {
            varp2 = *(TempVar**)VG_(indexXA)( tempvars,
                                              (i + first_primary_var - 1) % n );
            /* why should this hold?  Only, I think, because we've
               constructed the array by reading .debug_info sequentially,
               and so the array .dioff fields should reflect that, and be
               strictly ascending. */
            vg_assert(varp2->dioff < varp->dioff);
         }
         VG_(addToXA)( dioff_lookup_tab, &varp );
      }
      VG_(setCmpFnXA)( dioff_lookup_tab, cmp_TempVar_by_dioff );
      VG_(sortXA)( dioff_lookup_tab ); /* POINTLESS; FIXME: rm */

      /* Now visit each var.  Collect up as much info as possible for
         each var and hand it to ML_(addVar). */
      n = VG_(sizeXA)( tempvars );
      for (j = 0; j < n; j++) {
         TyEnt* ent;
         varp = *(TempVar**)VG_(indexXA)( tempvars, j );

         /* Possibly show .. */
         if (TD3) {
            VG_(printf)("<%lx> addVar: level %d: %s :: ",
                        varp->dioff,
                        varp->level,
                        varp->name ? varp->name : "<anon_var>" );
            if (varp->typeR) {
               ML_(pp_TyEnt_C_ishly)( tyents_to_keep, varp->typeR );
            } else {
               VG_(printf)("NULL");
            }
            VG_(printf)("\n  Loc=");
            if (varp->gexpr) {
               ML_(pp_GX)(varp->gexpr);
            } else {
               VG_(printf)("NULL");
            }
            VG_(printf)("\n");
            if (varp->fbGX) {
               VG_(printf)("  FrB=");
               ML_(pp_GX)( varp->fbGX );
               VG_(printf)("\n");
            } else {
               VG_(printf)("  FrB=none\n");
            }
            VG_(printf)("  declared at: %u %s:%d\n",
                        varp->fndn_ix,
                        ML_(fndn_ix2filename) (di, varp->fndn_ix),
                        varp->fLine );
            if (varp->absOri != (UWord)D3_INVALID_CUOFF)
               VG_(printf)("  abstract origin: <%lx>\n", varp->absOri);
         }

         /* Skip variables which have no location.  These must be
            abstract instances; they are useless as-is since with no
            location they have no specified memory location.  They will
            presumably be referred to via the absOri fields of other
            variables. */
         if (!varp->gexpr) {
            TRACE_D3("  SKIP (no location)\n\n");
            continue;
         }

         /* So it has a location, at least.  If it refers to some other
            entry through its absOri field, pull in further info through
            that. */
         if (varp->absOri != (UWord)D3_INVALID_CUOFF) {
            Bool found;
            Word ixFirst, ixLast;
            TempVar key;
            TempVar* keyp = &key;
            TempVar *varAI;
            VG_(memset)(&key, 0, sizeof(key)); /* not necessary */
            key.dioff = varp->absOri; /* this is what we want to find */
            found = VG_(lookupXA)( dioff_lookup_tab, &keyp,
                                   &ixFirst, &ixLast );
            if (!found) {
               /* barf("DW_AT_abstract_origin can't be resolved"); */
               TRACE_D3("  SKIP (DW_AT_abstract_origin can't be resolved)\n\n");
               continue;
            }
            /* If the following fails, there is more than one entry with
               the same dioff.  Which can't happen. */
            vg_assert(ixFirst == ixLast);
            varAI = *(TempVar**)VG_(indexXA)( dioff_lookup_tab, ixFirst );
            /* stay sane */
            vg_assert(varAI);
            vg_assert(varAI->dioff == varp->absOri);

            /* Copy what useful info we can.  Fields the variable
               already has are never overwritten. */
            if (varAI->typeR && !varp->typeR)
               varp->typeR = varAI->typeR;
            if (varAI->name && !varp->name)
               varp->name = varAI->name;
            if (varAI->fndn_ix && !varp->fndn_ix)
               varp->fndn_ix = varAI->fndn_ix;
            if (varAI->fLine > 0 && varp->fLine == 0)
               varp->fLine = varAI->fLine;
         }

         /* Give it a name if it doesn't have one. */
         if (!varp->name)
            varp->name = ML_(addStr)( di, "<anon_var>", -1 );

         /* So now does it have enough info to be useful? */
         /* NOTE: re typeR: this is a hack.  If typeR is Te_UNKNOWN then
            the type didn't get resolved.  Really, in that case
            something's broken earlier on, and should be fixed, rather
            than just skipping the variable. */
         ent = ML_(TyEnts__index_by_cuOff)( tyents_to_keep,
                                            tyents_to_keep_cache,
                                            varp->typeR );
         /* The next two assertions should be guaranteed by
            our previous call to resolve_variable_types. */
         vg_assert(ent);
         vg_assert(ML_(TyEnt__is_type)(ent) || ent->tag == Te_UNKNOWN);

         if (ent->tag == Te_UNKNOWN) continue;

         vg_assert(varp->gexpr);
         vg_assert(varp->name);
         vg_assert(varp->typeR);
         vg_assert(varp->level >= 0);

         /* Ok.  So we're going to keep it.  Call ML_(addVar) once for
            each address range in which the variable exists. */
         TRACE_D3("  ACQUIRE for range(s) ");
         { AddrRange  oneRange;
           AddrRange* varPcRanges;
           Word       nVarPcRanges;
           /* Set up to iterate over address ranges, however
              represented: zero or one range is held inline in
              rngOneMin/rngOneMax, two or more in the rngMany XArray. */
           if (varp->nRanges == 0 || varp->nRanges == 1) {
              vg_assert(!varp->rngMany);
              if (varp->nRanges == 0) {
                 vg_assert(varp->rngOneMin == 0);
                 vg_assert(varp->rngOneMax == 0);
              }
              nVarPcRanges = varp->nRanges;
              oneRange.aMin = varp->rngOneMin;
              oneRange.aMax = varp->rngOneMax;
              varPcRanges = &oneRange;
           } else {
              vg_assert(varp->rngMany);
              vg_assert(varp->rngOneMin == 0);
              vg_assert(varp->rngOneMax == 0);
              nVarPcRanges = VG_(sizeXA)(varp->rngMany);
              vg_assert(nVarPcRanges >= 2);
              vg_assert(nVarPcRanges == (Word)varp->nRanges);
              varPcRanges = VG_(indexXA)(varp->rngMany, 0);
           }
           if (varp->level == 0)
              vg_assert( nVarPcRanges == 1 );
           /* and iterate */
           for (i = 0; i < nVarPcRanges; i++) {
              Addr pcMin = varPcRanges[i].aMin;
              Addr pcMax = varPcRanges[i].aMax;
              vg_assert(pcMin <= pcMax);
              /* Level 0 is the global address range.  So at level 0 we
                 don't want to bias pcMin/pcMax; but at all other levels
                 we do since those are derived from svmas in the Dwarf
                 we're reading.  Be paranoid ... */
              if (varp->level == 0) {
                 vg_assert(pcMin == (Addr)0);
                 vg_assert(pcMax == ~(Addr)0);
              } else {
                 /* vg_assert(pcMin > (Addr)0);
                    No .. we can legitimately expect to see ranges like
                    0x0-0x11D (pre-biasing, of course). */
                 vg_assert(pcMax < ~(Addr)0);
              }

              /* Apply text biasing, for non-global variables. */
              if (varp->level > 0) {
                 pcMin += di->text_debug_bias;
                 pcMax += di->text_debug_bias;
              }

              if (i > 0 && (i%2) == 0)
                 TRACE_D3("\n                       ");
              TRACE_D3("[%#lx,%#lx] ", pcMin, pcMax );

              ML_(addVar)(
                 di, varp->level,
                     pcMin, pcMax,
                     varp->name,  varp->typeR,
                     varp->gexpr, varp->fbGX,
                     varp->fndn_ix, varp->fLine, td3
              );
           }
         }

         TRACE_D3("\n\n");
         /* and move on to the next var */
      }

      /* Now free all the TempVars */
      n = VG_(sizeXA)( tempvars );
      for (i = 0; i < n; i++) {
         varp = *(TempVar**)VG_(indexXA)( tempvars, i );
         ML_(dinfo_free)(varp);
      }
      VG_(deleteXA)( tempvars );
      tempvars = NULL;

      /* and the temp lookup table */
      VG_(deleteXA)( dioff_lookup_tab );

      /* and the ranges tree.  Note that we need to also free the XArrays
         which constitute the keys, hence pass VG_(deleteXA) as a
         key-finalizer. */
      VG_(deleteFM)( rangestree, (void(*)(UWord))VG_(deleteXA), NULL );

      /* and the tyents_to_keep cache */
      ML_(dinfo_free)( tyents_to_keep_cache );
      tyents_to_keep_cache = NULL;

      vg_assert( varparser.fndn_ix_Table == NULL );

      /* And the signatured type hash.  */
      VG_(HT_destruct) ( signature_types, ML_(dinfo_free) );

      /* record the GExprs in di so they can be freed later */
      vg_assert(!di->admin_gexprs);
      di->admin_gexprs = gexprs;
   }

   // Free up dynamically allocated memory
   if (VG_(clo_read_var_info)) {
      type_parser_release(&typarser);
      var_parser_release(&varparser);
   }
}
5154 
5155 
5156 /*------------------------------------------------------------*/
5157 /*---                                                      ---*/
5158 /*--- The "new" DWARF3 reader -- top level control logic   ---*/
5159 /*---                                                      ---*/
5160 /*------------------------------------------------------------*/
5161 
5162 static Bool               d3rd_jmpbuf_valid  = False;
5163 static const HChar*       d3rd_jmpbuf_reason = NULL;
5164 static VG_MINIMAL_JMP_BUF(d3rd_jmpbuf);
5165 
barf(const HChar * reason)5166 static __attribute__((noreturn)) void barf ( const HChar* reason ) {
5167    vg_assert(d3rd_jmpbuf_valid);
5168    d3rd_jmpbuf_reason = reason;
5169    VG_MINIMAL_LONGJMP(d3rd_jmpbuf);
5170    /*NOTREACHED*/
5171    vg_assert(0);
5172 }
5173 
5174 
5175 void
ML_(new_dwarf3_reader)5176 ML_(new_dwarf3_reader) (
5177    DebugInfo* di,
5178    DiSlice escn_debug_info,      DiSlice escn_debug_types,
5179    DiSlice escn_debug_abbv,      DiSlice escn_debug_line,
5180    DiSlice escn_debug_str,       DiSlice escn_debug_ranges,
5181    DiSlice escn_debug_loc,       DiSlice escn_debug_info_alt,
5182    DiSlice escn_debug_abbv_alt,  DiSlice escn_debug_line_alt,
5183    DiSlice escn_debug_str_alt
5184 )
5185 {
5186    volatile Int  jumped;
5187    volatile Bool td3 = di->trace_symtab;
5188 
5189    /* Run the _wrk function to read the dwarf3.  If it succeeds, it
5190       just returns normally.  If there is any failure, it longjmp's
5191       back here, having first set d3rd_jmpbuf_reason to something
5192       useful. */
5193    vg_assert(d3rd_jmpbuf_valid  == False);
5194    vg_assert(d3rd_jmpbuf_reason == NULL);
5195 
5196    d3rd_jmpbuf_valid = True;
5197    jumped = VG_MINIMAL_SETJMP(d3rd_jmpbuf);
5198    if (jumped == 0) {
5199       /* try this ... */
5200       new_dwarf3_reader_wrk( di, barf,
5201                              escn_debug_info,     escn_debug_types,
5202                              escn_debug_abbv,     escn_debug_line,
5203                              escn_debug_str,      escn_debug_ranges,
5204                              escn_debug_loc,      escn_debug_info_alt,
5205                              escn_debug_abbv_alt, escn_debug_line_alt,
5206                              escn_debug_str_alt );
5207       d3rd_jmpbuf_valid = False;
5208       TRACE_D3("\n------ .debug_info reading was successful ------\n");
5209    } else {
5210       /* It longjmp'd. */
5211       d3rd_jmpbuf_valid = False;
5212       /* Can't longjump without giving some sort of reason. */
5213       vg_assert(d3rd_jmpbuf_reason != NULL);
5214 
5215       TRACE_D3("\n------ .debug_info reading failed ------\n");
5216 
5217       ML_(symerr)(di, True, d3rd_jmpbuf_reason);
5218    }
5219 
5220    d3rd_jmpbuf_valid  = False;
5221    d3rd_jmpbuf_reason = NULL;
5222 }
5223 
5224 
5225 
5226 /* --- Unused code fragments which might be useful one day. --- */
5227 
#if 0
   /* Read the arange tables.

      Disabled fragment: walks .debug_aranges and traces, for each
      set, its header fields followed by the (address, length) pairs
      up to and including the terminating (0, 0) pair.  It is not a
      complete function; it relies on 'aranges', 'debug_aranges_img',
      'debug_aranges_sz' and 'barf' being in scope wherever it is
      pasted. */
   TRACE_SYMTAB("\n");
   TRACE_SYMTAB("\n------ The contents of .debug_arange ------\n");
   init_Cursor( &aranges, debug_aranges_img,
                debug_aranges_sz, 0, barf,
                "Overrun whilst reading .debug_aranges section" );
   while (True) {
      ULong  len, d_i_offset;
      Bool   is64;
      UShort version;
      UChar  asize, segsize;

      if (is_at_end_Cursor( &aranges ))
         break;
      /* Read one arange thingy */
      /* initial_length field */
      len = get_Initial_Length( &is64, &aranges,
               "in .debug_aranges: invalid initial-length field" );
      version    = get_UShort( &aranges );
      d_i_offset = get_Dwarfish_UWord( &aranges, is64 );
      asize      = get_UChar( &aranges );
      segsize    = get_UChar( &aranges );
      TRACE_D3("  Length:                   %llu\n", len);
      TRACE_D3("  Version:                  %d\n", (Int)version);
      TRACE_D3("  Offset into .debug_info:  %llx\n", d_i_offset);
      TRACE_D3("  Pointer Size:             %d\n", (Int)asize);
      TRACE_D3("  Segment Size:             %d\n", (Int)segsize);
      TRACE_D3("\n");
      TRACE_D3("    Address            Length\n");

      /* asize is taken directly from the section contents, so a
         malformed file can make it zero.  Reject that explicitly:
         the alignment computation below would otherwise divide by
         zero. */
      if (asize == 0)
         barf( "in .debug_aranges: zero address size" );

      /* Skip padding until the cursor position is a multiple of
         twice the address size. */
      while ((get_position_of_Cursor( &aranges ) % (2 * asize)) > 0) {
         (void)get_UChar( & aranges );
      }
      /* Dump pairs; the pair (0, 0) ends this set. */
      while (True) {
         ULong address = get_Dwarfish_UWord( &aranges, asize==8 );
         ULong length = get_Dwarfish_UWord( &aranges, asize==8 );
         TRACE_D3("    0x%016llx 0x%llx\n", address, length);
         if (address == 0 && length == 0) break;
      }
   }
   TRACE_SYMTAB("\n");
#endif
5271 
5272 #endif // defined(VGO_linux) || defined(VGO_darwin) || defined(VGO_solaris)
5273 
5274 /*--------------------------------------------------------------------*/
5275 /*--- end                                                          ---*/
5276 /*--------------------------------------------------------------------*/
5277