1 
2 /*--------------------------------------------------------------------*/
3 /*--- Demangling of C++ mangled names.                  demangle.c ---*/
4 /*--------------------------------------------------------------------*/
5 
6 /*
7    This file is part of Valgrind, a dynamic binary instrumentation
8    framework.
9 
10    Copyright (C) 2000-2015 Julian Seward
11       jseward@acm.org
12 
13    This program is free software; you can redistribute it and/or
14    modify it under the terms of the GNU General Public License as
15    published by the Free Software Foundation; either version 2 of the
16    License, or (at your option) any later version.
17 
18    This program is distributed in the hope that it will be useful, but
19    WITHOUT ANY WARRANTY; without even the implied warranty of
20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21    General Public License for more details.
22 
23    You should have received a copy of the GNU General Public License
24    along with this program; if not, write to the Free Software
25    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26    02111-1307, USA.
27 
28    The GNU General Public License is contained in the file COPYING.
29 */
30 
31 #include "pub_core_basics.h"
32 #include "pub_core_demangle.h"
33 #include "pub_core_libcassert.h"
34 #include "pub_core_libcbase.h"
35 #include "pub_core_libcprint.h"
36 #include "pub_core_mallocfree.h"
37 #include "pub_core_options.h"
38 
39 #include "vg_libciface.h"
40 #include "demangle.h"
41 
42 /* The demangler's job is to take a raw symbol name and turn it into
43    something a Human Bean can understand.  There are two levels of
44    mangling.
45 
46    1. First, C++ names are mangled by the compiler.  So we'll have to
47       undo that.
48 
49    2. Optionally, in relatively rare cases, the resulting name is then
50       itself encoded using Z-escaping (see pub_core_redir.h) so as to
51       become part of a redirect-specification.
52 
53    Therefore, VG_(demangle) first tries to undo (2).  If successful,
54    the soname part is discarded (humans don't want to see that).
55    Then, it tries to undo (1) (using demangling code from GNU/FSF).
56 
57    Finally, change the name of all symbols which are known to be
58    functions below main() to "(below main)".  This helps reduce
59    variability of stack traces, something which has been a problem for
60    the testsuite for a long time.
61 
62    --------
   If do_cxx_demangling == True, does all the above stages:
   - undo (2) [Z-encoding]
   - undo (1) [C++ mangling]
   - do the below-main hack

   If do_cxx_demangling == False, the middle stage is skipped:
   - undo (2) [Z-encoding]
   - do the below-main hack
71 */
72 
73 /* Note that the C++ demangler is from GNU libiberty and is almost
74    completely unmodified.  We use vg_libciface.h as a way to
75    impedance-match the libiberty code into our own framework.
76 
77    The libiberty code included here was taken from the GCC repository
78    and is released under the LGPL 2.1 license, which AFAICT is compatible
79    with "GPL 2 or later" and so is OK for inclusion in Valgrind.
80 
81    To update to a newer libiberty, use the "update-demangler" script
82    which is included in the valgrind repository. */
83 
84 /* This is the main, standard demangler entry point. */
85 
86 /* Upon return, *RESULT will point to the demangled name.
87    The memory buffer that holds the demangled name is allocated on the
88    heap and will be deallocated in the next invocation. Conceptually,
89    that buffer is owned by VG_(demangle). That means two things:
90    (1) Users of VG_(demangle) must not free that buffer.
91    (2) If the demangled name needs to be stashed away for later use,
92        the contents of the buffer needs to be copied. It is not sufficient
93        to just store the pointer as it will point to deallocated memory
94        after the next VG_(demangle) invocation. */
VG_(demangle)95 void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
96                      /* IN */  const HChar  *orig,
97                      /* OUT */ const HChar **result )
98 {
99    /* Possibly undo (2) */
100    /* Z-Demangling was requested.
101       The fastest way to see if it's a Z-mangled name is just to attempt
102       to Z-demangle it (with NULL for the soname buffer, since we're not
103       interested in that). */
104    if (do_z_demangling) {
105       const HChar *z_demangled;
106 
107       if (VG_(maybe_Z_demangle)( orig, NULL, /*soname*/
108                                  &z_demangled, NULL, NULL, NULL )) {
109          orig = z_demangled;
110       }
111    }
112 
113    /* Possibly undo (1) */
114    if (do_cxx_demangling && VG_(clo_demangle)) {
115       static HChar* demangled = NULL;
116 
117       /* Free up previously demangled name */
118       if (demangled) VG_(arena_free) (VG_AR_DEMANGLE, demangled);
119 
120       demangled = ML_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS );
121 
122       *result = (demangled == NULL) ? orig : demangled;
123    } else {
124       *result = orig;
125    }
126 
127    // 13 Mar 2005: We used to check here that the demangler wasn't leaking
128    // by calling the (now-removed) function VG_(is_empty_arena)().  But,
129    // very rarely (ie. I've heard of it twice in 3 years), the demangler
130    // does leak.  But, we can't do much about it, and it's not a disaster,
131    // so we just let it slide without aborting or telling the user.
132 }
133 
134 
135 /*------------------------------------------------------------*/
136 /*--- DEMANGLE Z-ENCODED NAMES                             ---*/
137 /*------------------------------------------------------------*/
138 
139 /* Demangle a Z-encoded name as described in pub_tool_redir.h.
140    Z-encoded names are used by Valgrind for doing function
141    interception/wrapping.
142 
143    Demangle 'sym' into its soname and fnname parts, putting them in
144    the specified buffers.  Returns a Bool indicating whether the
145    demangled failed or not.  A failure can occur because the prefix
146    isn't recognised, the internal Z-escaping is wrong, or because one
147    or the other (or both) of the output buffers becomes full.  Passing
148    'so' as NULL is acceptable if the caller is only interested in the
149    function name part. */
150 
VG_(maybe_Z_demangle)151 Bool VG_(maybe_Z_demangle) ( const HChar* sym,
152                              /*OUT*/const HChar** so,
153                              /*OUT*/const HChar** fn,
154                              /*OUT*/Bool* isWrap,
155                              /*OUT*/Int*  eclassTag,
156                              /*OUT*/Int*  eclassPrio )
157 {
158    static HChar *sobuf;
159    static HChar *fnbuf;
160    static SizeT  buf_len = 0;
161 
162    /* The length of the name after undoing Z-encoding is always smaller
163       than the mangled name. Making the soname and fnname buffers as large
164       as the demangled name is therefore always safe and overflow can never
165       occur. */
166    SizeT len = VG_(strlen)(sym) + 1;
167 
168    if (buf_len < len) {
169       sobuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", sobuf, len);
170       fnbuf = VG_(arena_realloc)(VG_AR_DEMANGLE, "Z-demangle", fnbuf, len);
171       buf_len = len;
172    }
173    sobuf[0] = fnbuf[0] = '\0';
174 
175    if (so)
176      *so = sobuf;
177    *fn = fnbuf;
178 
179 #  define EMITSO(ch)                           \
180       do {                                     \
181          if (so) {                             \
182             sobuf[soi++] = ch; sobuf[soi] = 0; \
183          }                                     \
184       } while (0)
185 #  define EMITFN(ch)                           \
186       do {                                     \
187          fnbuf[fni++] = ch; fnbuf[fni] = 0;    \
188       } while (0)
189 
190    Bool error, valid, fn_is_encoded, is_VG_Z_prefixed;
191    Int  soi, fni, i;
192 
193    error = False;
194    soi = 0;
195    fni = 0;
196 
197    valid =     sym[0] == '_'
198            &&  sym[1] == 'v'
199            &&  sym[2] == 'g'
200            && (sym[3] == 'r' || sym[3] == 'w')
201            &&  VG_(isdigit)(sym[4])
202            &&  VG_(isdigit)(sym[5])
203            &&  VG_(isdigit)(sym[6])
204            &&  VG_(isdigit)(sym[7])
205            &&  VG_(isdigit)(sym[8])
206            &&  sym[9] == 'Z'
207            && (sym[10] == 'Z' || sym[10] == 'U')
208            &&  sym[11] == '_';
209 
210    if (valid
211        && sym[4] == '0' && sym[5] == '0' && sym[6] == '0' && sym[7] == '0'
212        && sym[8] != '0') {
213       /* If the eclass tag is 0000 (meaning "no eclass"), the priority
214          must be 0 too. */
215       valid = False;
216    }
217 
218    if (!valid)
219       return False;
220 
221    fn_is_encoded = sym[10] == 'Z';
222 
223    if (isWrap)
224       *isWrap = sym[3] == 'w';
225 
226    if (eclassTag) {
227       *eclassTag =    1000 * ((Int)sym[4] - '0')
228                    +  100 * ((Int)sym[5] - '0')
229                    +  10 * ((Int)sym[6] - '0')
230                    +  1 * ((Int)sym[7] - '0');
231       vg_assert(*eclassTag >= 0 && *eclassTag <= 9999);
232    }
233 
234    if (eclassPrio) {
235       *eclassPrio = ((Int)sym[8]) - '0';
236       vg_assert(*eclassPrio >= 0 && *eclassPrio <= 9);
237    }
238 
239    /* Now check the soname prefix isn't "VG_Z_", as described in
240       pub_tool_redir.h. */
241    is_VG_Z_prefixed =
242       sym[12] == 'V' &&
243       sym[13] == 'G' &&
244       sym[14] == '_' &&
245       sym[15] == 'Z' &&
246       sym[16] == '_';
247    if (is_VG_Z_prefixed) {
248       vg_assert2(0, "symbol with a 'VG_Z_' prefix: %s.\n"
249                     "see pub_tool_redir.h for an explanation.", sym);
250    }
251 
252    /* Now scan the Z-encoded soname. */
253    i = 12;
254    while (True) {
255 
256       if (sym[i] == '_')
257       /* Found the delimiter.  Move on to the fnname loop. */
258          break;
259 
260       if (sym[i] == 0) {
261          error = True;
262          goto out;
263       }
264 
265       if (sym[i] != 'Z') {
266          EMITSO(sym[i]);
267          i++;
268          continue;
269       }
270 
271       /* We've got a Z-escape. */
272       i++;
273       switch (sym[i]) {
274          case 'a': EMITSO('*'); break;
275          case 'c': EMITSO(':'); break;
276          case 'd': EMITSO('.'); break;
277          case 'h': EMITSO('-'); break;
278          case 'p': EMITSO('+'); break;
279          case 's': EMITSO(' '); break;
280          case 'u': EMITSO('_'); break;
281          case 'A': EMITSO('@'); break;
282          case 'D': EMITSO('$'); break;
283          case 'L': EMITSO('('); break;
284          case 'R': EMITSO(')'); break;
285          case 'S': EMITSO('/'); break;
286          case 'Z': EMITSO('Z'); break;
287          default: error = True; goto out;
288       }
289       i++;
290    }
291 
292    vg_assert(sym[i] == '_');
293    i++;
294 
295    /* Now deal with the function name part. */
296    if (!fn_is_encoded) {
297 
298       /* simple; just copy. */
299       while (True) {
300          if (sym[i] == 0)
301             break;
302          EMITFN(sym[i]);
303          i++;
304       }
305       goto out;
306 
307    }
308 
309    /* else use a Z-decoding loop like with soname */
310    while (True) {
311 
312       if (sym[i] == 0)
313          break;
314 
315       if (sym[i] != 'Z') {
316          EMITFN(sym[i]);
317          i++;
318          continue;
319       }
320 
321       /* We've got a Z-escape. */
322       i++;
323       switch (sym[i]) {
324          case 'a': EMITFN('*'); break;
325          case 'c': EMITFN(':'); break;
326          case 'd': EMITFN('.'); break;
327          case 'h': EMITFN('-'); break;
328          case 'p': EMITFN('+'); break;
329          case 's': EMITFN(' '); break;
330          case 'u': EMITFN('_'); break;
331          case 'A': EMITFN('@'); break;
332          case 'D': EMITFN('$'); break;
333          case 'L': EMITFN('('); break;
334          case 'R': EMITFN(')'); break;
335          case 'Z': EMITFN('Z'); break;
336          default: error = True; goto out;
337       }
338       i++;
339    }
340 
341   out:
342    EMITSO(0);
343    EMITFN(0);
344 
345    if (error) {
346       /* Something's wrong.  Give up. */
347       VG_(message)(Vg_UserMsg,
348                    "m_demangle: error Z-demangling: %s\n", sym);
349       return False;
350    }
351 
352    return True;
353 }
354 
355 
356 /*--------------------------------------------------------------------*/
357 /*--- end                                                          ---*/
358 /*--------------------------------------------------------------------*/
359