1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2020 Red Hat, Inc.
5 //
6 // Author: Dodji Seketeli
7 
8 /// @file
9 ///
10 /// This file contains the definitions of the entry points to
11 /// de-serialize an instance of @ref abigail::corpus from a file in
12 /// elf format, containing dwarf information.
13 
14 #include "config.h"
15 #include <sys/types.h>
16 #include <sys/stat.h>
17 #include <fcntl.h>
18 #include <unistd.h>
19 #include <libgen.h>
20 #include <assert.h>
21 #include <limits.h>
22 #include <elfutils/libdwfl.h>
23 #include <dwarf.h>
24 #include <algorithm>
25 #include <cmath>
26 #include <cstring>
27 #include <deque>
28 #include <list>
29 #include <memory>
30 #include <ostream>
31 #include <sstream>
32 #include <stack>
33 #include <unordered_map>
34 #include <unordered_set>
35 #include <map>
36 
37 #include "abg-ir-priv.h"
38 #include "abg-suppression-priv.h"
39 #include "abg-corpus-priv.h"
40 #include "abg-elf-helpers.h"
41 #include "abg-internal.h"
42 
43 // <headers defining libabigail's API go under here>
44 ABG_BEGIN_EXPORT_DECLARATIONS
45 
46 #include "abg-dwarf-reader.h"
47 #include "abg-sptr-utils.h"
48 #include "abg-symtab-reader.h"
49 #include "abg-tools-utils.h"
50 
51 ABG_END_EXPORT_DECLARATIONS
52 // </headers defining libabigail's API>
53 
54 #ifndef UINT64_MAX
55 #define UINT64_MAX 0xffffffffffffffff
56 #endif
57 
58 using std::string;
59 
60 namespace abigail
61 {
62 
63 using std::cerr;
64 
65 /// The namespace for the DWARF reader.
66 namespace dwarf_reader
67 {
68 
69 using std::dynamic_pointer_cast;
70 using std::static_pointer_cast;
71 using std::unordered_map;
72 using std::unordered_set;
73 using std::stack;
74 using std::deque;
75 using std::list;
76 using std::map;
77 
78 using namespace elf_helpers; // TODO: avoid using namespace
79 
/// The origin of a DIE: the main debug info section, the alternate
/// debug info section, or the type unit section.
///
/// The enumerators are consecutive integers starting at zero so that
/// a value of this type can be stepped with the prefix increment
/// operator.
enum die_source
{
  /// The DIE does not come from any debug info.
  NO_DEBUG_INFO_DIE_SOURCE = 0,
  /// The DIE comes from the main debug info section.
  PRIMARY_DEBUG_INFO_DIE_SOURCE,
  /// The DIE comes from the alternate debug info section.
  ALT_DEBUG_INFO_DIE_SOURCE,
  /// The DIE comes from the type unit section.
  TYPE_UNIT_DIE_SOURCE,
  /// The number of enumerators above.  This one must always be the
  /// last enumerator.
  NUMBER_OF_DIE_SOURCES,
};
92 
93 /// Prefix increment operator for @ref die_source.
94 ///
95 /// @param source the die_source to increment.
96 /// @return the incremented source.
97 static die_source&
operator ++(die_source & source)98 operator++(die_source& source)
99 {
100   source = static_cast<die_source>(source + 1);
101   return source;
102 }
103 
104 /// A functor used by @ref dwfl_sptr.
105 struct dwfl_deleter
106 {
107   void
operator ()abigail::dwarf_reader::dwfl_deleter108   operator()(Dwfl* dwfl)
109   {dwfl_end(dwfl);}
110 };//end struct dwfl_deleter
111 
112 /// A convenience typedef for a shared pointer to a Dwfl.
113 typedef shared_ptr<Dwfl> dwfl_sptr;
114 
115 /// A convenience typedef for a vector of Dwarf_Off.
116 typedef vector<Dwarf_Off> dwarf_offsets_type;
117 
118 /// Convenience typedef for a map which key is the offset of a dwarf
119 /// die and which value is the corresponding artefact.
120 typedef unordered_map<Dwarf_Off, type_or_decl_base_sptr> die_artefact_map_type;
121 
122 /// Convenience typedef for a map which key is the offset of a dwarf
123 /// die, (given by dwarf_dieoffset()) and which value is the
124 /// corresponding class_decl.
125 typedef unordered_map<Dwarf_Off, class_decl_sptr> die_class_map_type;
126 
127 /// Convenience typedef for a map which key is the offset of a dwarf
128 /// die, (given by dwarf_dieoffset()) and which value is the
129 /// corresponding class_or_union_sptr.
130 typedef unordered_map<Dwarf_Off, class_or_union_sptr> die_class_or_union_map_type;
131 
132 /// Convenience typedef for a map which key the offset of a dwarf die
133 /// and which value is the corresponding function_decl.
134 typedef unordered_map<Dwarf_Off, function_decl_sptr> die_function_decl_map_type;
135 
136 /// Convenience typedef for a map which key is the offset of a dwarf
137 /// die and which value is the corresponding function_type.
138 typedef unordered_map<Dwarf_Off, function_type_sptr> die_function_type_map_type;
139 
140 /// Convenience typedef for a map which key is the offset of a
141 /// DW_TAG_compile_unit and the value is the corresponding @ref
142 /// translation_unit_sptr.
143 typedef unordered_map<Dwarf_Off, translation_unit_sptr> die_tu_map_type;
144 
145 /// Convenience typedef for a map which key is the offset of a DIE and
146 /// the value is the corresponding qualified name of the DIE.
147 typedef unordered_map<Dwarf_Off, interned_string> die_istring_map_type;
148 
149 /// Convenience typedef for a map which is an interned_string and
150 /// which value is a vector of offsets.
151 typedef unordered_map<interned_string,
152 		      dwarf_offsets_type,
153 		      hash_interned_string>
154 istring_dwarf_offsets_map_type;
155 
156 /// Convenience typedef for a map which key is an elf address and
157 /// which value is an elf_symbol_sptr.
158 typedef unordered_map<GElf_Addr, elf_symbol_sptr> addr_elf_symbol_sptr_map_type;
159 
160 /// Convenience typedef for a set of ELF addresses.
161 typedef unordered_set<GElf_Addr> address_set_type;
162 
163 typedef unordered_set<interned_string, hash_interned_string> istring_set_type;
164 
165 /// Convenience typedef for a shared pointer to an @ref address_set_type.
166 typedef shared_ptr<address_set_type> address_set_sptr;
167 
168 /// Convenience typedef for a shared pointer to an
169 /// addr_elf_symbol_sptr_map_type.
170 typedef shared_ptr<addr_elf_symbol_sptr_map_type> addr_elf_symbol_sptr_map_sptr;
171 
172 /// Convenience typedef for a map that associates an @ref
173 /// interned_string to a @ref function_type_sptr.
174 typedef unordered_map<interned_string,
175 		      function_type_sptr,
176 		      hash_interned_string> istring_fn_type_map_type;
177 
178 /// Convenience typedef for a stack containing the scopes up to the
179 /// current point in the abigail Internal Representation (aka IR) tree
180 /// that is being built.
181 typedef stack<scope_decl*> scope_stack_type;
182 
183 /// Convenience typedef for a map which key is a dwarf offset.  The
184 /// value is also a dwarf offset.
185 typedef unordered_map<Dwarf_Off, Dwarf_Off> offset_offset_map_type;
186 
187 /// Convenience typedef for a map which key is a string and which
188 /// value is a vector of smart pointer to a class.
189 typedef unordered_map<string, classes_type> string_classes_map;
190 
191 /// Convenience typedef for a map which key is a string and which
192 /// value is a vector of smart pointer to a enum.
193 typedef unordered_map<string, enums_type> string_enums_map;
194 
195 /// The abstraction of the place where a partial unit has been
196 /// imported.  This is what the DW_TAG_imported_unit DIE expresses.
197 ///
198 /// This type thus contains:
199 ///	- the offset to which the partial unit is imported
200 ///	- the offset of the imported partial unit.
201 ///	- the offset of the imported partial unit.
202 struct imported_unit_point
203 {
204   Dwarf_Off	offset_of_import;
205   // The boolean below is true iff the imported unit comes from the
206   // alternate debug info file.
207   die_source	imported_unit_die_source;
208   Dwarf_Off	imported_unit_die_off;
209   Dwarf_Off	imported_unit_cu_off;
210   Dwarf_Off	imported_unit_child_off;
211 
212   /// Default constructor for @ref the type imported_unit_point.
imported_unit_pointabigail::dwarf_reader::imported_unit_point213   imported_unit_point()
214     : offset_of_import(),
215       imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
216       imported_unit_die_off(),
217       imported_unit_cu_off(),
218       imported_unit_child_off()
219   {}
220 
221   /// Constructor of @ref the type imported_unit_point.
222   ///
223   /// @param import_off the offset of the point at which the unit has
224   /// been imported.
imported_unit_pointabigail::dwarf_reader::imported_unit_point225   imported_unit_point(Dwarf_Off import_off)
226     : offset_of_import(import_off),
227       imported_unit_die_source(PRIMARY_DEBUG_INFO_DIE_SOURCE),
228       imported_unit_die_off(),
229       imported_unit_cu_off(),
230       imported_unit_child_off()
231   {}
232 
233   /// Constructor of @ref the type imported_unit_point.
234   ///
235   /// @param import_off the offset of the point at which the unit has
236   /// been imported.
237   ///
238   /// @param from where the imported DIE comes from.
239   ///
240   /// @param imported_die the die of the unit that has been imported.
imported_unit_pointabigail::dwarf_reader::imported_unit_point241   imported_unit_point(Dwarf_Off	import_off,
242 		      const Dwarf_Die& imported_die,
243 		      die_source from)
244     : offset_of_import(import_off),
245       imported_unit_die_source(from),
246       imported_unit_die_off(dwarf_dieoffset
247 			    (const_cast<Dwarf_Die*>(&imported_die))),
248       imported_unit_cu_off(),
249       imported_unit_child_off()
250   {
251     Dwarf_Die imported_unit_child;
252 
253     ABG_ASSERT(dwarf_child(const_cast<Dwarf_Die*>(&imported_die),
254 			   &imported_unit_child) == 0);
255 
256     imported_unit_child_off =
257       dwarf_dieoffset(const_cast<Dwarf_Die*>(&imported_unit_child));
258 
259     Dwarf_Die cu_die_memory;
260     Dwarf_Die *cu_die;
261 
262     cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&imported_unit_child),
263 			 &cu_die_memory, 0, 0);
264     imported_unit_cu_off = dwarf_dieoffset(cu_die);
265   }
266 }; // struct imported_unit_point
267 
268 /// Convenience typedef for a vector of @ref imported_unit_point.
269 typedef vector<imported_unit_point> imported_unit_points_type;
270 
271 /// Convenience typedef for a vector of @ref imported_unit_point.
272 typedef unordered_map<Dwarf_Off, imported_unit_points_type>
273 tu_die_imported_unit_points_map_type;
274 
275 /// "Less than" operator for instances of @ref imported_unit_point
276 /// type.
277 ///
278 /// @param the left hand side operand of the "Less than" operator.
279 ///
280 /// @param the right hand side operand of the "Less than" operator.
281 ///
282 /// @return true iff @p l is less than @p r.
283 static bool
operator <(const imported_unit_point & l,const imported_unit_point & r)284 operator<(const imported_unit_point& l, const imported_unit_point& r)
285 {return l.offset_of_import < r.offset_of_import;}
286 
287 static bool
288 get_parent_die(const read_context&	ctxt,
289 	       const Dwarf_Die*	die,
290 	       Dwarf_Die&		parent_die,
291 	       size_t			where_offset);
292 
293 static bool
294 get_scope_die(const read_context&	ctxt,
295 	      const Dwarf_Die*		die,
296 	      size_t			where_offset,
297 	      Dwarf_Die&		scope_die);
298 
299 static bool
300 die_is_anonymous(const Dwarf_Die* die);
301 
302 static bool
303 die_is_type(const Dwarf_Die* die);
304 
305 static bool
306 die_is_decl(const Dwarf_Die* die);
307 
308 static bool
309 die_is_namespace(const Dwarf_Die* die);
310 
311 static bool
312 die_is_unspecified(Dwarf_Die* die);
313 
314 static bool
315 die_is_void_type(Dwarf_Die* die);
316 
317 static bool
318 die_is_pointer_type(const Dwarf_Die* die);
319 
320 static bool
321 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die);
322 
323 static bool
324 die_is_reference_type(const Dwarf_Die* die);
325 
326 static bool
327 die_is_pointer_or_reference_type(const Dwarf_Die* die);
328 
329 static bool
330 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die);
331 
332 static bool
333 die_is_class_type(const Dwarf_Die* die);
334 
335 static bool
336 die_is_qualified_type(const Dwarf_Die* die);
337 
338 static bool
339 die_is_function_type(const Dwarf_Die *die);
340 
341 static bool
342 die_has_object_pointer(const Dwarf_Die* die,
343 		       Dwarf_Die& object_pointer);
344 
345 static bool
346 die_has_children(const Dwarf_Die* die);
347 
348 static bool
349 die_this_pointer_from_object_pointer(Dwarf_Die* die,
350 				     Dwarf_Die& this_pointer);
351 
352 static bool
353 die_this_pointer_is_const(Dwarf_Die* die);
354 
355 static bool
356 die_object_pointer_is_for_const_method(Dwarf_Die* die);
357 
358 static bool
359 die_is_at_class_scope(const read_context& ctxt,
360 		      const Dwarf_Die* die,
361 		      size_t where_offset,
362 		      Dwarf_Die& class_scope_die);
363 static bool
364 eval_last_constant_dwarf_sub_expr(Dwarf_Op*	expr,
365 				  uint64_t	expr_len,
366 				  int64_t&	value,
367 				  bool&	is_tls_address);
368 
369 static translation_unit::language
370 dwarf_language_to_tu_language(size_t l);
371 
372 static bool
373 die_unsigned_constant_attribute(const Dwarf_Die*	die,
374 				unsigned		attr_name,
375 				uint64_t&		cst);
376 
377 static bool
378 die_signed_constant_attribute(const Dwarf_Die*die,
379 			      unsigned	attr_name,
380 			      int64_t&	cst);
381 
382 static bool
383 die_constant_attribute(const Dwarf_Die *die,
384 		       unsigned attr_name,
385 		       bool is_signed,
386 		       array_type_def::subrange_type::bound_value &value);
387 
388 static bool
389 form_is_DW_FORM_strx(unsigned form);
390 
391 static bool
392 form_is_DW_FORM_line_strp(unsigned form);
393 
394 static bool
395 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result);
396 
397 static string
398 die_name(const Dwarf_Die* die);
399 
400 static location
401 die_location(const read_context& ctxt, const Dwarf_Die* die);
402 
403 static bool
404 die_location_address(Dwarf_Die*	die,
405 		     Dwarf_Addr&	address,
406 		     bool&		is_tls_address);
407 
408 static bool
409 die_die_attribute(const Dwarf_Die* die,
410 		  unsigned attr_name,
411 		  Dwarf_Die& result,
412 		  bool recursively = true);
413 
414 static string
415 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die);
416 
417 static string
418 build_internal_anonymous_die_name(const string &base_name,
419 				  size_t anonymous_type_index);
420 
421 static string
422 get_internal_anonymous_die_name(Dwarf_Die *die,
423 				size_t anonymous_type_index);
424 
425 static string
426 build_internal_underlying_enum_type_name(const string &base_name,
427 					 bool is_anonymous,
428 					 uint64_t size);
429 
430 static string
431 die_qualified_type_name(const read_context& ctxt,
432 			const Dwarf_Die* die,
433 			size_t where);
434 
435 static string
436 die_qualified_decl_name(const read_context& ctxt,
437 			const Dwarf_Die* die,
438 			size_t where);
439 
440 static string
441 die_qualified_name(const read_context& ctxt,
442 		   const Dwarf_Die* die,
443 		   size_t where);
444 
445 static bool
446 die_qualified_type_name_empty(const read_context& ctxt,
447 			      const Dwarf_Die* die, size_t where,
448 			      string &qualified_name);
449 
450 static void
451 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
452 					   const Dwarf_Die* die,
453 					   size_t where_offset,
454 					   bool pretty_print,
455 					   string &return_type_name,
456 					   string &class_name,
457 					   vector<string>& parm_names,
458 					   bool& is_const,
459 					   bool& is_static);
460 
461 static string
462 die_function_signature(const read_context& ctxt,
463 		       const Dwarf_Die *die,
464 		       size_t where_offset);
465 
466 static bool
467 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die);
468 
469 static bool
470 die_function_type_is_method_type(const read_context& ctxt,
471 				 const Dwarf_Die *die,
472 				 size_t where_offset,
473 				 Dwarf_Die& object_pointer_die,
474 				 Dwarf_Die& class_die,
475 				 bool& is_static);
476 
477 static string
478 die_pretty_print_type(read_context& ctxt,
479 		      const Dwarf_Die* die,
480 		      size_t where_offset);
481 
482 static string
483 die_pretty_print_decl(read_context& ctxt,
484 		      const Dwarf_Die* die,
485 		      size_t where_offset);
486 
487 static string
488 die_pretty_print(read_context& ctxt,
489 		 const Dwarf_Die* die,
490 		 size_t where_offset);
491 
492 static void
493 maybe_canonicalize_type(const Dwarf_Die* die,
494 			read_context& ctxt);
495 
496 static void
497 maybe_canonicalize_type(const type_base_sptr&	t,
498 			read_context&		ctxt);
499 
500 static uint64_t
501 get_default_array_lower_bound(translation_unit::language l);
502 
503 static bool
504 find_lower_bound_in_imported_unit_points(const imported_unit_points_type&,
505 					 Dwarf_Off,
506 					 imported_unit_points_type::const_iterator&);
507 
508 static array_type_def::subrange_sptr
509 build_subrange_type(read_context&	ctxt,
510 		    const Dwarf_Die*	die,
511 		    size_t		where_offset,
512 		    bool		associate_type_to_die = true);
513 
514 static void
515 build_subranges_from_array_type_die(read_context&			ctxt,
516 				    const Dwarf_Die*			die,
517 				    array_type_def::subranges_type&	subranges,
518 				    size_t				where_offset,
519 				    bool				associate_type_to_die = true);
520 
521 static bool
522 compare_dies(const read_context& ctxt,
523 	     const Dwarf_Die *l, const Dwarf_Die *r,
524 	     bool update_canonical_dies_on_the_fly);
525 
526 
527 /// Find the file name of the alternate debug info file.
528 ///
529 /// @param elf_module the elf module to consider.
530 ///
531 /// @param out parameter.  Is set to the file name of the alternate
532 /// debug info file, iff this function returns true.
533 ///
534 /// @return true iff the location of the alternate debug info file was
535 /// found.
536 static bool
find_alt_debug_info_link(Dwfl_Module * elf_module,string & alt_file_name)537 find_alt_debug_info_link(Dwfl_Module *elf_module,
538 			 string &alt_file_name)
539 {
540   GElf_Addr bias = 0;
541   Dwarf *dwarf = dwfl_module_getdwarf(elf_module, &bias);
542   Elf *elf = dwarf_getelf(dwarf);
543   GElf_Ehdr ehmem, *elf_header;
544   elf_header = gelf_getehdr(elf, &ehmem);
545 
546   Elf_Scn* section = 0;
547   while ((section = elf_nextscn(elf, section)) != 0)
548     {
549       GElf_Shdr header_mem, *header;
550       header = gelf_getshdr(section, &header_mem);
551       if (header->sh_type != SHT_PROGBITS)
552 	continue;
553 
554       const char *section_name = elf_strptr(elf,
555 					    elf_header->e_shstrndx,
556 					    header->sh_name);
557 
558       char *alt_name = 0;
559       char *buildid = 0;
560       size_t buildid_len = 0;
561       if (section_name != 0
562 	  && strcmp(section_name, ".gnu_debugaltlink") == 0)
563 	{
564 	  Elf_Data *data = elf_getdata(section, 0);
565 	  if (data != 0 && data->d_size != 0)
566 	    {
567 	      alt_name = (char*) data->d_buf;
568 	      char *end_of_alt_name =
569 		(char *) memchr(alt_name, '\0', data->d_size);
570 	      buildid_len = data->d_size - (end_of_alt_name - alt_name + 1);
571 	      if (buildid_len == 0)
572 		return false;
573 	      buildid = end_of_alt_name + 1;
574 	    }
575 	}
576       else
577 	continue;
578 
579       if (buildid == 0 || alt_name == 0)
580 	return false;
581 
582       alt_file_name = alt_name;
583       return true;
584     }
585 
586   return false;
587 }
588 
589 /// Find alternate debuginfo file of a given "link" under a set of
590 /// root directories.
591 ///
592 /// The link is a string that is read by the function
593 /// find_alt_debug_info_link().  That link is a path that is relative
594 /// to a given debug info file, e.g, "../../../.dwz/something.debug".
595 /// It designates the alternate debug info file associated to a given
596 /// debug info file.
597 ///
598 /// This function will thus try to find the .dwz/something.debug file
599 /// under some given root directories.
600 ///
601 /// @param root_dirs the set of root directories to look from.
602 ///
603 /// @param alt_file_name a relative path to the alternate debug info
604 /// file to look for.
605 ///
606 /// @param alt_file_path the resulting absolute path to the alternate
607 /// debuginfo path denoted by @p alt_file_name and found under one of
608 /// the directories in @p root_dirs.  This is set iff the function
609 /// returns true.
610 ///
611 /// @return true iff the function found the alternate debuginfo file.
612 static bool
find_alt_debug_info_path(const vector<char ** > root_dirs,const string & alt_file_name,string & alt_file_path)613 find_alt_debug_info_path(const vector<char**> root_dirs,
614 			 const string &alt_file_name,
615 			 string &alt_file_path)
616 {
617   if (alt_file_name.empty())
618     return false;
619 
620   string altfile_name = tools_utils::trim_leading_string(alt_file_name, "../");
621 
622   for (vector<char**>::const_iterator i = root_dirs.begin();
623        i != root_dirs.end();
624        ++i)
625     if (tools_utils::find_file_under_dir(**i, altfile_name, alt_file_path))
626       return true;
627 
628   return false;
629 }
630 
631 /// Return the alternate debug info associated to a given main debug
632 /// info file.
633 ///
634 /// @param elf_module the elf module to consider.
635 ///
636 /// @param debug_root_dirs a set of root debuginfo directories under
637 /// which too look for the alternate debuginfo file.
638 ///
639 /// @param alt_file_name output parameter.  This is set to the file
640 /// path of the alternate debug info file associated to @p elf_module.
641 /// This is set iff the function returns a non-null result.
642 ///
643 /// @param alt_fd the file descriptor used to access the alternate
644 /// debug info.  If this parameter is set by the function, then the
645 /// caller needs to fclose it, otherwise the file descriptor is going
646 /// to be leaked.  Note however that on recent versions of elfutils
647 /// where libdw.h contains the function dwarf_getalt(), this parameter
648 /// is set to 0, so it doesn't need to be fclosed.
649 ///
650 /// Note that the alternate debug info file is a DWARF extension as of
651 /// DWARF 4 ans is decribed at
652 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
653 ///
654 /// @return the alternate debuginfo, or null.  If @p alt_fd is
655 /// non-zero, then the caller of this function needs to call
656 /// dwarf_end() on the returned alternate debuginfo pointer,
657 /// otherwise, it's going to be leaked.
658 static Dwarf*
find_alt_debug_info(Dwfl_Module * elf_module,const vector<char ** > debug_root_dirs,string & alt_file_name,int & alt_fd)659 find_alt_debug_info(Dwfl_Module *elf_module,
660 		    const vector<char**> debug_root_dirs,
661 		    string& alt_file_name,
662 		    int& alt_fd)
663 {
664   if (elf_module == 0)
665     return 0;
666 
667   Dwarf* result = 0;
668   find_alt_debug_info_link(elf_module, alt_file_name);
669 
670 #ifdef LIBDW_HAS_DWARF_GETALT
671   // We are on recent versions of elfutils where the function
672   // dwarf_getalt exists, so let's use it.
673   Dwarf_Addr bias = 0;
674   Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
675   result = dwarf_getalt(dwarf);
676   alt_fd = 0;
677 #else
678   // We are on an old version of elfutils where the function
679   // dwarf_getalt doesn't exist yet, so let's open code its
680   // functionality
681   char *alt_name = 0;
682   const char *file_name = 0;
683   void **user_data = 0;
684   Dwarf_Addr low_addr = 0;
685   char *alt_file = 0;
686 
687   file_name = dwfl_module_info(elf_module, &user_data,
688 			       &low_addr, 0, 0, 0, 0, 0);
689 
690   alt_fd = dwfl_standard_find_debuginfo(elf_module, user_data,
691 					file_name, low_addr,
692 					alt_name, file_name,
693 					0, &alt_file);
694 
695   result = dwarf_begin(alt_fd, DWARF_C_READ);
696 #endif
697 
698   if (result == 0)
699     {
700       // So we didn't find the alternate debuginfo file from the
701       // information that is in the debuginfo file associated to
702       // elf_module.  Maybe the alternate debuginfo file is located
703       // under one of the directories in debug_root_dirs.  So let's
704       // look in there.
705       string alt_file_path;
706       if (!find_alt_debug_info_path(debug_root_dirs,
707 				    alt_file_name,
708 				    alt_file_path))
709 	return result;
710 
711       // If we reach this point it means we have found the path to the
712       // alternate debuginfo file and it's in alt_file_path.  So let's
713       // open it and read it.
714       int fd = open(alt_file_path.c_str(), O_RDONLY);
715       if (fd == -1)
716 	return result;
717       result = dwarf_begin(fd, DWARF_C_READ);
718 
719 #ifdef LIBDW_HAS_DWARF_GETALT
720       Dwarf_Addr bias = 0;
721       Dwarf* dwarf = dwfl_module_getdwarf(elf_module, &bias);
722       dwarf_setalt(dwarf, result);
723 #endif
724     }
725 
726   return result;
727 }
728 
729 /// Compare a symbol name against another name, possibly demangling
730 /// the symbol_name before performing the comparison.
731 ///
732 /// @param symbol_name the symbol_name to take in account.
733 ///
734 /// @param name the second name to take in account.
735 ///
736 /// @param demangle if true, demangle @p symbol_name and compare the
737 /// result of the demangling with @p name.
738 ///
739 /// @return true iff symbol_name equals name.
740 static bool
compare_symbol_name(const string & symbol_name,const string & name,bool demangle)741 compare_symbol_name(const string& symbol_name,
742 		    const string& name,
743 		    bool demangle)
744 {
745   if (demangle)
746     {
747       string m = demangle_cplus_mangled_name(symbol_name);
748       return m == name;
749     }
750   return symbol_name == name;
751 }
752 
753 /// Lookup a symbol using the SysV ELF hash table.
754 ///
755 /// Note that this function hasn't been tested.  So it hasn't been
756 /// debugged yet.  IOW, it is not known to work.  Or rather, it's
757 /// almost like it's surely doesn't work ;-)
758 ///
759 /// Use it at your own risks.  :-)
760 ///
761 ///@parm env the environment we are operating from.
762 ///
763 /// @param elf_handle the elf_handle to use.
764 ///
765 /// @param sym_name the symbol name to look for.
766 ///
767 /// @param ht_index the index (in the section headers table) of the
768 /// hash table section to use.
769 ///
770 /// @param sym_tab_index the index (in the section headers table) of
771 /// the symbol table to use.
772 ///
773 /// @param demangle if true, demangle @p sym_name before comparing it
774 /// to names from the symbol table.
775 ///
776 /// @param syms_found a vector of symbols found with the name @p
777 /// sym_name.  table.
778 static bool
lookup_symbol_from_sysv_hash_tab(const environment * env,Elf * elf_handle,const string & sym_name,size_t ht_index,size_t sym_tab_index,bool demangle,vector<elf_symbol_sptr> & syms_found)779 lookup_symbol_from_sysv_hash_tab(const environment*		env,
780 				 Elf*				elf_handle,
781 				 const string&			sym_name,
782 				 size_t			ht_index,
783 				 size_t			sym_tab_index,
784 				 bool				demangle,
785 				 vector<elf_symbol_sptr>&	syms_found)
786 {
787   Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
788   ABG_ASSERT(sym_tab_section);
789 
790   Elf_Data* sym_tab_data = elf_getdata(sym_tab_section, 0);
791   ABG_ASSERT(sym_tab_data);
792 
793   GElf_Shdr sheader_mem;
794   GElf_Shdr* sym_tab_section_header = gelf_getshdr(sym_tab_section,
795 						   &sheader_mem);
796   Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
797   ABG_ASSERT(hash_section);
798 
799   // Poke at the different parts of the hash table and get them ready
800   // to be used.
801   unsigned long hash = elf_hash(sym_name.c_str());
802   Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
803   Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
804   size_t nb_buckets = ht_data[0];
805   size_t nb_chains = ht_data[1];
806 
807   if (nb_buckets == 0)
808     // An empty hash table.  Not sure if that is possible, but it
809     // would mean an empty table of exported symbols.
810     return false;
811 
812   //size_t nb_chains = ht_data[1];
813   Elf32_Word* ht_buckets = &ht_data[2];
814   Elf32_Word* ht_chains = &ht_buckets[nb_buckets];
815 
816   // Now do the real work.
817   size_t bucket = hash % nb_buckets;
818   size_t symbol_index = ht_buckets[bucket];
819 
820   GElf_Sym symbol;
821   const char* sym_name_str;
822   size_t sym_size;
823   elf_symbol::type sym_type;
824   elf_symbol::binding sym_binding;
825   elf_symbol::visibility sym_visibility;
826   bool found = false;
827   Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
828   size_t strings_ndx = strings_section
829     ? elf_ndxscn(strings_section)
830     : 0;
831 
832   do
833     {
834       ABG_ASSERT(gelf_getsym(sym_tab_data, symbol_index, &symbol));
835       sym_name_str = elf_strptr(elf_handle,
836 				sym_tab_section_header->sh_link,
837 				symbol.st_name);
838       if (sym_name_str
839 	  && compare_symbol_name(sym_name_str, sym_name, demangle))
840 	{
841 	  sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
842 	  sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
843 	  sym_visibility =
844 	    stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
845 	  sym_size = symbol.st_size;
846 	  elf_symbol::version ver;
847 	  if (get_version_for_symbol(elf_handle, symbol_index,
848 				     /*get_def_version=*/true, ver))
849 	    ABG_ASSERT(!ver.str().empty());
850 	  elf_symbol_sptr symbol_found =
851 	    elf_symbol::create(env,
852 			       symbol_index,
853 			       sym_size,
854 			       sym_name_str,
855 			       sym_type,
856 			       sym_binding,
857 			       symbol.st_shndx != SHN_UNDEF,
858 			       symbol.st_shndx == SHN_COMMON,
859 			       ver, sym_visibility,
860 			       symbol.st_shndx == strings_ndx);
861 	  syms_found.push_back(symbol_found);
862 	  found = true;
863 	}
864       symbol_index = ht_chains[symbol_index];
865     } while (symbol_index != STN_UNDEF || symbol_index >= nb_chains);
866 
867   return found;
868 }
869 
870 /// Get the size of the elf class, in bytes.
871 ///
872 /// @param elf_handle the elf handle to use.
873 ///
874 /// @return the size computed.
875 static char
get_elf_class_size_in_bytes(Elf * elf_handle)876 get_elf_class_size_in_bytes(Elf* elf_handle)
877 {
878   char result = 0;
879   GElf_Ehdr hdr;
880 
881   ABG_ASSERT(gelf_getehdr(elf_handle, &hdr));
882   int c = hdr.e_ident[EI_CLASS];
883 
884   switch (c)
885     {
886     case ELFCLASS32:
887       result = 4;
888       break;
889     case ELFCLASS64:
890       result = 8;
891       break;
892     default:
893       ABG_ASSERT_NOT_REACHED;
894     }
895 
896   return result;
897 }
898 
899 /// Get a given word of a bloom filter, referred to by the index of
900 /// the word.
901 ///
902 /// The bloom word size depends on the current elf class (32 bits for
903 /// an ELFCLASS32 or 64 bits for an ELFCLASS64 one) and this function
904 /// abstracts that nicely.
905 ///
906 /// @param elf_handle the elf handle to use.
907 ///
908 /// @param bloom_filter the bloom filter to consider.
909 ///
910 /// @param index the index of the bloom filter to return.
911 ///
912 /// @return a 64 bits work containing the bloom word found at index @p
913 /// index.  Note that if we are looking at an ELFCLASS32 binary, the 4
914 /// most significant bytes of the result are going to be zero.
915 static Elf64_Xword
bloom_word_at(Elf * elf_handle,Elf32_Word * bloom_filter,size_t index)916 bloom_word_at(Elf*		elf_handle,
917 	      Elf32_Word*	bloom_filter,
918 	      size_t		index)
919 {
920   Elf64_Xword result = 0;
921   GElf_Ehdr h;
922   ABG_ASSERT(gelf_getehdr(elf_handle, &h));
923   int c;
924   c = h.e_ident[EI_CLASS];
925 
926   switch(c)
927     {
928     case ELFCLASS32:
929       result = bloom_filter[index];
930       break ;
931     case ELFCLASS64:
932       {
933 	Elf64_Xword* f= reinterpret_cast<Elf64_Xword*>(bloom_filter);
934 	result = f[index];
935       }
936       break;
937     default:
938       abort();
939     }
940 
941   return result;
942 }
943 
944 /// The abstraction of the gnu elf hash table.
945 ///
946 /// The members of this struct are explained at
947 ///   - https://sourceware.org/ml/binutils/2006-10/msg00377.html
948 ///   - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
949 struct gnu_ht
950 {
951   size_t nb_buckets;
952   Elf32_Word* buckets;
953   Elf32_Word* chain;
954   size_t first_sym_index;
955   size_t bf_nwords;
956   size_t bf_size;
957   Elf32_Word* bloom_filter;
958   size_t shift;
959   size_t sym_count;
960   Elf_Scn* sym_tab_section;
961   GElf_Shdr sym_tab_section_header;
962 
gnu_htabigail::dwarf_reader::gnu_ht963   gnu_ht()
964     : nb_buckets(0),
965       buckets(0),
966       chain(0),
967       first_sym_index(0),
968       bf_nwords(0),
969       bf_size(0),
970       bloom_filter(0),
971       shift(0),
972       sym_count(0),
973       sym_tab_section(0)
974   {}
975 }; // end struct gnu_ht
976 
977 /// Setup the members of the gnu hash table.
978 ///
979 /// @param elf_handle a handle on the elf file to use.
980 ///
981 /// @param ht_index the index  (into the elf section headers table) of
982 /// the hash table section to use.
983 ///
984 /// @param sym_tab_index the index (into the elf section headers
985 /// table) of the symbol table the gnu hash table is about.
986 ///
987 /// @param ht the resulting hash table.
988 ///
989 /// @return true iff the hash table @ ht could be setup.
990 static bool
setup_gnu_ht(Elf * elf_handle,size_t ht_index,size_t sym_tab_index,gnu_ht & ht)991 setup_gnu_ht(Elf* elf_handle,
992 	     size_t ht_index,
993 	     size_t sym_tab_index,
994 	     gnu_ht& ht)
995 {
996   ht.sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
997   ABG_ASSERT(ht.sym_tab_section);
998   ABG_ASSERT(gelf_getshdr(ht.sym_tab_section, &ht.sym_tab_section_header));
999   ht.sym_count =
1000     ht.sym_tab_section_header.sh_size / ht.sym_tab_section_header.sh_entsize;
1001   Elf_Scn* hash_section = elf_getscn(elf_handle, ht_index);
1002   ABG_ASSERT(hash_section);
1003 
1004   // Poke at the different parts of the hash table and get them ready
1005   // to be used.
1006   Elf_Data* ht_section_data = elf_getdata(hash_section, 0);
1007   Elf32_Word* ht_data = reinterpret_cast<Elf32_Word*>(ht_section_data->d_buf);
1008 
1009   ht.nb_buckets = ht_data[0];
1010   if (ht.nb_buckets == 0)
1011     // An empty hash table.  Not sure if that is possible, but it
1012     // would mean an empty table of exported symbols.
1013     return false;
1014   ht.first_sym_index = ht_data[1];
1015   // The number of words used by the bloom filter.  A size of a word
1016   // is ELFCLASS.
1017   ht.bf_nwords = ht_data[2];
1018   // The shift used by the bloom filter code.
1019   ht.shift = ht_data[3];
1020   // The data of the bloom filter proper.
1021   ht.bloom_filter = &ht_data[4];
1022   // The size of the bloom filter in 4 bytes word.  This is going to
1023   // be used to index the 'bloom_filter' above, which is of type
1024   // Elf32_Word*; thus we need that bf_size be expressed in 4 bytes
1025   // words.
1026   ht.bf_size = (get_elf_class_size_in_bytes(elf_handle) / 4) * ht.bf_nwords;
1027   // The buckets of the hash table.
1028   ht.buckets = ht.bloom_filter + ht.bf_size;
1029   // The chain of the hash table.
1030   ht.chain = ht.buckets + ht.nb_buckets;
1031 
1032   return true;
1033 }
1034 
1035 /// Look into the symbol tables of the underlying elf file and find
1036 /// the symbol we are being asked.
1037 ///
1038 /// This function uses the GNU hash table for the symbol lookup.
1039 ///
1040 /// The reference of for the implementation of this function can be
1041 /// found at:
1042 ///   - https://sourceware.org/ml/binutils/2006-10/msg00377.html
1043 ///   - https://blogs.oracle.com/ali/entry/gnu_hash_elf_sections.
1044 ///
1045 /// @param elf_handle the elf handle to use.
1046 ///
1047 /// @param sym_name the name of the symbol to look for.
1048 ///
1049 /// @param ht_index the index of the hash table header to use.
1050 ///
1051 /// @param sym_tab_index the index of the symbol table header to use
1052 /// with this hash table.
1053 ///
1054 /// @param demangle if true, demangle @p sym_name.
1055 ///
1056 /// @param syms_found the vector of symbols found with the name @p
1057 /// sym_name.
1058 ///
1059 /// @return true if a symbol was actually found.
1060 static bool
lookup_symbol_from_gnu_hash_tab(const environment * env,Elf * elf_handle,const string & sym_name,size_t ht_index,size_t sym_tab_index,bool demangle,vector<elf_symbol_sptr> & syms_found)1061 lookup_symbol_from_gnu_hash_tab(const environment*		env,
1062 				Elf*				elf_handle,
1063 				const string&			sym_name,
1064 				size_t				ht_index,
1065 				size_t				sym_tab_index,
1066 				bool				demangle,
1067 				vector<elf_symbol_sptr>&	syms_found)
1068 {
1069   gnu_ht ht;
1070   if (!setup_gnu_ht(elf_handle, ht_index, sym_tab_index, ht))
1071     return false;
1072 
1073   // Now do the real work.
1074 
1075   // Compute bloom hashes (GNU hash and second bloom specific hashes).
1076   size_t h1 = elf_gnu_hash(sym_name.c_str());
1077   size_t h2 = h1 >> ht.shift;
1078   // The size of one of the words used in the bloom
1079   // filter, in bits.
1080   int c = get_elf_class_size_in_bytes(elf_handle) * 8;
1081   int n =  (h1 / c) % ht.bf_nwords;
1082   // The bitmask of the bloom filter has a size of either 32-bits on
1083   // ELFCLASS32 binaries or 64-bits on ELFCLASS64 binaries.  So we
1084   // need a 64-bits type to hold the bitmap, hence the Elf64_Xword
1085   // type used here.  When dealing with 32bits binaries, the upper
1086   // bits of the bitmask will be zero anyway.
1087   Elf64_Xword bitmask = (1ul << (h1 % c)) | (1ul << (h2 % c));
1088 
1089   // Test if the symbol is *NOT* present in this ELF file.
1090   if ((bloom_word_at(elf_handle, ht.bloom_filter, n) & bitmask) != bitmask)
1091     return false;
1092 
1093   size_t i = ht.buckets[h1 % ht.nb_buckets];
1094   if (i == STN_UNDEF)
1095     return false;
1096 
1097   Elf32_Word stop_word, *stop_wordp;
1098   elf_symbol::version ver;
1099   GElf_Sym symbol;
1100   const char* sym_name_str;
1101   bool found = false;
1102 
1103   elf_symbol::type sym_type;
1104   elf_symbol::binding sym_binding;
1105   elf_symbol::visibility sym_visibility;
1106   Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
1107     size_t strings_ndx = strings_section
1108     ? elf_ndxscn(strings_section)
1109     : 0;
1110 
1111   // Let's walk the hash table and record the versions of all the
1112   // symbols which name equal sym_name.
1113   for (i = ht.buckets[h1 % ht.nb_buckets],
1114 	 stop_wordp = &ht.chain[i - ht.first_sym_index];
1115        i != STN_UNDEF
1116 	 && (stop_wordp
1117 	     < ht.chain + (ht.sym_count - ht.first_sym_index));
1118        ++i, ++stop_wordp)
1119     {
1120       stop_word = *stop_wordp;
1121       if ((stop_word & ~ 1)!= (h1 & ~1))
1122 	// A given bucket can reference several hashes.  Here we
1123 	// stumbled across a hash value different from the one we are
1124 	// looking for.  Let's keep walking.
1125 	continue;
1126 
1127       ABG_ASSERT(gelf_getsym(elf_getdata(ht.sym_tab_section, 0),
1128 			 i, &symbol));
1129       sym_name_str = elf_strptr(elf_handle,
1130 				ht.sym_tab_section_header.sh_link,
1131 				symbol.st_name);
1132       if (sym_name_str
1133 	  && compare_symbol_name(sym_name_str, sym_name, demangle))
1134 	{
1135 	  // So we found a symbol (in the symbol table) that equals
1136 	  // sym_name.  Now lets try to get its version and record it.
1137 	  sym_type = stt_to_elf_symbol_type(GELF_ST_TYPE(symbol.st_info));
1138 	  sym_binding = stb_to_elf_symbol_binding(GELF_ST_BIND(symbol.st_info));
1139 	 sym_visibility =
1140 	   stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(symbol.st_other));
1141 
1142 	  if (get_version_for_symbol(elf_handle, i,
1143 				     /*get_def_version=*/true,
1144 				     ver))
1145 	    ABG_ASSERT(!ver.str().empty());
1146 
1147 	  elf_symbol_sptr symbol_found =
1148 	    elf_symbol::create(env, i,
1149 			       symbol.st_size,
1150 			       sym_name_str,
1151 			       sym_type, sym_binding,
1152 			       symbol.st_shndx != SHN_UNDEF,
1153 			       symbol.st_shndx == SHN_COMMON,
1154 			       ver, sym_visibility,
1155 			       symbol.st_shndx == strings_ndx);
1156 	  syms_found.push_back(symbol_found);
1157 	  found = true;
1158 	}
1159 
1160       if (stop_word & 1)
1161 	// The last bit of the stop_word is 1.  That means we need to
1162 	// stop here.  We reached the end of the chain of values
1163 	// referenced by the hask bucket.
1164 	break;
1165     }
1166   return found;
1167 }
1168 
1169 /// Look into the symbol tables of the underlying elf file and find
1170 /// the symbol we are being asked.
1171 ///
1172 /// This function uses the elf hash table (be it the GNU hash table or
1173 /// the sysv hash table) for the symbol lookup.
1174 ///
1175 /// @param env the environment we are operating from.
1176 ///
1177 /// @param elf_handle the elf handle to use.
1178 ///
1179 /// @param ht_kind the kind of hash table to use.  This is returned by
1180 /// the function function find_hash_table_section_index.
1181 ///
1182 /// @param ht_index the index (in the section headers table) of the
1183 /// hash table section to use.
1184 ///
1185 /// @param sym_tab_index the index (in section headers table) of the
1186 /// symbol table index to use with this hash table.
1187 ///
1188 /// @param symbol_name the name of the symbol to look for.
1189 ///
1190 /// @param demangle if true, demangle @p sym_name.
1191 ///
1192 /// @param syms_found the symbols that were actually found with the
1193 /// name @p symbol_name.
1194 ///
1195 /// @return true iff the function found the symbol from the elf hash
1196 /// table.
1197 static bool
lookup_symbol_from_elf_hash_tab(const environment * env,Elf * elf_handle,hash_table_kind ht_kind,size_t ht_index,size_t symtab_index,const string & symbol_name,bool demangle,vector<elf_symbol_sptr> & syms_found)1198 lookup_symbol_from_elf_hash_tab(const environment*		env,
1199 				Elf*				elf_handle,
1200 				hash_table_kind		ht_kind,
1201 				size_t				ht_index,
1202 				size_t				symtab_index,
1203 				const string&			symbol_name,
1204 				bool				demangle,
1205 				vector<elf_symbol_sptr>&	syms_found)
1206 {
1207   if (elf_handle == 0 || symbol_name.empty())
1208     return false;
1209 
1210   if (ht_kind == NO_HASH_TABLE_KIND)
1211     return false;
1212 
1213   if (ht_kind == SYSV_HASH_TABLE_KIND)
1214     return lookup_symbol_from_sysv_hash_tab(env,
1215 					    elf_handle, symbol_name,
1216 					    ht_index,
1217 					    symtab_index,
1218 					    demangle,
1219 					    syms_found);
1220   else if (ht_kind == GNU_HASH_TABLE_KIND)
1221     return lookup_symbol_from_gnu_hash_tab(env,
1222 					   elf_handle, symbol_name,
1223 					   ht_index,
1224 					   symtab_index,
1225 					   demangle,
1226 					   syms_found);
1227   return false;
1228 }
1229 
1230 /// Lookup a symbol from the symbol table directly.
1231 ///
1232 ///
1233 /// @param env the environment we are operating from.
1234 ///
1235 /// @param elf_handle the elf handle to use.
1236 ///
1237 /// @param sym_name the name of the symbol to look up.
1238 ///
1239 /// @param sym_tab_index the index (in the section headers table) of
1240 /// the symbol table section.
1241 ///
1242 /// @param demangle if true, demangle the names found in the symbol
1243 /// table before comparing them with @p sym_name.
1244 ///
1245 /// @param sym_name_found the actual name of the symbol found.
1246 ///
1247 /// @param sym_type the type of the symbol found.
1248 ///
1249 /// @param sym_binding the binding of the symbol found.
1250 ///
1251 /// @param sym_versions the versions of the symbol found.
1252 ///
1253 /// @return true iff the symbol was found.
1254 static bool
lookup_symbol_from_symtab(const environment * env,Elf * elf_handle,const string & sym_name,size_t sym_tab_index,bool demangle,vector<elf_symbol_sptr> & syms_found)1255 lookup_symbol_from_symtab(const environment*		env,
1256 			  Elf*				elf_handle,
1257 			  const string&		sym_name,
1258 			  size_t			sym_tab_index,
1259 			  bool				demangle,
1260 			  vector<elf_symbol_sptr>&	syms_found)
1261 {
1262   // TODO: read all of the symbol table, store it in memory in a data
1263   // structure that associates each symbol with its versions and in
1264   // which lookups of a given symbol is fast.
1265   Elf_Scn* sym_tab_section = elf_getscn(elf_handle, sym_tab_index);
1266   ABG_ASSERT(sym_tab_section);
1267 
1268   GElf_Shdr header_mem;
1269   GElf_Shdr * sym_tab_header = gelf_getshdr(sym_tab_section,
1270 					    &header_mem);
1271 
1272   size_t symcount = sym_tab_header->sh_size / sym_tab_header->sh_entsize;
1273   Elf_Data* symtab = elf_getdata(sym_tab_section, NULL);
1274   GElf_Sym* sym;
1275   char* name_str = 0;
1276   elf_symbol::version ver;
1277   bool found = false;
1278   Elf_Scn *strings_section = find_ksymtab_strings_section(elf_handle);
1279   size_t strings_ndx = strings_section
1280     ? elf_ndxscn(strings_section)
1281     : 0;
1282 
1283   for (size_t i = 0; i < symcount; ++i)
1284     {
1285       GElf_Sym sym_mem;
1286       sym = gelf_getsym(symtab, i, &sym_mem);
1287       name_str = elf_strptr(elf_handle,
1288 			    sym_tab_header->sh_link,
1289 			    sym->st_name);
1290 
1291       if (name_str && compare_symbol_name(name_str, sym_name, demangle))
1292 	{
1293 	  elf_symbol::type sym_type =
1294 	    stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info));
1295 	  elf_symbol::binding sym_binding =
1296 	    stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info));
1297 	  elf_symbol::visibility sym_visibility =
1298 	    stv_to_elf_symbol_visibility(GELF_ST_VISIBILITY(sym->st_other));
1299 	  bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
1300 	  bool sym_is_common = sym->st_shndx == SHN_COMMON;
1301 
1302 	  if (get_version_for_symbol(elf_handle, i,
1303 				     /*get_def_version=*/sym_is_defined,
1304 				     ver))
1305 	    ABG_ASSERT(!ver.str().empty());
1306 	  elf_symbol_sptr symbol_found =
1307 	    elf_symbol::create(env, i, sym->st_size,
1308 			       name_str, sym_type,
1309 			       sym_binding, sym_is_defined,
1310 			       sym_is_common, ver, sym_visibility,
1311 			       sym->st_shndx == strings_ndx);
1312 	  syms_found.push_back(symbol_found);
1313 	  found = true;
1314 	}
1315     }
1316 
1317   if (found)
1318     return true;
1319 
1320   return false;
1321 }
1322 
1323 /// Look into the symbol tables of the underlying elf file and see
1324 /// if we find a given symbol.
1325 ///
1326 /// @param env the environment we are operating from.
1327 ///
1328 /// @param symbol_name the name of the symbol to look for.
1329 ///
1330 /// @param demangle if true, try to demangle the symbol name found in
1331 /// the symbol table before comparing it to @p symbol_name.
1332 ///
1333 /// @param syms_found the list of symbols found, with the name @p
1334 /// symbol_name.
1335 ///
1336 /// @param sym_type this is set to the type of the symbol found.  This
1337 /// shall b a standard elf.h value for symbol types, that is SHT_OBJECT,
1338 /// STT_FUNC, STT_IFUNC, etc ...
1339 ///
1340 /// Note that this parameter is set iff the function returns true.
1341 ///
1342 /// @param sym_binding this is set to the binding of the symbol found.
1343 /// This is a standard elf.h value of the symbol binding kind, that
1344 /// is, STB_LOCAL, STB_GLOBAL, or STB_WEAK.
1345 ///
1346 /// @param symbol_versions the versions of the symbol @p symbol_name,
1347 /// if it was found.
1348 ///
1349 /// @return true iff a symbol with the name @p symbol_name was found.
1350 static bool
lookup_symbol_from_elf(const environment * env,Elf * elf_handle,const string & symbol_name,bool demangle,vector<elf_symbol_sptr> & syms_found)1351 lookup_symbol_from_elf(const environment*		env,
1352 		       Elf*				elf_handle,
1353 		       const string&			symbol_name,
1354 		       bool				demangle,
1355 		       vector<elf_symbol_sptr>&	syms_found)
1356 {
1357   size_t hash_table_index = 0, symbol_table_index = 0;
1358   hash_table_kind ht_kind = NO_HASH_TABLE_KIND;
1359 
1360   if (!demangle)
1361     ht_kind = find_hash_table_section_index(elf_handle,
1362 					    hash_table_index,
1363 					    symbol_table_index);
1364 
1365   if (ht_kind == NO_HASH_TABLE_KIND)
1366     {
1367       if (!find_symbol_table_section_index(elf_handle, symbol_table_index))
1368 	return false;
1369 
1370       return lookup_symbol_from_symtab(env,
1371 				       elf_handle,
1372 				       symbol_name,
1373 				       symbol_table_index,
1374 				       demangle,
1375 				       syms_found);
1376     }
1377 
1378   return lookup_symbol_from_elf_hash_tab(env,
1379 					 elf_handle,
1380 					 ht_kind,
1381 					 hash_table_index,
1382 					 symbol_table_index,
1383 					 symbol_name,
1384 					 demangle,
1385 					 syms_found);
1386 }
1387 
1388 /// Look into the symbol tables of the underlying elf file and see if
1389 /// we find a given public (global or weak) symbol of function type.
1390 ///
1391 /// @param env the environment we are operating from.
1392 ///
1393 /// @param elf_handle the elf handle to use for the query.
1394 ///
1395 /// @param symbol_name the function symbol to look for.
1396 ///
1397 /// @param func_syms the vector of public functions symbols found, if
1398 /// any.
1399 ///
1400 /// @return true iff the symbol was found.
1401 static bool
lookup_public_function_symbol_from_elf(const environment * env,Elf * elf_handle,const string & symbol_name,vector<elf_symbol_sptr> & func_syms)1402 lookup_public_function_symbol_from_elf(const environment*		env,
1403 				       Elf*				elf_handle,
1404 				       const string&			symbol_name,
1405 				       vector<elf_symbol_sptr>&	func_syms)
1406 {
1407   vector<elf_symbol_sptr> syms_found;
1408   bool found = false;
1409 
1410   if (lookup_symbol_from_elf(env, elf_handle, symbol_name,
1411 			     /*demangle=*/false, syms_found))
1412     {
1413       for (vector<elf_symbol_sptr>::const_iterator i = syms_found.begin();
1414 	   i != syms_found.end();
1415 	   ++i)
1416 	{
1417 	  elf_symbol::type type = (*i)->get_type();
1418 	  elf_symbol::binding binding = (*i)->get_binding();
1419 
1420 	  if ((type == elf_symbol::FUNC_TYPE
1421 	       || type == elf_symbol::GNU_IFUNC_TYPE
1422 	       || type == elf_symbol::COMMON_TYPE)
1423 	      && (binding == elf_symbol::GLOBAL_BINDING
1424 		  || binding == elf_symbol::WEAK_BINDING))
1425 	    {
1426 	      func_syms.push_back(*i);
1427 	      found = true;
1428 	    }
1429 	}
1430     }
1431 
1432   return found;
1433 }
1434 
1435 /// Get data tag information of an ELF file by looking up into its
1436 /// dynamic segment
1437 ///
1438 /// @param elf the elf handle to use for the query.
1439 ///
1440 /// @param dt_tag data tag to look for in dynamic segment
1441 /// @param dt_tag_data vector of found information for a given @p data_tag
1442 ///
1443 /// @return true iff data tag @p data_tag was found
1444 
1445 bool
lookup_data_tag_from_dynamic_segment(Elf * elf,Elf64_Sxword data_tag,vector<string> & dt_tag_data)1446 lookup_data_tag_from_dynamic_segment(Elf*                       elf,
1447                                      Elf64_Sxword               data_tag,
1448                                      vector<string>&            dt_tag_data)
1449 {
1450   size_t num_prog_headers = 0;
1451   bool found = false;
1452   if (elf_getphdrnum(elf, &num_prog_headers) < 0)
1453     return found;
1454 
1455   // Cycle through each program header.
1456   for (size_t i = 0; i < num_prog_headers; ++i)
1457     {
1458       GElf_Phdr phdr_mem;
1459       GElf_Phdr *phdr = gelf_getphdr(elf, i, &phdr_mem);
1460       if (phdr == NULL || phdr->p_type != PT_DYNAMIC)
1461         continue;
1462 
1463       // Poke at the dynamic segment like a section, so that we can
1464       // get its section header information; also we'd like to read
1465       // the data of the segment by using elf_getdata() but that
1466       // function needs a Elf_Scn data structure to act on.
1467       // Elfutils doesn't really have any particular function to
1468       // access segment data, other than the functions used to
1469       // access section data.
1470       Elf_Scn *dynamic_section = gelf_offscn(elf, phdr->p_offset);
1471       GElf_Shdr  shdr_mem;
1472       GElf_Shdr *dynamic_section_header = gelf_getshdr(dynamic_section,
1473 						       &shdr_mem);
1474       if (dynamic_section_header == NULL
1475           || dynamic_section_header->sh_type != SHT_DYNAMIC)
1476         continue;
1477 
1478       // Get data of the dynamic segment (seen as a section).
1479       Elf_Data *data = elf_getdata(dynamic_section, NULL);
1480       if (data == NULL)
1481         continue;
1482 
1483       // Get the index of the section headers string table.
1484       size_t string_table_index = 0;
1485       ABG_ASSERT (elf_getshdrstrndx(elf, &string_table_index) >= 0);
1486 
1487       size_t dynamic_section_header_entry_size = gelf_fsize(elf,
1488                                                             ELF_T_DYN, 1,
1489                                                             EV_CURRENT);
1490 
1491       GElf_Shdr link_mem;
1492       GElf_Shdr *link =
1493         gelf_getshdr(elf_getscn(elf,
1494                                 dynamic_section_header->sh_link),
1495 		     &link_mem);
1496       ABG_ASSERT(link != NULL);
1497 
1498       size_t num_dynamic_section_entries =
1499         dynamic_section_header->sh_size / dynamic_section_header_entry_size;
1500 
1501       // Now walk through all the DT_* data tags that are in the
1502       // segment/section
1503       for (size_t j = 0; j < num_dynamic_section_entries; ++j)
1504         {
1505           GElf_Dyn dynamic_section_mem;
1506           GElf_Dyn *dynamic_section = gelf_getdyn(data,
1507                                                   j,
1508                                                   &dynamic_section_mem);
1509           if (dynamic_section->d_tag == data_tag)
1510             {
1511               dt_tag_data.push_back(elf_strptr(elf,
1512                                                dynamic_section_header->sh_link,
1513 					       dynamic_section->d_un.d_val));
1514               found = true;
1515             }
1516         }
1517     }
1518   return found;
1519 }
1520 
1521 /// Convert the type of ELF file into @ref elf_type.
1522 ///
1523 /// @param elf the elf handle to use for the query.
1524 ///
1525 /// @return the @ref elf_type for a given elf type.
1526 static elf_type
elf_file_type(Elf * elf)1527 elf_file_type(Elf* elf)
1528 {
1529   GElf_Ehdr ehdr_mem;
1530   GElf_Ehdr *header = gelf_getehdr (elf, &ehdr_mem);
1531   vector<string> dt_debug_data;
1532 
1533   switch (header->e_type)
1534     {
1535     case ET_DYN:
1536       if (lookup_data_tag_from_dynamic_segment(elf, DT_DEBUG, dt_debug_data))
1537 	return ELF_TYPE_PI_EXEC;
1538       else
1539 	return ELF_TYPE_DSO;
1540     case ET_EXEC:
1541       return ELF_TYPE_EXEC;
1542     case ET_REL:
1543       return ELF_TYPE_RELOCATABLE;
1544     default:
1545       return ELF_TYPE_UNKNOWN;
1546     }
1547 }
1548 
1549 // ---------------------------------------
1550 // <location expression evaluation types>
1551 // ---------------------------------------
1552 
/// An abstraction of a value representing the result of the
/// evaluation of a dwarf expression.  This abstraction represents a
/// partial view on the possible values because we are only
/// interested in extracting the latest and longest constant
/// sub-expression of a given dwarf expression.
///
/// Unless stated otherwise, the binary arithmetic and bitwise
/// operators combine the raw values of their operands and yield a
/// result that is constant iff both operands are constant.
class expr_result
{
  bool is_const_;
  int64_t const_value_;

  /// Negate a value without ever overflowing: the computation is
  /// done on the unsigned representation, so negating the smallest
  /// 64 bits integer wraps around instead of being undefined.
  ///
  /// @param v the value to negate.
  ///
  /// @return the negated value.
  static int64_t
  wrapping_negate(int64_t v)
  {
    return static_cast<int64_t>(static_cast<uint64_t>(0)
                                - static_cast<uint64_t>(v));
  }

public:
  /// Default constructor: a constant which value is zero.
  expr_result()
    : is_const_(true),
      const_value_(0)
  {}

  /// Build a zero value that is constant or not.
  ///
  /// Beware: this constructor is not explicit, so an integer that is
  /// implicitly converted to @ref expr_result goes through 'bool'
  /// and looses its value.  Build results from integers with the
  /// explicit int64_t constructor.
  ///
  /// @param is_const true iff the value is a constant.
  expr_result(bool is_const)
    : is_const_(is_const),
      const_value_(0)
  {}

  /// Build a constant from a given value.
  ///
  /// @param v the value of the constant.
  explicit expr_result(int64_t v)
    :is_const_(true),
     const_value_(v)
  {}

  /// @return true if the value is a constant.  Otherwise, return
  /// false, meaning the value represents a quantity for which we need
  /// inferior (a running program) state to determine the value.
  bool
  is_const() const
  {return is_const_;}


  /// @param f a flag saying if the value is set to a constant or not.
  void
  is_const(bool f)
  {is_const_ = f;}

  /// Get the current constant value iff this represents a
  /// constant.
  ///
  /// @param value the out parameter.  Is set to the constant value of
  /// the @ref expr_result.  This is set iff the function return true.
  ///
  ///@return true if this has a constant value, false otherwise.
  bool
  const_value(int64_t& value) const
  {
    if (is_const())
      {
        value = const_value_;
        return true;
      }
    return false;
  }

  /// Getter of the constant value of the current @ref expr_result.
  ///
  /// Note that the current @ref expr_result must be constant,
  /// otherwise the current process is aborted.
  ///
  /// @return the constant value of the current @ref expr_result.
  int64_t
  const_value() const
  {
    ABG_ASSERT(is_const());
    return const_value_;
  }

  /// Conversion to the constant value; same contract as
  /// const_value().
  operator int64_t() const
  {return const_value();}

  /// Set the value.  The constness flag is left untouched.
  ///
  /// @param v the new value.
  expr_result&
  operator=(const int64_t v)
  {
    const_value_ = v;
    return *this;
  }

  /// Two results are equal iff they have the same value and the same
  /// constness.
  bool
  operator==(const expr_result& o) const
  {return const_value_ == o.const_value_ && is_const_ == o.is_const_;}

  // The ordering operators only compare the values; the constness
  // flags are not looked at.

  bool
  operator>=(const expr_result& o) const
  {return const_value_ >= o.const_value_;}

  bool
  operator<=(const expr_result& o) const
  {return const_value_ <= o.const_value_;}

  bool
  operator>(const expr_result& o) const
  {return const_value_ > o.const_value_;}

  bool
  operator<(const expr_result& o) const
  {return const_value_ < o.const_value_;}

  expr_result
  operator+(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ += v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  /// Add to the value.  The constness flag is left untouched.
  expr_result&
  operator+=(int64_t v)
  {
    const_value_ += v;
    return *this;
  }

  expr_result
  operator-(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ -= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  /// Remainder of the division by @p v.
  ///
  /// A zero divisor has no defined remainder: the result is then a
  /// non-constant which value is zero.
  expr_result
  operator%(const expr_result& v) const
  {
    expr_result r(*this);
    r.is_const_ = r.is_const_ && v.is_const();
    if (v.const_value_ == 0)
      {
        r.const_value_ = 0;
        r.is_const_ = false;
      }
    else if (v.const_value_ == -1)
      // Anything modulo -1 is zero; computing it directly would
      // overflow for the smallest 64 bits integer.
      r.const_value_ = 0;
    else
      r.const_value_ %= v.const_value_;
    return r;
  }

  expr_result
  operator*(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ *= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const();
    return r;
  }

  expr_result
  operator|(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ |= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  expr_result
  operator^(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ ^= v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  expr_result
  operator>>(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ = r.const_value_ >> v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  expr_result
  operator<<(const expr_result& v) const
  {
    expr_result r(*this);
    r.const_value_ = r.const_value_ << v.const_value_;
    r.is_const_ = r.is_const_ && v.is_const_;
    return r;
  }

  /// Bitwise complement of the value; the constness is kept.
  expr_result
  operator~() const
  {
    expr_result r(*this);
    r.const_value_ = ~r.const_value_;
    return r;
  }

  /// Negation of the value; the constness is kept.
  expr_result
  neg() const
  {
    expr_result r(*this);
    r.const_value_ = wrapping_negate(r.const_value_);
    return r;
  }

  /// Absolute value.
  ///
  /// Note that the current @ref expr_result must be constant,
  /// otherwise the current process is aborted.
  expr_result
  abs() const
  {
    expr_result r = *this;
    // Pure integer arithmetic: going through a floating point type
    // would loose precision where that type has less than 64 bits of
    // mantissa.
    const int64_t v = r.const_value();
    r.const_value_ = v < 0 ? wrapping_negate(v) : v;
    return r;
  }

  expr_result
  operator&(const expr_result& o) const
  {
    expr_result r(*this);
    r.const_value_ &= o.const_value_;
    r.is_const_ = r.is_const_ && o.is_const_;
    return r;
  }

  /// Quotient of the division by @p o.
  ///
  /// The quotient is stored in the result explicitly.  Returning the
  /// bare integer would convert it through the non-explicit 'bool'
  /// constructor and drop the quotient.
  ///
  /// A zero divisor has no defined quotient: the result is then a
  /// non-constant which value is zero.
  expr_result
  operator/(const expr_result& o) const
  {
    expr_result r(*this);
    r.is_const_ = r.is_const_ && o.is_const_;
    if (o.const_value_ == 0)
      {
        r.const_value_ = 0;
        r.is_const_ = false;
      }
    else if (o.const_value_ == -1)
      // Dividing by -1 is negating; computing it directly would
      // overflow for the smallest 64 bits integer.
      r.const_value_ = wrapping_negate(r.const_value_);
    else
      r.const_value_ /= o.const_value_;
    return r;
  }
};// class end expr_result;
1773 
1774 /// A class that implements a stack of @ref expr_result, to be used in
1775 /// the engine evaluating DWARF expressions.
1776 class expr_result_stack_type
1777 {
1778   vector<expr_result> elems_;
1779 
1780 public:
1781 
expr_result_stack_type()1782   expr_result_stack_type()
1783   {elems_.reserve(4);}
1784 
1785   expr_result&
operator [](unsigned i)1786   operator[](unsigned i)
1787   {
1788     unsigned s = elems_.size();
1789     ABG_ASSERT(s > i);
1790     return elems_[s - 1 -i];
1791   }
1792 
1793   const expr_result&
operator [](unsigned i) const1794   operator[](unsigned i) const
1795   {return const_cast<expr_result_stack_type*>(this)->operator[](i);}
1796 
1797   unsigned
size() const1798   size() const
1799   {return elems_.size();}
1800 
1801   vector<expr_result>::reverse_iterator
begin()1802   begin()
1803   {return elems_.rbegin();}
1804 
1805   const vector<expr_result>::reverse_iterator
begin() const1806   begin() const
1807   {return const_cast<expr_result_stack_type*>(this)->begin();}
1808 
1809   vector<expr_result>::reverse_iterator
end()1810   end()
1811   {return elems_.rend();}
1812 
1813   const vector<expr_result>::reverse_iterator
end() const1814   end() const
1815   {return const_cast<expr_result_stack_type*>(this)->end();}
1816 
1817   expr_result&
front()1818   front()
1819   {return elems_.back();}
1820 
1821   const expr_result&
front() const1822   front() const
1823   {return const_cast<expr_result_stack_type*>(this)->front();}
1824 
1825   void
push_front(expr_result e)1826   push_front(expr_result e)
1827   {elems_.push_back(e);}
1828 
1829   expr_result
pop_front()1830   pop_front()
1831   {
1832     expr_result r = front();
1833     elems_.pop_back();
1834     return r;
1835   }
1836 
1837   void
erase(vector<expr_result>::reverse_iterator i)1838   erase(vector<expr_result>::reverse_iterator i)
1839   {elems_.erase(--i.base());}
1840 
1841   void
clear()1842   clear()
1843   {elems_.clear();}
1844 }; // end class expr_result_stack_type
1845 
1846 /// Abstraction of the evaluation context of a dwarf expression.
1847 struct dwarf_expr_eval_context
1848 {
1849   expr_result accum;
1850   expr_result_stack_type stack;
1851   // Is set to true if the result of the expression that got evaluated
1852   // is a TLS address.
1853   bool set_tls_addr;
1854 
dwarf_expr_eval_contextabigail::dwarf_reader::dwarf_expr_eval_context1855   dwarf_expr_eval_context()
1856     : accum(/*is_const=*/false),
1857       set_tls_addr(false)
1858   {
1859     stack.push_front(expr_result(true));
1860   }
1861 
1862   void
resetabigail::dwarf_reader::dwarf_expr_eval_context1863   reset()
1864   {
1865     stack.clear();
1866     stack.push_front(expr_result(true));
1867     accum = expr_result(false);
1868     set_tls_addr = false;
1869   }
1870 
1871   /// Set a flag to to tell that the result of the expression that got
1872   /// evaluated is a TLS address.
1873   ///
1874   /// @param f true iff the result of the expression that got
1875   /// evaluated is a TLS address, false otherwise.
1876   void
set_tls_addressabigail::dwarf_reader::dwarf_expr_eval_context1877   set_tls_address(bool f)
1878   {set_tls_addr = f;}
1879 
1880   /// Getter for the flag that tells if the result of the expression
1881   /// that got evaluated is a TLS address.
1882   ///
1883   /// @return true iff the result of the expression that got evaluated
1884   /// is a TLS address.
1885   bool
set_tls_addressabigail::dwarf_reader::dwarf_expr_eval_context1886   set_tls_address() const
1887   {return set_tls_addr;}
1888 
1889   expr_result
popabigail::dwarf_reader::dwarf_expr_eval_context1890   pop()
1891   {
1892     expr_result r = stack.front();
1893     stack.pop_front();
1894     return r;
1895   }
1896 
1897   void
pushabigail::dwarf_reader::dwarf_expr_eval_context1898   push(const expr_result& v)
1899   {stack.push_front(v);}
1900 };//end class dwarf_expr_eval_context
1901 
1902 // ---------------------------------------
1903 // </location expression evaluation types>
1904 // ---------------------------------------
1905 
1906 /// The context used to build ABI corpus from debug info in DWARF
1907 /// format.
1908 ///
1909 /// This context is to be created by create_read_context().  It's then
1910 /// passed to all the routines that read specific dwarf bits as they
1911 /// get some important data from it.
1912 ///
/// When a new data member is added to this context, it must be
/// initialized by the read_context::initialize() function.  So please
/// do not forget.
1916 class read_context
1917 {
1918 public:
1919   struct options_type
1920   {
1921     environment*	env;
1922     bool		load_in_linux_kernel_mode;
1923     bool		load_all_types;
1924     bool		show_stats;
1925     bool		do_log;
1926 
options_typeabigail::dwarf_reader::read_context::options_type1927     options_type()
1928       : env(),
1929 	load_in_linux_kernel_mode(),
1930 	load_all_types(),
1931 	show_stats(),
1932 	do_log()
1933     {}
1934   };// read_context::options_type
1935 
1936   /// A set of containers that contains one container per kind of @ref
1937   /// die_source.  This allows to associate DIEs to things, depending
1938   /// on the source of the DIE.
1939   template <typename ContainerType>
1940   class die_source_dependant_container_set
1941   {
1942     ContainerType primary_debug_info_container_;
1943     ContainerType alt_debug_info_container_;
1944     ContainerType type_unit_container_;
1945 
1946   public:
1947 
1948     /// Getter for the container associated to DIEs coming from a
1949     /// given @ref die_source.
1950     ///
1951     /// @param source the die_source for which we want the container.
1952     ///
1953     /// @return the container that associates DIEs coming from @p
1954     /// source to something.
1955     ContainerType&
get_container(die_source source)1956     get_container(die_source source)
1957     {
1958       ContainerType *result = 0;
1959       switch (source)
1960 	{
1961 	case PRIMARY_DEBUG_INFO_DIE_SOURCE:
1962 	  result = &primary_debug_info_container_;
1963 	  break;
1964 	case ALT_DEBUG_INFO_DIE_SOURCE:
1965 	  result = &alt_debug_info_container_;
1966 	  break;
1967 	case TYPE_UNIT_DIE_SOURCE:
1968 	  result = &type_unit_container_;
1969 	  break;
1970 	case NO_DEBUG_INFO_DIE_SOURCE:
1971 	case NUMBER_OF_DIE_SOURCES:
1972 	  ABG_ASSERT_NOT_REACHED;
1973 	}
1974       return *result;
1975     }
1976 
1977     /// Getter for the container associated to DIEs coming from a
1978     /// given @ref die_source.
1979     ///
1980     /// @param source the die_source for which we want the container.
1981     ///
1982     /// @return the container that associates DIEs coming from @p
1983     /// source to something.
1984     const ContainerType&
get_container(die_source source) const1985     get_container(die_source source) const
1986     {
1987       return const_cast<die_source_dependant_container_set*>(this)->
1988 	get_container(source);
1989     }
1990 
1991     /// Getter for the container associated to DIEs coming from the
1992     /// same source as a given DIE.
1993     ///
1994     /// @param ctxt the read context to consider.
1995     ///
1996     /// @param die the DIE which should have the same source as the
1997     /// source of the container we want.
1998     ///
1999     /// @return the container that associates DIEs coming from the
2000     /// same source as @p die.
2001     ContainerType&
get_container(const read_context & ctxt,const Dwarf_Die * die)2002     get_container(const read_context& ctxt, const Dwarf_Die *die)
2003     {
2004       const die_source source = ctxt.get_die_source(die);
2005       return get_container(source);
2006     }
2007 
2008     /// Getter for the container associated to DIEs coming from the
2009     /// same source as a given DIE.
2010     ///
2011     /// @param ctxt the read context to consider.
2012     ///
2013     /// @param die the DIE which should have the same source as the
2014     /// source of the container we want.
2015     ///
2016     /// @return the container that associates DIEs coming from the
2017     /// same source as @p die.
2018     const ContainerType&
get_container(const read_context & ctxt,const Dwarf_Die * die) const2019     get_container(const read_context& ctxt, const Dwarf_Die *die) const
2020     {
2021       return const_cast<die_source_dependant_container_set*>(this)->
2022 	get_container(ctxt, die);
2023     }
2024 
2025     /// Clear the container set.
2026     void
clear()2027     clear()
2028     {
2029       primary_debug_info_container_.clear();
2030       alt_debug_info_container_.clear();
2031       type_unit_container_.clear();
2032     }
2033   }; // end die_dependant_container_set
2034 
  suppr::suppressions_type	supprs_;
  unsigned short		dwarf_version_;
  Dwfl_Callbacks		offline_callbacks_;
  // The set of directories under which to look for debug info.
  vector<char**>		debug_info_root_paths_;
  // The Dwfl handle; it is created by create_default_dwfl().
  dwfl_sptr			handle_;
  // The main debug info; it is set by load_debug_info().
  Dwarf*			dwarf_;
  // The alternate debug info.  Alternate debug info sections are a
  // DWARF extension as of DWARF4 and are described at
  // http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.  Below are
  // the file descriptor used to access the alternate debug info
  // sections, and the representation of the DWARF debug info.  Both
  // need to be released after we are done using them, with close and
  // dwarf_end; this is what clear_alt_debug_info_data() does.
  int				alt_fd_;
  Dwarf*			alt_dwarf_;
  string			alt_debug_info_path_;
  // The address range of the offline elf file we are looking at.
  Dwfl_Module*			elf_module_;
  // Lazily set by elf_handle(), from elf_module_.
  mutable Elf*			elf_handle_;
  string			elf_path_;
  mutable Elf_Scn*		symtab_section_;
  Dwarf_Die*			cur_tu_die_;
  mutable dwarf_expr_eval_context	dwarf_expr_eval_context_;
  // A set of maps (one per kind of die source) that associates a decl
  // string representation with the DIEs (offsets) representing that
  // decl.
  mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
  decl_die_repr_die_offsets_maps_;
  // A set of maps (one per kind of die source) that associates a type
  // string representation with the DIEs (offsets) representing that
  // type.
  mutable die_source_dependant_container_set<istring_dwarf_offsets_map_type>
  type_die_repr_die_offsets_maps_;
  mutable die_source_dependant_container_set<die_istring_map_type>
  die_qualified_name_maps_;
  mutable die_source_dependant_container_set<die_istring_map_type>
  die_pretty_repr_maps_;
  mutable die_source_dependant_container_set<die_istring_map_type>
  die_pretty_type_repr_maps_;
  // A set of maps (one per kind of die source) that associates the
  // offset of a decl die to its corresponding decl artifact.
  mutable die_source_dependant_container_set<die_artefact_map_type>
  decl_die_artefact_maps_;
  // A set of maps (one per kind of die source) that associates the
  // offset of a type die to its corresponding type artifact.
  mutable die_source_dependant_container_set<die_artefact_map_type>
  type_die_artefact_maps_;
  /// A set of vectors (one per kind of die source) that associates
  /// the offset of a type DIE to the offset of its canonical DIE.
  mutable die_source_dependant_container_set<offset_offset_map_type>
  canonical_type_die_offsets_;
  /// A set of vectors (one per kind of die source) that associates
  /// the offset of a decl DIE to the offset of its canonical DIE.
  mutable die_source_dependant_container_set<offset_offset_map_type>
  canonical_decl_die_offsets_;
  /// A map that associates a function type representations to
  /// function types, inside a translation unit.
  mutable istring_fn_type_map_type per_tu_repr_to_fn_type_maps_;

  die_class_or_union_map_type	die_wip_classes_map_;
  die_class_or_union_map_type	alternate_die_wip_classes_map_;
  die_class_or_union_map_type	type_unit_die_wip_classes_map_;
  die_function_type_map_type	die_wip_function_types_map_;
  die_function_type_map_type	alternate_die_wip_function_types_map_;
  die_function_type_map_type	type_unit_die_wip_function_types_map_;
  die_function_decl_map_type	die_function_with_no_symbol_map_;
  vector<Dwarf_Off>		types_to_canonicalize_;
  vector<Dwarf_Off>		alt_types_to_canonicalize_;
  vector<Dwarf_Off>		type_unit_types_to_canonicalize_;
  vector<type_base_sptr>	extra_types_to_canonicalize_;
  string_classes_map		decl_only_classes_map_;
  string_enums_map		decl_only_enums_map_;
  die_tu_map_type		die_tu_map_;
  corpus_group_sptr		cur_corpus_group_;
  corpus_sptr			cur_corpus_;
  translation_unit_sptr	cur_tu_;
  scope_decl_sptr		nil_scope_;
  scope_stack_type		scope_stack_;
  offset_offset_map_type	primary_die_parent_map_;
  // A map that associates each tu die to a vector of unit import
  // points, in the main debug info
  tu_die_imported_unit_points_map_type tu_die_imported_unit_points_map_;
  // A map that associates each tu die to a vector of unit import
  // points, in the alternate debug info
  tu_die_imported_unit_points_map_type alt_tu_die_imported_unit_points_map_;
  tu_die_imported_unit_points_map_type type_units_tu_die_imported_unit_points_map_;
  // A DIE -> parent map for DIEs coming from the alternate debug info
  // file.
  offset_offset_map_type	alternate_die_parent_map_;
  offset_offset_map_type	type_section_die_parent_map_;
  list<var_decl_sptr>		var_decls_to_add_;
  vector<string>		dt_needed_;
  string			dt_soname_;
  string			elf_architecture_;
  corpus::exported_decls_builder* exported_decls_builder_;
  options_type			options_;
  bool				drop_undefined_syms_;
  bool				merge_translation_units_;
  // Default constructor; only its declaration appears here.
  // NOTE(review): presumably left undefined to forbid default
  // construction -- confirm.
  read_context();

private:
  // Reset by initialize().
  mutable symtab_reader::symtab_sptr symtab_;
2138 
2139 public:
2140 
2141   /// Constructor of read_context.
2142   ///
2143   /// @param elf_path the path to the elf file the context is to be
2144   /// used for.
2145   ///
2146   /// @param debug_info_root_paths a vector of pointers to the path to
2147   /// the root directory under which the debug info is to be found for
2148   /// @p elf_path.  Leave this empty if the debug info is not in a
2149   /// split file.
2150   ///
2151   /// @param environment the environment used by the current context.
2152   /// This environment contains resources needed by the reader and by
2153   /// the types and declarations that are to be created later.  Note
2154   /// that ABI artifacts that are to be compared all need to be
2155   /// created within the same environment.
2156   ///
2157   /// Please also note that the life time of this environment object
2158   /// must be greater than the life time of the resulting @ref
2159   /// read_context the context uses resources that are allocated in
2160   /// the environment.
2161   ///
2162   /// @param load_all_types if set to false only the types that are
2163   /// reachable from publicly exported declarations (of functions and
2164   /// variables) are read.  If set to true then all types found in the
2165   /// debug information are loaded.
2166   ///
2167   /// @param linux_kernel_mode if set to true, then consider the special
2168   /// linux kernel symbol tables when determining if a symbol is
2169   /// exported or not.
read_context(const string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,bool linux_kernel_mode)2170   read_context(const string&	elf_path,
2171 	       const vector<char**>& debug_info_root_paths,
2172 	       ir::environment* environment,
2173 	       bool		load_all_types,
2174 	       bool		linux_kernel_mode)
2175   {
2176     initialize(elf_path, debug_info_root_paths, environment,
2177 	       load_all_types, linux_kernel_mode);
2178   }
2179 
2180   /// Initializer of read_context.
2181   ///
2182   /// @param elf_path the path to the elf file the context is to be
2183   /// used for.
2184   ///
2185   /// @param debug_info_root_paths a vector of pointers to the path to
2186   /// the root directory under which the debug info is to be found for
2187   /// @p elf_path.  Leave this empty if the debug info is not in a
2188   /// split file.
2189   ///
2190   /// @param environment the environment used by the current context.
2191   /// This environment contains resources needed by the reader and by
2192   /// the types and declarations that are to be created later.  Note
2193   /// that ABI artifacts that are to be compared all need to be
2194   /// created within the same environment.
2195   ///
2196   /// Please also note that the life time of this environment object
2197   /// must be greater than the life time of the resulting @ref
2198   /// read_context the context uses resources that are allocated in
2199   /// the environment.
2200   ///
2201   /// @param load_all_types if set to false only the types that are
2202   /// reachable from publicly exported declarations (of functions and
2203   /// variables) are read.  If set to true then all types found in the
2204   /// debug information are loaded.
2205   ///
2206   /// @param linux_kernel_mode if set to true, then consider the
2207   /// special linux kernel symbol tables when determining if a symbol
2208   /// is exported or not.
2209   void
initialize(const string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,bool linux_kernel_mode)2210   initialize(const string&	elf_path,
2211 	     const vector<char**>& debug_info_root_paths,
2212 	     ir::environment* environment,
2213 	     bool		load_all_types,
2214 	     bool		linux_kernel_mode)
2215   {
2216     dwarf_version_ = 0;
2217     dwarf_ = 0;
2218     handle_.reset();
2219     alt_fd_ = 0;
2220     alt_dwarf_ = 0;
2221     elf_module_ = 0;
2222     elf_handle_ = 0;
2223     elf_path_ = elf_path;
2224     symtab_section_ = 0;
2225     cur_tu_die_ =  0;
2226     exported_decls_builder_ = 0;
2227 
2228     clear_alt_debug_info_data();
2229 
2230     supprs_.clear();
2231     decl_die_repr_die_offsets_maps_.clear();
2232     type_die_repr_die_offsets_maps_.clear();
2233     die_qualified_name_maps_.clear();
2234     die_pretty_repr_maps_.clear();
2235     die_pretty_type_repr_maps_.clear();
2236     decl_die_artefact_maps_.clear();
2237     type_die_artefact_maps_.clear();
2238     canonical_type_die_offsets_.clear();
2239     canonical_decl_die_offsets_.clear();
2240     die_wip_classes_map_.clear();
2241     alternate_die_wip_classes_map_.clear();
2242     type_unit_die_wip_classes_map_.clear();
2243     die_wip_function_types_map_.clear();
2244     alternate_die_wip_function_types_map_.clear();
2245     type_unit_die_wip_function_types_map_.clear();
2246     die_function_with_no_symbol_map_.clear();
2247     types_to_canonicalize_.clear();
2248     alt_types_to_canonicalize_.clear();
2249     type_unit_types_to_canonicalize_.clear();
2250     extra_types_to_canonicalize_.clear();
2251     decl_only_classes_map_.clear();
2252     die_tu_map_.clear();
2253     cur_corpus_group_.reset();
2254     cur_corpus_.reset();
2255     cur_tu_.reset();
2256     primary_die_parent_map_.clear();
2257     tu_die_imported_unit_points_map_.clear();
2258     alt_tu_die_imported_unit_points_map_.clear();
2259     type_units_tu_die_imported_unit_points_map_.clear();
2260     alternate_die_parent_map_.clear();
2261     type_section_die_parent_map_.clear();
2262     var_decls_to_add_.clear();
2263     dt_needed_.clear();
2264     dt_soname_.clear();
2265     elf_architecture_.clear();
2266 
2267     symtab_.reset();
2268 
2269     clear_per_translation_unit_data();
2270 
2271     memset(&offline_callbacks_, 0, sizeof(offline_callbacks_));
2272     create_default_dwfl(debug_info_root_paths);
2273     options_.env = environment;
2274     options_.load_in_linux_kernel_mode = linux_kernel_mode;
2275     options_.load_all_types = load_all_types;
2276     drop_undefined_syms_ = false;
2277     merge_translation_units_ = false;
2278     load_in_linux_kernel_mode(linux_kernel_mode);
2279   }
2280 
2281   /// Clear the resources related to the alternate DWARF data.
2282   void
clear_alt_debug_info_data()2283   clear_alt_debug_info_data()
2284   {
2285     if (alt_fd_)
2286       {
2287 	close(alt_fd_);
2288 	alt_fd_ = 0;
2289 	if (alt_dwarf_)
2290 	  {
2291 	    dwarf_end(alt_dwarf_);
2292 	    alt_dwarf_ = 0;
2293 	  }
2294 	alt_debug_info_path_.clear();
2295       }
2296   }
2297 
2298   /// Detructor of the @ref read_context type.
~read_context()2299   ~read_context()
2300   {
2301     clear_alt_debug_info_data();
2302   }
2303 
2304   /// Clear the data that is relevant only for the current translation
2305   /// unit being read.  The rest of the data is relevant for the
2306   /// entire ABI corpus.
2307   void
clear_per_translation_unit_data()2308   clear_per_translation_unit_data()
2309   {
2310     while (!scope_stack().empty())
2311       scope_stack().pop();
2312     var_decls_to_re_add_to_tree().clear();
2313     per_tu_repr_to_fn_type_maps().clear();
2314   }
2315 
2316   /// Clear the data that is relevant for the current corpus being
2317   /// read.
2318   void
clear_per_corpus_data()2319   clear_per_corpus_data()
2320   {
2321     die_qualified_name_maps_.clear();
2322     die_pretty_repr_maps_.clear();
2323     die_pretty_type_repr_maps_.clear();
2324     clear_types_to_canonicalize();
2325   }
2326 
2327   /// Getter for the current environment.
2328   ///
2329   /// @return the current environment.
2330   const ir::environment*
env() const2331   env() const
2332   {return options_.env;}
2333 
2334   /// Getter for the current environment.
2335   ///
2336   /// @return the current environment.
2337   ir::environment*
env()2338   env()
2339   {return options_.env;}
2340 
2341   /// Setter for the current environment.
2342   ///
2343   /// @param env the new current environment.
2344   void
env(ir::environment * env)2345   env(ir::environment* env)
2346   {options_.env = env;}
2347 
2348   /// Getter for the flag that tells us if we are dropping functions
2349   /// and variables that have undefined symbols.
2350   ///
2351   /// @return true iff we are dropping functions and variables that have
2352   /// undefined symbols.
2353   bool
drop_undefined_syms() const2354   drop_undefined_syms() const
2355   {return drop_undefined_syms_;}
2356 
2357   /// Setter for the flag that tells us if we are dropping functions
2358   /// and variables that have undefined symbols.
2359   ///
2360   /// @param f the new value of the flag.
2361   void
drop_undefined_syms(bool f)2362   drop_undefined_syms(bool f)
2363   {drop_undefined_syms_ = f;}
2364 
2365   /// Setter for the flag that tells us if we are merging translation
2366   /// units.
2367   ///
2368   /// @param f the new value of the flag.
2369   void
merge_translation_units(bool f)2370   merge_translation_units(bool f)
2371   {merge_translation_units_ = f;}
2372 
2373   /// Getter for the flag that tells us if we are merging translation
2374   /// units.
2375   ///
2376   /// @return true iff we are merging translation units.
2377   bool
merge_translation_units() const2378   merge_translation_units() const
2379   {return merge_translation_units_;}
2380 
2381   /// Getter of the suppression specifications to be used during
2382   /// ELF/DWARF parsing.
2383   ///
2384   /// @return the suppression specifications.
2385   const suppr::suppressions_type&
get_suppressions() const2386   get_suppressions() const
2387   {return supprs_;}
2388 
2389   /// Getter of the suppression specifications to be used during
2390   /// ELF/DWARF parsing.
2391   ///
2392   /// @return the suppression specifications.
2393   suppr::suppressions_type&
get_suppressions()2394   get_suppressions()
2395   {return supprs_;}
2396 
2397   /// Getter for the callbacks of the Dwarf Front End library of
2398   /// elfutils that is used by this reader to read dwarf.
2399   ///
2400   /// @return the callbacks.
2401   const Dwfl_Callbacks*
offline_callbacks() const2402   offline_callbacks() const
2403   {return &offline_callbacks_;}
2404 
2405   /// Getter for the callbacks of the Dwarf Front End library of
2406   /// elfutils that is used by this reader to read dwarf.
2407   /// @returnthe callbacks
2408   Dwfl_Callbacks*
offline_callbacks()2409   offline_callbacks()
2410   {return &offline_callbacks_;}
2411 
2412   /// Constructor for a default Dwfl handle that knows how to load debug
2413   /// info from a library or executable elf file.
2414   ///
2415   /// @param debug_info_root_paths a vector of pointers to the root
2416   /// path under which to look for the debug info of the elf files
2417   /// that are later handled by the Dwfl.  This is for cases where the
2418   /// debug info is split into a different file from the binary we
2419   /// want to inspect.  On Red Hat compatible systems, this root path
2420   /// is usually /usr/lib/debug by default.  If this argument is set
2421   /// to the empty set, then "./debug" and /usr/lib/debug will be
2422   /// searched for sub-directories containing the debug info file.
2423   /// Note that for now, elfutils wants this path to be absolute
2424   /// otherwise things just don't work and the debug info is not
2425   /// found.
2426   ///
2427   /// @return the constructed Dwfl handle.
2428   void
create_default_dwfl(const vector<char ** > & debug_info_root_paths)2429   create_default_dwfl(const vector<char**>& debug_info_root_paths)
2430   {
2431     offline_callbacks()->find_debuginfo = dwfl_standard_find_debuginfo;
2432     offline_callbacks()->section_address = dwfl_offline_section_address;
2433     offline_callbacks()->debuginfo_path =
2434       debug_info_root_paths.empty() ? 0 : debug_info_root_paths.front();
2435     handle_.reset(dwfl_begin(offline_callbacks()),
2436 		  dwfl_deleter());
2437     debug_info_root_paths_ = debug_info_root_paths;
2438   }
2439 
2440   unsigned short
dwarf_version() const2441   dwarf_version() const
2442   {return dwarf_version_;}
2443 
2444   void
dwarf_version(unsigned short v)2445   dwarf_version(unsigned short v)
2446   {dwarf_version_ = v;}
2447 
2448   /// Getter for a smart pointer to a handle on the dwarf front end
2449   /// library that we use to read dwarf.
2450   ///
2451   /// @return the dwfl handle.
2452   dwfl_sptr
dwfl_handle() const2453   dwfl_handle() const
2454   {return handle_;}
2455 
2456   /// Setter for a smart pointer to a handle on the dwarf front end
2457   /// library that we use to read dwarf.
2458   ///
2459   /// @param h the new dwfl handle.
2460   void
dwfl_handle(dwfl_sptr & h)2461   dwfl_handle(dwfl_sptr& h)
2462   {handle_ = h;}
2463 
2464   Dwfl_Module*
elf_module() const2465   elf_module() const
2466   {return elf_module_;}
2467 
2468   /// Return the ELF descriptor for the binary we are analizing.
2469   ///
2470   /// @return a pointer to the Elf descriptor representing the binary
2471   /// we are analizing.
2472   Elf*
elf_handle() const2473   elf_handle() const
2474   {
2475     if (elf_handle_ == 0)
2476       {
2477 	if (elf_module())
2478 	  {
2479 	    GElf_Addr bias = 0;
2480 	    elf_handle_ = dwfl_module_getelf(elf_module(), &bias);
2481 	  }
2482       }
2483     return elf_handle_;
2484   }
2485 
2486   /// Return the ELF descriptor used for DWARF access.
2487   ///
2488   /// This can be the same as read_context::elf_handle() above, if the
2489   /// DWARF info is in the same ELF file as the one of the binary we
2490   /// are analizing.  It is different if e.g, the debug info is split
2491   /// from the ELF file we are analizing.
2492   ///
2493   /// @return a pointer to the ELF descriptor used to access debug
2494   /// info.
2495   Elf*
dwarf_elf_handle() const2496   dwarf_elf_handle() const
2497   {return dwarf_getelf(dwarf());}
2498 
2499   /// Test if the debug information is in a separate ELF file wrt the
2500   /// main ELF file of the program (application or shared library) we
2501   /// are analizing.
2502   ///
2503   /// @return true if the debug information is in a separate ELF file
2504   /// compared to the main ELF file of the program (application or
2505   /// shared library) that we are looking at.
2506   bool
dwarf_is_splitted() const2507   dwarf_is_splitted() const
2508   {return dwarf_elf_handle() != elf_handle();}
2509 
2510   /// Add paths to the set of paths under which to look for split
2511   /// debuginfo files.
2512   ///
2513   /// @param debug_info_root_paths the paths to add.
2514   void
add_debug_info_root_paths(const vector<char ** > & debug_info_root_paths)2515   add_debug_info_root_paths(const vector<char **>& debug_info_root_paths)
2516   {
2517     debug_info_root_paths_.insert(debug_info_root_paths_.end(),
2518 				  debug_info_root_paths.begin(),
2519 				  debug_info_root_paths.end());
2520   }
2521 
2522   /// Add a path to the set of paths under which to look for split
2523   /// debuginfo files.
2524   ///
2525   /// @param debug_info_root_path the path to add.
2526   void
add_debug_info_root_path(char ** debug_info_root_path)2527   add_debug_info_root_path(char** debug_info_root_path)
2528   {debug_info_root_paths_.push_back(debug_info_root_path);}
2529 
2530   /// Find the alternate debuginfo file associated to a given elf file.
2531   ///
2532   /// @param elf_module represents the elf file to consider.
2533   ///
2534   /// @param alt_file_name the resulting path to the alternate
2535   /// debuginfo file found.  This is set iff the function returns a
2536   /// non-nil value.
2537   Dwarf*
find_alt_debug_info(Dwfl_Module * elf_module,string & alt_file_name,int & alt_fd)2538   find_alt_debug_info(Dwfl_Module *elf_module,
2539 		      string& alt_file_name,
2540 		      int& alt_fd)
2541   {
2542     Dwarf *result = 0;
2543     result = dwarf_reader::find_alt_debug_info(elf_module,
2544 					       debug_info_root_paths_,
2545 					       alt_file_name, alt_fd);
2546     return result;
2547   }
2548 
2549   /// Load the debug info associated with an elf file that is at a
2550   /// given path.
2551   ///
2552   /// @return a pointer to the DWARF debug info pointer upon
2553   /// successful debug info loading, NULL otherwise.
2554   Dwarf*
load_debug_info()2555   load_debug_info()
2556   {
2557     if (!dwfl_handle())
2558       return 0;
2559 
2560     if (dwarf_)
2561       return dwarf_;
2562 
2563     elf_module_ =
2564       dwfl_report_offline(dwfl_handle().get(),
2565 			  basename(const_cast<char*>(elf_path().c_str())),
2566 			  elf_path().c_str(),
2567 			  -1);
2568     dwfl_report_end(dwfl_handle().get(), 0, 0);
2569 
2570     Dwarf_Addr bias = 0;
2571     dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
2572     // Look for split debuginfo files under multiple possible
2573     // debuginfo roots.
2574     for (vector<char**>::const_iterator i = debug_info_root_paths_.begin();
2575 	 dwarf_ == 0 && i != debug_info_root_paths_.end();
2576 	 ++i)
2577       {
2578 	offline_callbacks()->debuginfo_path = *i;
2579 	dwarf_ = dwfl_module_getdwarf(elf_module_, &bias);
2580       }
2581 
2582     if (!alt_dwarf_)
2583       alt_dwarf_ = find_alt_debug_info(elf_module_,
2584 				       alt_debug_info_path_,
2585 				       alt_fd_);
2586 
2587     return dwarf_;
2588   }
2589 
2590   /// Return the main debug info we are looking at.
2591   ///
2592   /// @return the main debug info.
2593   Dwarf*
dwarf() const2594   dwarf() const
2595   {return dwarf_;}
2596 
2597   /// Return the alternate debug info we are looking at.
2598   ///
2599   /// Note that "alternate debug info sections" is a GNU extension as
2600   /// of DWARF4 and is described at
2601   /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
2602   ///
2603   /// @return the alternate debug info.
2604   Dwarf*
alt_dwarf() const2605   alt_dwarf() const
2606   {return alt_dwarf_;}
2607 
2608   /// Return the correct debug info, depending on the DIE source we
2609   /// are looking at.
2610   ///
2611   /// @param source the DIE source to consider.
2612   ///
2613   /// @return the right debug info, depending on @p source.
2614   Dwarf*
dwarf_per_die_source(die_source source) const2615   dwarf_per_die_source(die_source source) const
2616   {
2617     Dwarf *result = 0;
2618     switch(source)
2619       {
2620       case PRIMARY_DEBUG_INFO_DIE_SOURCE:
2621       case TYPE_UNIT_DIE_SOURCE:
2622 	result = dwarf();
2623 	break;
2624       case ALT_DEBUG_INFO_DIE_SOURCE:
2625 	result = alt_dwarf();
2626 	break;
2627       case NO_DEBUG_INFO_DIE_SOURCE:
2628       case NUMBER_OF_DIE_SOURCES:
2629 	ABG_ASSERT_NOT_REACHED;
2630       }
2631     return result;
2632   }
2633 
2634   /// Return the path to the alternate debug info as contained in the
2635   /// .gnu_debugaltlink section of the main elf file.
2636   ///
2637   /// Note that "alternate debug info sections" is a GNU extension as
2638   /// of DWARF4 and is described at
2639   /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
2640   ///
2641   /// @return the path to the alternate debug info file, or an empty
2642   /// path if no alternate debug info file is associated.
2643   const string&
alt_debug_info_path() const2644   alt_debug_info_path() const
2645   {return alt_debug_info_path_;}
2646 
2647   /// Return the path to the ELF path we are reading.
2648   ///
2649   /// @return the elf path.
2650   const string&
elf_path() const2651   elf_path() const
2652   {return elf_path_;}
2653 
2654   const Dwarf_Die*
cur_tu_die() const2655   cur_tu_die() const
2656   {return cur_tu_die_;}
2657 
2658   void
cur_tu_die(Dwarf_Die * cur_tu_die)2659   cur_tu_die(Dwarf_Die* cur_tu_die)
2660   {cur_tu_die_ = cur_tu_die;}
2661 
2662   dwarf_expr_eval_context&
dwarf_expr_eval_ctxt() const2663   dwarf_expr_eval_ctxt() const
2664   {return dwarf_expr_eval_context_;}
2665 
2666   /// Getter of the maps set that associates a representation of a
2667   /// decl DIE to a vector of offsets of DIEs having that representation.
2668   ///
2669   /// @return the maps set that associates a representation of a decl
2670   /// DIE to a vector of offsets of DIEs having that representation.
2671   const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
decl_die_repr_die_offsets_maps() const2672   decl_die_repr_die_offsets_maps() const
2673   {return decl_die_repr_die_offsets_maps_;}
2674 
2675   /// Getter of the maps set that associates a representation of a
2676   /// decl DIE to a vector of offsets of DIEs having that representation.
2677   ///
2678   /// @return the maps set that associates a representation of a decl
2679   /// DIE to a vector of offsets of DIEs having that representation.
2680   die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
decl_die_repr_die_offsets_maps()2681   decl_die_repr_die_offsets_maps()
2682   {return decl_die_repr_die_offsets_maps_;}
2683 
2684   /// Getter of the maps set that associate a representation of a type
2685   /// DIE to a vector of offsets of DIEs having that representation.
2686   ///
2687   /// @return the maps set that associate a representation of a type
2688   /// DIE to a vector of offsets of DIEs having that representation.
2689   const die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
type_die_repr_die_offsets_maps() const2690   type_die_repr_die_offsets_maps() const
2691   {return type_die_repr_die_offsets_maps_;}
2692 
2693   /// Getter of the maps set that associate a representation of a type
2694   /// DIE to a vector of offsets of DIEs having that representation.
2695   ///
2696   /// @return the maps set that associate a representation of a type
2697   /// DIE to a vector of offsets of DIEs having that representation.
2698   die_source_dependant_container_set<istring_dwarf_offsets_map_type>&
type_die_repr_die_offsets_maps()2699   type_die_repr_die_offsets_maps()
2700   {return type_die_repr_die_offsets_maps_;}
2701 
2702 
2703   /// Compute the offset of the canonical DIE of a given DIE.
2704   ///
2705   /// @param die the DIE to consider.
2706   ///
2707   /// @param canonical_die_offset out parameter.  This is set to the
2708   /// resulting canonical DIE that was computed.
2709   ///
2710   /// @param die_as_type if yes, it means @p die has to be considered
2711   /// as a type.
2712   void
compute_canonical_die_offset(const Dwarf_Die * die,Dwarf_Off & canonical_die_offset,bool die_as_type) const2713   compute_canonical_die_offset(const Dwarf_Die *die,
2714 			       Dwarf_Off &canonical_die_offset,
2715 			       bool die_as_type) const
2716   {
2717     offset_offset_map_type &canonical_dies =
2718       die_as_type
2719       ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
2720       get_container(*this, die)
2721       : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
2722       get_container(*this, die);
2723 
2724     Dwarf_Die canonical_die;
2725     compute_canonical_die(die, canonical_dies, canonical_die, die_as_type);
2726 
2727     canonical_die_offset = dwarf_dieoffset(&canonical_die);
2728   }
2729 
2730   /// Compute (find) the canonical DIE of a given DIE.
2731   ///
2732   /// @param die the DIE to consider.
2733   ///
2734   /// @param canonical_dies the vector in which the canonical dies ar
2735   /// stored.  The index of each element is the offset of the DIE we
2736   /// want the canonical DIE for.  And the value of the element at
2737   /// that index is the canonical DIE offset we are looking for.
2738   ///
2739   /// @param canonical_die_offset out parameter.  This is set to the
2740   /// resulting canonical DIE that was computed.
2741   ///
2742   /// @param die_as_type if yes, it means @p die has to be considered
2743   /// as a type.
2744   void
compute_canonical_die(const Dwarf_Die * die,offset_offset_map_type & canonical_dies,Dwarf_Die & canonical_die,bool die_as_type) const2745   compute_canonical_die(const Dwarf_Die *die,
2746 			offset_offset_map_type& canonical_dies,
2747 			Dwarf_Die &canonical_die,
2748 			bool die_as_type) const
2749   {
2750     const die_source source = get_die_source(die);
2751 
2752     Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
2753 
2754     compute_canonical_die(die_offset, source,
2755 			  canonical_dies,
2756 			  canonical_die, die_as_type);
2757   }
2758 
2759   /// Compute (find) the canonical DIE of a given DIE.
2760   ///
2761   /// @param die_offset the offset of the DIE to consider.
2762   ///
2763   /// @param source the source of the DIE to consider.
2764   ///
2765   /// @param canonical_dies the vector in which the canonical dies ar
2766   /// stored.  The index of each element is the offset of the DIE we
2767   /// want the canonical DIE for.  And the value of the element at
2768   /// that index is the canonical DIE offset we are looking for.
2769   ///
2770   /// @param canonical_die_offset out parameter.  This is set to the
2771   /// resulting canonical DIE that was computed.
2772   ///
2773   /// @param die_as_type if yes, it means @p die has to be considered
2774   /// as a type.
2775   void
compute_canonical_die(Dwarf_Off die_offset,die_source source,offset_offset_map_type & canonical_dies,Dwarf_Die & canonical_die,bool die_as_type) const2776   compute_canonical_die(Dwarf_Off die_offset,
2777 			die_source source,
2778 			offset_offset_map_type& canonical_dies,
2779 			Dwarf_Die &canonical_die,
2780 			bool die_as_type) const
2781   {
2782     // The map that associates the string representation of 'die'
2783     // with a vector of offsets of potentially equivalent DIEs.
2784     istring_dwarf_offsets_map_type& map =
2785       die_as_type
2786       ? (const_cast<read_context*>(this)->
2787 	 type_die_repr_die_offsets_maps().get_container(source))
2788       : (const_cast<read_context*>(this)->
2789 	 decl_die_repr_die_offsets_maps().get_container(source));
2790 
2791     Dwarf_Die die;
2792     ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), die_offset, &die));
2793 
2794     // The variable repr is the the string representation of 'die'.
2795     //
2796     // Even if die_as_type is true -- which means that 'die' is said
2797     // to be considered as a type -- we always consider a
2798     // DW_TAG_subprogram DIE as a decl here, as far as its string
2799     // representation is concerned.
2800     interned_string name =
2801       (die_as_type)
2802       ? get_die_pretty_type_representation(&die, /*where=*/0)
2803       : get_die_pretty_representation(&die, /*where=*/0);
2804 
2805     Dwarf_Off canonical_die_offset = 0;
2806     istring_dwarf_offsets_map_type::iterator i = map.find(name);
2807     if (i == map.end())
2808       {
2809 	dwarf_offsets_type offsets;
2810 	offsets.push_back(die_offset);
2811 	map[name] = offsets;
2812 	set_canonical_die_offset(canonical_dies, die_offset, die_offset);
2813 	get_die_from_offset(source, die_offset, &canonical_die);
2814 	return;
2815       }
2816 
2817     if (odr_is_relevant(&die))
2818       {
2819 	// ODR is relevant for this DIE.  In this case, all types with
2820 	// the same name are considered equivalent.  So the array
2821 	// i->second shoud only have on element.  If not, then
2822 	// the DIEs referenced in the array should all compare equal.
2823 	// Otherwise, this is an ODR violation.  In any case, return
2824 	// the first element of the array.
2825 	// ABG_ASSERT(i->second.size() == 1);
2826 	canonical_die_offset = i->second.front();
2827 	get_die_from_offset(source, canonical_die_offset, &canonical_die);
2828 	set_canonical_die_offset(canonical_dies, die_offset, die_offset);
2829 	return;
2830       }
2831 
2832     Dwarf_Off cur_die_offset;
2833     Dwarf_Die potential_canonical_die;
2834     for (dwarf_offsets_type::const_iterator o = i->second.begin();
2835 	 o != i->second.end();
2836 	 ++o)
2837       {
2838 	cur_die_offset = *o;
2839 	get_die_from_offset(source, cur_die_offset, &potential_canonical_die);
2840 	if (compare_dies(*this, &die, &potential_canonical_die,
2841 			 /*update_canonical_dies_on_the_fly=*/false))
2842 	  {
2843 	    canonical_die_offset = cur_die_offset;
2844 	    set_canonical_die_offset(canonical_dies, die_offset,
2845 				     canonical_die_offset);
2846 	    get_die_from_offset(source, canonical_die_offset, &canonical_die);
2847 	    return;
2848 	  }
2849       }
2850 
2851     canonical_die_offset = die_offset;
2852     i->second.push_back(die_offset);
2853     set_canonical_die_offset(canonical_dies, die_offset, die_offset);
2854     get_die_from_offset(source, canonical_die_offset, &canonical_die);
2855   }
2856 
2857   /// Getter of the canonical DIE of a given DIE.
2858   ///
2859   /// @param die the DIE to consider.
2860   ///
2861   /// @param canonical_die output parameter.  Is set to the resuling
2862   /// canonical die, if this function returns true.
2863   ///
2864   /// @param where the offset of the logical DIE we are supposed to be
2865   /// calling this function from.  If set to zero this means this is
2866   /// to be ignored.
2867   ///
2868   /// @param die_as_type if set to yes, it means @p die is to be
2869   /// considered as a type DIE.
2870   ///
2871   /// @return true iff a canonical DIE was found for @p die.
2872   bool
get_canonical_die(const Dwarf_Die * die,Dwarf_Die & canonical_die,size_t where,bool die_as_type) const2873   get_canonical_die(const Dwarf_Die *die,
2874 		    Dwarf_Die &canonical_die,
2875 		    size_t where,
2876 		    bool die_as_type) const
2877   {
2878     const die_source source = get_die_source(die);
2879 
2880     offset_offset_map_type &canonical_dies =
2881       die_as_type
2882       ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
2883       get_container(source)
2884       : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
2885       get_container(source);
2886 
2887     Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
2888     if (Dwarf_Off canonical_die_offset =
2889 	get_canonical_die_offset(canonical_dies, die_offset))
2890       {
2891 	get_die_from_offset(source, canonical_die_offset, &canonical_die);
2892 	return true;
2893       }
2894 
2895     // The map that associates the string representation of 'die'
2896     // with a vector of offsets of potentially equivalent DIEs.
2897     istring_dwarf_offsets_map_type& map =
2898       die_as_type
2899       ? (const_cast<read_context*>(this)->
2900 	 type_die_repr_die_offsets_maps().get_container(*this, die))
2901       : (const_cast<read_context*>(this)->
2902 	 decl_die_repr_die_offsets_maps().get_container(*this, die));
2903 
2904     // The variable repr is the the string representation of 'die'.
2905     //
2906     // Even if die_as_type is true -- which means that 'die' is said
2907     // to be considered as a type -- we always consider a
2908     // DW_TAG_subprogram DIE as a decl here, as far as its string
2909     // representation is concerned.
2910     interned_string name =
2911       (die_as_type /*&& dwarf_tag(die) != DW_TAG_subprogram*/)
2912       ? get_die_pretty_type_representation(die, where)
2913       : get_die_pretty_representation(die, where);
2914 
2915     istring_dwarf_offsets_map_type::iterator i = map.find(name);
2916     if (i == map.end())
2917       return false;
2918 
2919     if (odr_is_relevant(die))
2920       {
2921 	// ODR is relevant for this DIE.  In this case, all types with
2922 	// the same name are considered equivalent.  So the array
2923 	// i->second shoud only have on element.  If not, then
2924 	// the DIEs referenced in the array should all compare equal.
2925 	// Otherwise, this is an ODR violation.  In any case, return
2926 	// the first element of the array.
2927 	// ABG_ASSERT(i->second.size() == 1);
2928 	Dwarf_Off canonical_die_offset = i->second.front();
2929 	get_die_from_offset(source, canonical_die_offset, &canonical_die);
2930 	set_canonical_die_offset(canonical_dies,
2931 				 die_offset,
2932 				 canonical_die_offset);
2933 	return true;
2934       }
2935 
2936     Dwarf_Off cur_die_offset;
2937     for (dwarf_offsets_type::const_iterator o = i->second.begin();
2938 	 o != i->second.end();
2939 	 ++o)
2940       {
2941 	cur_die_offset = *o;
2942 	get_die_from_offset(source, cur_die_offset, &canonical_die);
2943 	// compare die and canonical_die.
2944 	if (compare_dies(*this, die, &canonical_die,
2945 			 /*update_canonical_dies_on_the_fly=*/true))
2946 	  {
2947 	    set_canonical_die_offset(canonical_dies,
2948 				     die_offset,
2949 				     cur_die_offset);
2950 	    return true;
2951 	  }
2952       }
2953 
2954     return false;
2955   }
2956 
2957   /// Retrieve the canonical DIE of a given DIE.
2958   ///
2959   /// The canonical DIE is a DIE that is structurally equivalent to
2960   /// this one.
2961   ///
2962   /// Note that this function caches the canonical DIE that was
2963   /// computed.  Subsequent invocations of this function on the same
2964   /// DIE return the same cached DIE.
2965   ///
2966   /// @param die the DIE to get a canonical type for.
2967   ///
2968   /// @param canonical_die the resulting canonical DIE.
2969   ///
2970   /// @param where the offset of the logical DIE we are supposed to be
2971   /// calling this function from.  If set to zero this means this is
2972   /// to be ignored.
2973   ///
2974   /// @param die_as_type if true, consider DIE is a type.
2975   ///
2976   /// @return true if an *existing* canonical DIE was found.
2977   /// Otherwise, @p die is considered as being a canonical DIE for
2978   /// itself. @p canonical_die is thus set to the canonical die in
2979   /// either cases.
2980   bool
get_or_compute_canonical_die(const Dwarf_Die * die,Dwarf_Die & canonical_die,size_t where,bool die_as_type) const2981   get_or_compute_canonical_die(const Dwarf_Die* die,
2982 			       Dwarf_Die& canonical_die,
2983 			       size_t where,
2984 			       bool die_as_type) const
2985   {
2986     const die_source source = get_die_source(die);
2987 
2988     offset_offset_map_type &canonical_dies =
2989       die_as_type
2990       ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
2991       get_container(source)
2992       : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
2993       get_container(source);
2994 
2995     Dwarf_Off initial_die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
2996 
2997     if (Dwarf_Off canonical_die_offset =
2998 	get_canonical_die_offset(canonical_dies,
2999 				 initial_die_offset))
3000       {
3001 	get_die_from_offset(source, canonical_die_offset, &canonical_die);
3002 	return true;
3003       }
3004 
3005     // The map that associates the string representation of 'die'
3006     // with a vector of offsets of potentially equivalent DIEs.
3007     istring_dwarf_offsets_map_type& map =
3008       die_as_type
3009       ? (const_cast<read_context*>(this)->
3010 	 type_die_repr_die_offsets_maps().get_container(*this, die))
3011       : (const_cast<read_context*>(this)->
3012 	 decl_die_repr_die_offsets_maps().get_container(*this, die));
3013 
3014     // The variable repr is the the string representation of 'die'.
3015     //
3016     // Even if die_as_type is true -- which means that 'die' is said
3017     // to be considered as a type -- we always consider a
3018     // DW_TAG_subprogram DIE as a decl here, as far as its string
3019     // representation is concerned.
3020     interned_string name =
3021       (die_as_type)
3022       ? get_die_pretty_type_representation(die, where)
3023       : get_die_pretty_representation(die, where);
3024 
3025     istring_dwarf_offsets_map_type::iterator i = map.find(name);
3026     if (i == map.end())
3027       {
3028 	dwarf_offsets_type offsets;
3029 	offsets.push_back(initial_die_offset);
3030 	map[name] = offsets;
3031 	get_die_from_offset(source, initial_die_offset, &canonical_die);
3032 	set_canonical_die_offset(canonical_dies,
3033 				 initial_die_offset,
3034 				 initial_die_offset);
3035 	return false;
3036       }
3037 
3038     if (odr_is_relevant(die))
3039       {
3040 	// ODR is relevant for this DIE.  In this case, all types with
3041 	// the same name are considered equivalent.  So the array
3042 	// i->second shoud only have on element.  If not, then
3043 	// the DIEs referenced in the array should all compare equal.
3044 	// Otherwise, this is an ODR violation.  In any case, return
3045 	// the first element of the array.
3046 	// ABG_ASSERT(i->second.size() == 1);
3047 	Dwarf_Off die_offset = i->second.front();
3048 	get_die_from_offset(source, die_offset, &canonical_die);
3049 	set_canonical_die_offset(canonical_dies,
3050 				 initial_die_offset,
3051 				 die_offset);
3052 	return true;
3053       }
3054 
3055     // walk i->second without any iterator (using a while loop rather
3056     // than a for loop) because compare_dies might add new content to
3057     // the end of the i->second vector during the walking.
3058     dwarf_offsets_type::size_type n = 0, s = i->second.size();
3059     while (n < s)
3060       {
3061 	Dwarf_Off die_offset = i->second[n];
3062 	get_die_from_offset(source, die_offset, &canonical_die);
3063 	// compare die and canonical_die.
3064 	if (compare_dies(*this, die, &canonical_die,
3065 			 /*update_canonical_dies_on_the_fly=*/true))
3066 	  {
3067 	    set_canonical_die_offset(canonical_dies,
3068 				     initial_die_offset,
3069 				     die_offset);
3070 	    return true;
3071 	  }
3072 	++n;
3073       }
3074 
3075     // We didn't find a canonical DIE for 'die'.  So let's consider
3076     // that it is its own canonical DIE.
3077     get_die_from_offset(source, initial_die_offset, &canonical_die);
3078     i->second.push_back(initial_die_offset);
3079     set_canonical_die_offset(canonical_dies,
3080 			     initial_die_offset,
3081 			     initial_die_offset);
3082 
3083     return false;
3084   }
3085 
3086   /// Get the source of the DIE.
3087   ///
3088   /// The function returns an enumerator value saying if the DIE comes
3089   /// from the .debug_info section of the primary debug info file, the
3090   /// .debug_info section of the alternate debug info file, or the
3091   /// .debug_types section.
3092   ///
3093   /// @param die the DIE to get the source of.
3094   ///
3095   /// @return the source of the DIE if it could be determined,
3096   /// NO_DEBUG_INFO_DIE_SOURCE otherwise.
3097   die_source
get_die_source(const Dwarf_Die * die) const3098   get_die_source(const Dwarf_Die *die) const
3099   {
3100     die_source source = NO_DEBUG_INFO_DIE_SOURCE;
3101     ABG_ASSERT(die);
3102     ABG_ASSERT(get_die_source(*die, source));
3103     return source;
3104   }
3105 
3106   /// Get the source of the DIE.
3107   ///
3108   /// The function returns an enumerator value saying if the DIE comes
3109   /// from the .debug_info section of the primary debug info file, the
3110   /// .debug_info section of the alternate debug info file, or the
3111   /// .debug_types section.
3112   ///
3113   /// @param die the DIE to get the source of.
3114   ///
3115   /// @param source out parameter.  The function sets this parameter
3116   /// to the source of the DIE @p iff it returns true.
3117   ///
3118   /// @return true iff the source of the DIE could be determined and
3119   /// returned.
3120   bool
get_die_source(const Dwarf_Die & die,die_source & source) const3121   get_die_source(const Dwarf_Die &die, die_source &source) const
3122   {
3123     Dwarf_Die cu_die;
3124     Dwarf_Die cu_kind;
3125     uint8_t address_size = 0, offset_size = 0;
3126     if (!dwarf_diecu(const_cast<Dwarf_Die*>(&die),
3127 		     &cu_die, &address_size,
3128 		     &offset_size))
3129       return false;
3130 
3131     Dwarf_Half version = 0;
3132     Dwarf_Off abbrev_offset = 0;
3133     uint64_t type_signature = 0;
3134     Dwarf_Off type_offset = 0;
3135     if (!dwarf_cu_die(cu_die.cu, &cu_kind,
3136 		      &version, &abbrev_offset,
3137 		      &address_size, &offset_size,
3138 		      &type_signature, &type_offset))
3139       return false;
3140 
3141     int tag = dwarf_tag(&cu_kind);
3142 
3143     if (tag == DW_TAG_compile_unit
3144 	|| tag == DW_TAG_partial_unit)
3145       {
3146 	Dwarf *die_dwarf = dwarf_cu_getdwarf(cu_die.cu);
3147 	if (dwarf() == die_dwarf)
3148 	  source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
3149 	else if (alt_dwarf() == die_dwarf)
3150 	  source = ALT_DEBUG_INFO_DIE_SOURCE;
3151 	else
3152 	  ABG_ASSERT_NOT_REACHED;
3153       }
3154     else if (tag == DW_TAG_type_unit)
3155       source = TYPE_UNIT_DIE_SOURCE;
3156     else
3157       return false;
3158 
3159     return true;
3160   }
3161 
3162   /// Getter for the DIE designated by an offset.
3163   ///
3164   /// @param source the source of the DIE to get.
3165   ///
3166   /// @param offset the offset of the DIE to get.
3167   ///
3168   /// @param die the resulting DIE.  The pointer has to point to an
3169   /// allocated memory region.
3170   void
get_die_from_offset(die_source source,Dwarf_Off offset,Dwarf_Die * die) const3171   get_die_from_offset(die_source source, Dwarf_Off offset, Dwarf_Die *die) const
3172   {
3173     if (source == TYPE_UNIT_DIE_SOURCE)
3174       ABG_ASSERT(dwarf_offdie_types(dwarf_per_die_source(source), offset, die));
3175     else
3176       ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), offset, die));
3177   }
3178 
3179 public:
3180 
3181   /// Add an entry to the relevant die->decl map.
3182   ///
3183   /// @param die the DIE to add the the map.
3184   ///
3185   /// @param decl the decl to consider.
3186   ///
3187   /// @param where_offset where in the DIE stream we logically are.
3188   ///
3189   /// @param do_associate_by_repr if true then this function
3190   /// associates the representation string of @p die with the
3191   /// declaration @p decl, in a corpus-wide manner.  That is, in the
3192   /// entire current corpus, there is going to be just one declaration
3193   /// associated with a DIE of the string representation of @p die.
3194   ///
3195   /// @param do_associate_by_repr_per_tu if true, then this function
3196   /// associates the representation string of @p die with the
3197   /// declaration @p decl in a translation unit wide manner.  That is,
3198   /// in the entire current translation unit, there is going to be
3199   /// just one declaration associated with a DIE of the string
3200   /// representation of @p die.
3201   void
associate_die_to_decl(Dwarf_Die * die,decl_base_sptr decl,size_t where_offset,bool do_associate_by_repr=false)3202   associate_die_to_decl(Dwarf_Die* die,
3203 			decl_base_sptr decl,
3204 			size_t where_offset,
3205 			bool do_associate_by_repr = false)
3206   {
3207     const die_source source = get_die_source(die);
3208 
3209     die_artefact_map_type& m =
3210       decl_die_artefact_maps().get_container(source);
3211 
3212     size_t die_offset;
3213     if (do_associate_by_repr)
3214       {
3215 	Dwarf_Die equiv_die;
3216 	get_or_compute_canonical_die(die, equiv_die, where_offset,
3217 				     /*die_as_type=*/false);
3218 	die_offset = dwarf_dieoffset(&equiv_die);
3219       }
3220     else
3221       die_offset = dwarf_dieoffset(die);
3222 
3223     m[die_offset] = decl;
3224   }
3225 
3226   /// Lookup the decl for a given DIE.
3227   ///
3228   /// The returned decl is either the decl of the DIE that as the
3229   /// exact offset @p die_offset
3230   /// die_offset, or
3231   /// give
3232   ///
3233   /// @param die_offset the offset of the DIE to consider.
3234   ///
3235   /// @param source where the DIE represented by @p die_offset comes
3236   /// from.
3237   ///
3238   /// Note that "alternate debug info sections" is a GNU extension as
3239   /// of DWARF4 and is described at
3240   /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1
3241   ///
3242   /// @return the resulting decl, or null if no decl is associated to
3243   /// the DIE represented by @p die_offset.
3244   decl_base_sptr
lookup_decl_from_die_offset(Dwarf_Off die_offset,die_source source)3245   lookup_decl_from_die_offset(Dwarf_Off die_offset, die_source source)
3246   {
3247     decl_base_sptr result =
3248       is_decl(lookup_artifact_from_die_offset(die_offset, source,
3249 					      /*die_as_type=*/false));
3250 
3251     return result;
3252   }
3253 
3254   /// Get the qualified name of a given DIE.
3255   ///
3256   /// If the name of the DIE was already computed before just return
3257   /// that name from a cache.  Otherwise, build the name, cache it and
3258   /// return it.
3259   ///
3260   /// @param die the DIE to consider.
3261   ///
3262   /// @param where_offset where in the DIE stream we logically are.
3263   ///
3264   /// @return the interned string representing the qualified name of
3265   /// @p die.
3266   interned_string
get_die_qualified_name(Dwarf_Die * die,size_t where_offset)3267   get_die_qualified_name(Dwarf_Die *die, size_t where_offset)
3268   {
3269     ABG_ASSERT(die);
3270     die_istring_map_type& map =
3271       die_qualified_name_maps_.get_container(*this, die);
3272 
3273     size_t die_offset = dwarf_dieoffset(die);
3274     die_istring_map_type::const_iterator i = map.find(die_offset);
3275 
3276     if (i == map.end())
3277       {
3278 	read_context& ctxt  = *const_cast<read_context*>(this);
3279 	string qualified_name = die_qualified_name(ctxt, die, where_offset);
3280 	interned_string istr = env()->intern(qualified_name);
3281 	map[die_offset] = istr;
3282 	return istr;
3283       }
3284 
3285     return i->second;
3286   }
3287 
3288   /// Get the qualified name of a given DIE.
3289   ///
3290   /// If the name of the DIE was already computed before just return
3291   /// that name from a cache.  Otherwise, build the name, cache it and
3292   /// return it.
3293   ///
3294   /// @param die the DIE to consider.
3295   ///
3296   /// @param where_offset where in the DIE stream we logically are.
3297   ///
3298   /// @return the interned string representing the qualified name of
3299   /// @p die.
3300   interned_string
get_die_qualified_name(Dwarf_Die * die,size_t where_offset) const3301   get_die_qualified_name(Dwarf_Die *die, size_t where_offset) const
3302   {
3303     return const_cast<read_context*>(this)->
3304       get_die_qualified_name(die, where_offset);
3305   }
3306 
3307   /// Get the qualified name of a given DIE which is considered to be
3308   /// the DIE for a type.
3309   ///
3310   /// For instance, for a DW_TAG_subprogram DIE, this function
3311   /// computes the name of the function *type* that corresponds to the
3312   /// function.
3313   ///
3314   /// If the name of the DIE was already computed before just return
3315   /// that name from a cache.  Otherwise, build the name, cache it and
3316   /// return it.
3317   ///
3318   /// @param die the DIE to consider.
3319   ///
3320   /// @param where_offset where in the DIE stream we logically are.
3321   ///
3322   /// @return the interned string representing the qualified name of
3323   /// @p die.
3324   interned_string
get_die_qualified_type_name(const Dwarf_Die * die,size_t where_offset) const3325   get_die_qualified_type_name(const Dwarf_Die *die, size_t where_offset) const
3326   {
3327     ABG_ASSERT(die);
3328 
3329     // The name of the translation unit die is "".
3330     if (die == cur_tu_die())
3331       return env()->intern("");
3332 
3333     die_istring_map_type& map =
3334       die_qualified_name_maps_.get_container(*const_cast<read_context*>(this),
3335 					     die);
3336 
3337     size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3338     die_istring_map_type::const_iterator i =
3339       map.find(die_offset);
3340 
3341     if (i == map.end())
3342       {
3343 	read_context& ctxt  = *const_cast<read_context*>(this);
3344 	string qualified_name;
3345 	int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
3346 	if ((tag == DW_TAG_structure_type
3347 	     || tag == DW_TAG_class_type
3348 	     || tag == DW_TAG_union_type)
3349 	    && die_is_anonymous(die))
3350 	  {
3351 	    location l = die_location(*this, die);
3352 	    qualified_name = l ? l.expand() : "noloc";
3353 	    qualified_name = "unnamed-at-" + qualified_name;
3354 	  }
3355 	else
3356 	  qualified_name =
3357 	    die_qualified_type_name(ctxt, die, where_offset);
3358 
3359 	interned_string istr = env()->intern(qualified_name);
3360 	map[die_offset] = istr;
3361 	return istr;
3362       }
3363 
3364     return i->second;
3365   }
3366 
3367   /// Get the pretty representation of a DIE that represents a type.
3368   ///
3369   /// For instance, for the DW_TAG_subprogram, this function computes
3370   /// the pretty representation of the type of the function, not the
3371   /// pretty representation of the function declaration.
3372   ///
3373   /// Once the pretty representation is computed, it's stored in a
3374   /// cache.  Subsequent invocations of this function on the same DIE
3375   /// will yield the cached name.
3376   ///
3377   /// @param die the DIE to consider.
3378   ///
3379   /// @param where_offset where in the DIE stream we logically are.
3380   ///
3381   /// @return the interned_string that represents the pretty
3382   /// representation.
3383   interned_string
get_die_pretty_type_representation(const Dwarf_Die * die,size_t where_offset) const3384   get_die_pretty_type_representation(const Dwarf_Die *die,
3385 				     size_t where_offset) const
3386   {
3387     ABG_ASSERT(die);
3388     die_istring_map_type& map =
3389       die_pretty_type_repr_maps_.get_container(*const_cast<read_context*>(this),
3390 					       die);
3391 
3392     size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3393     die_istring_map_type::const_iterator i = map.find(die_offset);
3394 
3395     if (i == map.end())
3396       {
3397 	read_context& ctxt = *const_cast<read_context*>(this);
3398 	string pretty_representation =
3399 	  die_pretty_print_type(ctxt, die, where_offset);
3400 	interned_string istr = env()->intern(pretty_representation);
3401 	map[die_offset] = istr;
3402 	return istr;
3403       }
3404 
3405     return i->second;
3406   }
3407 
3408   /// Get the pretty representation of a DIE.
3409   ///
3410   /// Once the pretty representation is computed, it's stored in a
3411   /// cache.  Subsequent invocations of this function on the same DIE
3412   /// will yield the cached name.
3413   ///
3414   /// @param die the DIE to consider.
3415   ///
3416   /// @param where_offset where in the DIE stream we logically are.
3417   ///
3418   /// @return the interned_string that represents the pretty
3419   /// representation.
3420   interned_string
get_die_pretty_representation(const Dwarf_Die * die,size_t where_offset) const3421   get_die_pretty_representation(const Dwarf_Die *die, size_t where_offset) const
3422   {
3423     ABG_ASSERT(die);
3424 
3425     die_istring_map_type& map =
3426       die_pretty_repr_maps_.get_container(*const_cast<read_context*>(this),
3427 					  die);
3428 
3429     size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3430     die_istring_map_type::const_iterator i = map.find(die_offset);
3431 
3432     if (i == map.end())
3433       {
3434 	read_context& ctxt = *const_cast<read_context*>(this);
3435 	string pretty_representation =
3436 	  die_pretty_print(ctxt, die, where_offset);
3437 	interned_string istr = env()->intern(pretty_representation);
3438 	map[die_offset] = istr;
3439 	return istr;
3440       }
3441 
3442     return i->second;
3443   }
3444 
3445   /// Lookup the artifact that was built to represent a type that has
3446   /// the same pretty representation as the type denoted by a given
3447   /// DIE.
3448   ///
3449   /// Note that the DIE must have previously been associated with the
3450   /// artifact using the functions associate_die_to_decl or
3451   /// associate_die_to_type.
3452   ///
3453   /// Also, note that the scope of the lookup is the current ABI
3454   /// corpus.
3455   ///
3456   /// @param die the DIE to consider.
3457   ///
3458   /// @param where_offset where in the DIE stream we logically are.
3459   ///
3460   /// @return the type artifact found.
3461   type_or_decl_base_sptr
lookup_type_artifact_from_die(Dwarf_Die * die) const3462   lookup_type_artifact_from_die(Dwarf_Die *die) const
3463   {
3464     type_or_decl_base_sptr artifact =
3465       lookup_artifact_from_die(die, /*type_as_die=*/true);
3466     if (function_decl_sptr fn = is_function_decl(artifact))
3467       return fn->get_type();
3468     return artifact;
3469   }
3470 
3471   /// Lookup the artifact that was built to represent a type or a
3472   /// declaration that has the same pretty representation as the type
3473   /// denoted by a given DIE.
3474   ///
3475   /// Note that the DIE must have previously been associated with the
3476   /// artifact using the functions associate_die_to_decl or
3477   /// associate_die_to_type.
3478   ///
3479   /// Also, note that the scope of the lookup is the current ABI
3480   /// corpus.
3481   ///
3482   /// @param die the DIE to consider.
3483   ///
3484   /// @param where_offset where in the DIE stream we logically are.
3485   ///
3486   /// @param die_as_type if true, it means the DIE is to be considered
3487   /// as a type.
3488   ///
3489   /// @return the artifact found.
3490   type_or_decl_base_sptr
lookup_artifact_from_die(const Dwarf_Die * die,bool die_as_type=false) const3491   lookup_artifact_from_die(const Dwarf_Die *die, bool die_as_type = false) const
3492   {
3493     Dwarf_Die equiv_die;
3494     if (!get_or_compute_canonical_die(die, equiv_die, /*where=*/0, die_as_type))
3495       return type_or_decl_base_sptr();
3496 
3497     const die_artefact_map_type& m =
3498       die_as_type
3499       ? type_die_artefact_maps().get_container(*this, &equiv_die)
3500       : decl_die_artefact_maps().get_container(*this, &equiv_die);
3501 
3502     size_t die_offset = dwarf_dieoffset(&equiv_die);
3503     die_artefact_map_type::const_iterator i = m.find(die_offset);
3504 
3505     if (i == m.end())
3506       return type_or_decl_base_sptr();
3507     return i->second;
3508   }
3509 
3510   /// Lookup the artifact that was built to represent a type or a
3511   /// declaration that has the same pretty representation as the type
3512   /// denoted by the offset of a given DIE.
3513   ///
3514   /// Note that the DIE must have previously been associated with the
3515   /// artifact using either associate_die_to_decl or
3516   /// associate_die_to_type.
3517   ///
3518   /// Also, note that the scope of the lookup is the current ABI
3519   /// corpus.
3520   ///
3521   /// @param die the DIE to consider.
3522   ///
3523   /// @param where_offset where in the DIE stream we logically are.
3524   ///
3525   /// @param die_as_type if true, it means the DIE is to be considered
3526   /// as a type.
3527   ///
3528   /// @return the artifact found.
3529   type_or_decl_base_sptr
lookup_artifact_from_die_offset(Dwarf_Off die_offset,die_source source,bool die_as_type=false) const3530   lookup_artifact_from_die_offset(Dwarf_Off die_offset,
3531 				  die_source source,
3532 				  bool die_as_type = false) const
3533   {
3534     const die_artefact_map_type& m =
3535       die_as_type
3536       ? type_die_artefact_maps().get_container(source)
3537       : decl_die_artefact_maps().get_container(source);
3538 
3539     die_artefact_map_type::const_iterator i = m.find(die_offset);
3540     if (i == m.end())
3541       return type_or_decl_base_sptr();
3542     return i->second;
3543   }
3544 
3545   /// Get the language used to generate a given DIE.
3546   ///
3547   /// @param die the DIE to consider.
3548   ///
3549   /// @param lang the resulting language.
3550   ///
3551   /// @return true iff the language of the DIE was found.
3552   bool
get_die_language(const Dwarf_Die * die,translation_unit::language & lang) const3553   get_die_language(const Dwarf_Die *die, translation_unit::language &lang) const
3554   {
3555     Dwarf_Die cu_die;
3556     ABG_ASSERT(dwarf_diecu(const_cast<Dwarf_Die*>(die), &cu_die, 0, 0));
3557 
3558     uint64_t l = 0;
3559     if (!die_unsigned_constant_attribute(&cu_die, DW_AT_language, l))
3560       return false;
3561 
3562     lang = dwarf_language_to_tu_language(l);
3563     return true;
3564   }
3565 
3566   /// Test if a given DIE originates from a program written in the C
3567   /// language.
3568   ///
3569   /// @param die the DIE to consider.
3570   ///
3571   /// @return true iff @p die originates from a program in the C
3572   /// language.
3573   bool
die_is_in_c(const Dwarf_Die * die) const3574   die_is_in_c(const Dwarf_Die *die) const
3575   {
3576     translation_unit::language l = translation_unit::LANG_UNKNOWN;
3577     if (!get_die_language(die, l))
3578       return false;
3579     return is_c_language(l);
3580   }
3581 
3582   /// Test if a given DIE originates from a program written in the C++
3583   /// language.
3584   ///
3585   /// @param die the DIE to consider.
3586   ///
3587   /// @return true iff @p die originates from a program in the C++
3588   /// language.
3589   bool
die_is_in_cplus_plus(const Dwarf_Die * die) const3590   die_is_in_cplus_plus(const Dwarf_Die *die) const
3591   {
3592     translation_unit::language l = translation_unit::LANG_UNKNOWN;
3593     if (!get_die_language(die, l))
3594       return false;
3595     return is_cplus_plus_language(l);
3596   }
3597 
3598   /// Test if a given DIE originates from a program written either in
3599   /// C or C++.
3600   ///
3601   /// @param die the DIE to consider.
3602   ///
3603   /// @return true iff @p die originates from a program written either in
3604   /// C or C++.
3605   bool
die_is_in_c_or_cplusplus(const Dwarf_Die * die) const3606   die_is_in_c_or_cplusplus(const Dwarf_Die *die) const
3607   {
3608     translation_unit::language l = translation_unit::LANG_UNKNOWN;
3609     if (!get_die_language(die, l))
3610       return false;
3611     return (is_cplus_plus_language(l) || is_c_language(l));
3612   }
3613 
3614   /// Check if we can assume the One Definition Rule[1] to be relevant
3615   /// for the current translation unit.
3616   ///
3617   /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
3618   ///
3619   /// At the moment this returns true if the current translation unit
3620   /// is in C++ language.  In that case, it's relevant to assume that
3621   /// we use optimizations based on the ODR.
3622   bool
odr_is_relevant() const3623   odr_is_relevant() const
3624   {return odr_is_relevant(cur_transl_unit()->get_language());}
3625 
3626   /// Check if we can assume the One Definition Rule[1] to be relevant
3627   /// for a given language.
3628   ///
3629   /// [1]: https://en.wikipedia.org/wiki/One_Definition_Rule
3630   ///
3631   /// At the moment this returns true if the language considered
3632   /// is C++, Java or Ada.
3633   bool
odr_is_relevant(translation_unit::language l) const3634   odr_is_relevant(translation_unit::language l) const
3635   {
3636     return (is_cplus_plus_language(l)
3637 	    || is_java_language(l)
3638 	    || is_ada_language(l));
3639   }
3640 
3641   /// Check if we can assume the One Definition Rule to be relevant
3642   /// for a given DIE.
3643   ///
3644   /// @param die the DIE to consider.
3645   ///
3646   /// @return true if the ODR is relevant for @p die.
3647   bool
odr_is_relevant(Dwarf_Off die_offset,die_source source) const3648   odr_is_relevant(Dwarf_Off die_offset, die_source source) const
3649   {
3650     Dwarf_Die die;
3651     ABG_ASSERT(dwarf_offdie(dwarf_per_die_source(source), die_offset, &die));
3652     return odr_is_relevant(&die);
3653   }
3654 
3655   /// Check if we can assume the One Definition Rule to be relevant
3656   /// for a given DIE.
3657   ///
3658   /// @param die the DIE to consider.
3659   ///
3660   /// @return true if the ODR is relevant for @p die.
3661   bool
odr_is_relevant(const Dwarf_Die * die) const3662   odr_is_relevant(const Dwarf_Die *die) const
3663   {
3664     translation_unit::language lang;
3665     if (!get_die_language(die, lang))
3666       return odr_is_relevant();
3667 
3668     return odr_is_relevant(lang);
3669   }
3670 
3671   /// Getter for the maps set that associates a decl DIE offset to an
3672   /// artifact.
3673   ///
3674   /// @return the maps set that associates a decl DIE offset to an
3675   /// artifact.
3676   die_source_dependant_container_set<die_artefact_map_type>&
decl_die_artefact_maps()3677   decl_die_artefact_maps()
3678   {return decl_die_artefact_maps_;}
3679 
3680   /// Getter for the maps set that associates a decl DIE offset to an
3681   /// artifact.
3682   ///
3683   /// @return the maps set that associates a decl DIE offset to an
3684   /// artifact.
3685   const die_source_dependant_container_set<die_artefact_map_type>&
decl_die_artefact_maps() const3686   decl_die_artefact_maps() const
3687   {return decl_die_artefact_maps_;}
3688 
3689   /// Getter for the maps set that associates a type DIE offset to an
3690   /// artifact.
3691   ///
3692   /// @return the maps set that associates a type DIE offset to an
3693   /// artifact.
3694   die_source_dependant_container_set<die_artefact_map_type>&
type_die_artefact_maps()3695   type_die_artefact_maps()
3696   {return type_die_artefact_maps_;}
3697 
3698   /// Getter for the maps set that associates a type DIE offset to an
3699   /// artifact.
3700   ///
3701   /// @return the maps set that associates a type DIE offset to an
3702   /// artifact.
3703   const die_source_dependant_container_set<die_artefact_map_type>&
type_die_artefact_maps() const3704   type_die_artefact_maps() const
3705   {return type_die_artefact_maps_;}
3706 
3707   /// Getter of the maps that associates function type representations
3708   /// to function types, inside a translation unit.
3709   ///
3710   /// @return the maps that associates function type representations
3711   /// to function types, inside a translation unit.
3712   istring_fn_type_map_type&
per_tu_repr_to_fn_type_maps()3713   per_tu_repr_to_fn_type_maps()
3714   {return per_tu_repr_to_fn_type_maps_;}
3715 
3716   /// Getter of the maps that associates function type representations
3717   /// to function types, inside a translation unit.
3718   ///
3719   /// @return the maps that associates function type representations
3720   /// to function types, inside a translation unit.
3721   const istring_fn_type_map_type&
per_tu_repr_to_fn_type_maps() const3722   per_tu_repr_to_fn_type_maps() const
3723   {return per_tu_repr_to_fn_type_maps_;}
3724 
3725   /// Associate the representation of a function type DIE to a given
3726   /// function type, inside the current translation unit.
3727   ///
3728   /// @param die the DIE to associate to the function type, using its
3729   /// representation.
3730   ///
3731   /// @param fn_type the function type to associate to @p die.
3732   void
associate_die_repr_to_fn_type_per_tu(const Dwarf_Die * die,const function_type_sptr & fn_type)3733   associate_die_repr_to_fn_type_per_tu(const Dwarf_Die *die,
3734 				       const function_type_sptr &fn_type)
3735   {
3736     if (!die_is_function_type(die))
3737       return;
3738 
3739     interned_string repr =
3740       get_die_pretty_type_representation(die, /*where=*/0);
3741     ABG_ASSERT(!repr.empty());
3742 
3743     per_tu_repr_to_fn_type_maps()[repr]= fn_type;
3744   }
3745 
3746   /// Lookup the function type associated to a given function type
3747   /// DIE, in the current translation unit.
3748   ///
3749   /// @param die the DIE of function type to consider.
3750   ///
3751   /// @return the @ref function_type_sptr associated to @p die, or nil
3752   /// of no function_type is associated to @p die.
3753   function_type_sptr
lookup_fn_type_from_die_repr_per_tu(const Dwarf_Die * die)3754   lookup_fn_type_from_die_repr_per_tu(const Dwarf_Die *die)
3755   {
3756     if (!die_is_function_type(die))
3757       return function_type_sptr();
3758 
3759     interned_string repr =
3760       get_die_pretty_representation(die, /*where=*/0);
3761     ABG_ASSERT(!repr.empty());
3762 
3763     istring_fn_type_map_type::const_iterator i =
3764       per_tu_repr_to_fn_type_maps().find(repr);
3765 
3766     if (i == per_tu_repr_to_fn_type_maps().end())
3767       return function_type_sptr();
3768 
3769     return i->second;
3770   }
3771 
3772   /// Set the canonical DIE offset of a given DIE.
3773   ///
3774   /// @param canonical_dies the vector that holds canonical DIEs.
3775   ///
3776   /// @param die_offset the offset of the DIE to set the canonical DIE
3777   /// for.
3778   ///
3779   /// @param canonical_die_offset the canonical DIE offset to
3780   /// associate to @p die_offset.
3781   void
set_canonical_die_offset(offset_offset_map_type & canonical_dies,Dwarf_Off die_offset,Dwarf_Off canonical_die_offset) const3782   set_canonical_die_offset(offset_offset_map_type &canonical_dies,
3783 			   Dwarf_Off die_offset,
3784 			   Dwarf_Off canonical_die_offset) const
3785   {
3786     canonical_dies[die_offset] = canonical_die_offset;}
3787 
3788   /// Set the canonical DIE offset of a given DIE.
3789   ///
3790   ///
3791   /// @param die_offset the offset of the DIE to set the canonical DIE
3792   /// for.
3793   ///
3794   /// @param source the source of the DIE denoted by @p die_offset.
3795   ///
3796   /// @param canonical_die_offset the canonical DIE offset to
3797   /// associate to @p die_offset.
3798   ///
3799   /// @param die_as_type if true, it means that @p die_offset has to
3800   /// be considered as a type.
3801   void
set_canonical_die_offset(Dwarf_Off die_offset,die_source source,Dwarf_Off canonical_die_offset,bool die_as_type) const3802   set_canonical_die_offset(Dwarf_Off die_offset,
3803 			   die_source source,
3804 			   Dwarf_Off canonical_die_offset,
3805 			   bool die_as_type) const
3806   {
3807     offset_offset_map_type &canonical_dies =
3808       die_as_type
3809       ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
3810       get_container(source)
3811       : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
3812       get_container(source);
3813 
3814     set_canonical_die_offset(canonical_dies,
3815 			     die_offset,
3816 			     canonical_die_offset);
3817   }
3818 
3819   /// Set the canonical DIE offset of a given DIE.
3820   ///
3821   ///
3822   /// @param die the DIE to set the canonical DIE for.
3823   ///
3824   /// @param canonical_die_offset the canonical DIE offset to
3825   /// associate to @p die_offset.
3826   ///
3827   /// @param die_as_type if true, it means that @p die has to be
3828   /// considered as a type.
3829   void
set_canonical_die_offset(const Dwarf_Die * die,Dwarf_Off canonical_die_offset,bool die_as_type) const3830   set_canonical_die_offset(const Dwarf_Die *die,
3831 			   Dwarf_Off canonical_die_offset,
3832 			   bool die_as_type) const
3833   {
3834     const die_source source = get_die_source(die);
3835 
3836     Dwarf_Off die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
3837 
3838     set_canonical_die_offset(die_offset, source,
3839 			     canonical_die_offset,
3840 			     die_as_type);
3841   }
3842 
3843   /// Get the canonical DIE offset of a given DIE.
3844   ///
3845   /// @param canonical_dies the vector that contains canonical DIES.
3846   ///
3847   /// @param die_offset the offset of the DIE to consider.
3848   ///
3849   /// @return the canonical of the DIE denoted by @p die_offset, or
3850   /// zero if no canonical DIE was found.
3851   Dwarf_Off
get_canonical_die_offset(offset_offset_map_type & canonical_dies,Dwarf_Off die_offset) const3852   get_canonical_die_offset(offset_offset_map_type &canonical_dies,
3853 			   Dwarf_Off die_offset) const
3854   {
3855     offset_offset_map_type::const_iterator it = canonical_dies.find(die_offset);
3856     if (it == canonical_dies.end())
3857       return 0;
3858     return it->second;
3859   }
3860 
3861   /// Get the canonical DIE offset of a given DIE.
3862   ///
3863   /// @param die_offset the offset of the DIE to consider.
3864   ///
3865   /// @param source the source of the DIE denoted by @p die_offset.
3866   ///
3867   /// @param die_as_type if true, it means that @p is to be considered
3868   /// as a type DIE.
3869   ///
3870   /// @return the canonical of the DIE denoted by @p die_offset, or
3871   /// zero if no canonical DIE was found.
3872   Dwarf_Off
get_canonical_die_offset(Dwarf_Off die_offset,die_source source,bool die_as_type) const3873   get_canonical_die_offset(Dwarf_Off die_offset,
3874 			   die_source source,
3875 			   bool die_as_type) const
3876   {
3877     offset_offset_map_type &canonical_dies =
3878       die_as_type
3879       ? const_cast<read_context*>(this)->canonical_type_die_offsets_.
3880       get_container(source)
3881       : const_cast<read_context*>(this)->canonical_decl_die_offsets_.
3882       get_container(source);
3883 
3884     return get_canonical_die_offset(canonical_dies, die_offset);
3885   }
3886 
3887   /// Associate a DIE (representing a type) to the type that it
3888   /// represents.
3889   ///
3890   /// @param die the DIE to consider.
3891   ///
3892   /// @param type the type to associate the DIE to.
3893   ///
3894   /// @param where_offset where in the DIE stream we logically are.
3895   void
associate_die_to_type(const Dwarf_Die * die,type_base_sptr type,size_t where)3896   associate_die_to_type(const Dwarf_Die	*die,
3897 			type_base_sptr	type,
3898 			size_t		where)
3899   {
3900     if (!type)
3901       return;
3902 
3903     Dwarf_Die equiv_die;
3904     get_or_compute_canonical_die(die, equiv_die, where, /*die_as_type=*/true);
3905 
3906     die_artefact_map_type& m =
3907       type_die_artefact_maps().get_container(*this, &equiv_die);
3908 
3909     size_t die_offset = dwarf_dieoffset(&equiv_die);
3910     m[die_offset] = type;
3911   }
3912 
3913   /// Lookup the type associated to a given DIE.
3914   ///
3915   /// Note that the DIE must have been associated to type by a
3916   /// previous invocation of the function
3917   /// read_context::associate_die_to_type().
3918   ///
3919   /// @param die the DIE to consider.
3920   ///
3921   /// @return the type associated to the DIE or NULL if no type is
3922   /// associated to the DIE.
3923   type_base_sptr
lookup_type_from_die(const Dwarf_Die * die) const3924   lookup_type_from_die(const Dwarf_Die* die) const
3925   {
3926     type_or_decl_base_sptr artifact =
3927       lookup_artifact_from_die(die, /*die_as_type=*/true);
3928     if (function_decl_sptr fn = is_function_decl(artifact))
3929       return fn->get_type();
3930     return is_type(artifact);
3931   }
3932 
3933   /// Lookup the type associated to a DIE at a given offset, from a
3934   /// given source.
3935   ///
3936   /// Note that the DIE must have been associated to type by a
3937   /// previous invocation of the function
3938   /// read_context::associate_die_to_type().
3939   ///
3940   /// @param die_offset the offset of the DIE to consider.
3941   ///
3942   /// @param source the source of the DIE to consider.
3943   ///
3944   /// @return the type associated to the DIE or NULL if no type is
3945   /// associated to the DIE.
3946   type_base_sptr
lookup_type_from_die_offset(size_t die_offset,die_source source) const3947   lookup_type_from_die_offset(size_t die_offset, die_source source) const
3948   {
3949     type_base_sptr result;
3950     const die_artefact_map_type& m =
3951       type_die_artefact_maps().get_container(source);
3952     die_artefact_map_type::const_iterator i = m.find(die_offset);
3953     if (i != m.end())
3954       {
3955 	if (function_decl_sptr fn = is_function_decl(i->second))
3956 	  return fn->get_type();
3957 	result = is_type(i->second);
3958       }
3959 
3960     if (!result)
3961       {
3962 	// Maybe we are looking for a class type being constructed?
3963 	const die_class_or_union_map_type& m = die_wip_classes_map(source);
3964 	die_class_or_union_map_type::const_iterator i = m.find(die_offset);
3965 
3966 	if (i != m.end())
3967 	  result = i->second;
3968       }
3969 
3970     if (!result)
3971       {
3972 	// Maybe we are looking for a function type being constructed?
3973 	const die_function_type_map_type& m =
3974 	  die_wip_function_types_map(source);
3975 	die_function_type_map_type::const_iterator i = m.find(die_offset);
3976 
3977 	if (i != m.end())
3978 	  result = i->second;
3979       }
3980 
3981     return result;
3982   }
3983 
3984   /// Getter of a map that associates a die that represents a
3985   /// class/struct with the declaration of the class, while the class
3986   /// is being constructed.
3987   ///
3988   /// @param source where the DIE is from.
3989   ///
3990   /// @return the map that associates a DIE to the class that is being
3991   /// built.
3992   const die_class_or_union_map_type&
die_wip_classes_map(die_source source) const3993   die_wip_classes_map(die_source source) const
3994   {return const_cast<read_context*>(this)->die_wip_classes_map(source);}
3995 
3996   /// Getter of a map that associates a die that represents a
3997   /// class/struct with the declaration of the class, while the class
3998   /// is being constructed.
3999   ///
4000   /// @param source where the DIE comes from.
4001   ///
4002   /// @return the map that associates a DIE to the class that is being
4003   /// built.
4004   die_class_or_union_map_type&
die_wip_classes_map(die_source source)4005   die_wip_classes_map(die_source source)
4006   {
4007     switch (source)
4008       {
4009       case PRIMARY_DEBUG_INFO_DIE_SOURCE:
4010 	break;
4011       case ALT_DEBUG_INFO_DIE_SOURCE:
4012 	return alternate_die_wip_classes_map_;
4013       case TYPE_UNIT_DIE_SOURCE:
4014 	return type_unit_die_wip_classes_map_;
4015       case NO_DEBUG_INFO_DIE_SOURCE:
4016       case NUMBER_OF_DIE_SOURCES:
4017 	ABG_ASSERT_NOT_REACHED;
4018       }
4019     return die_wip_classes_map_;
4020   }
4021 
4022   /// Getter for a map that associates a die (that represents a
4023   /// function type) whith a function type, while the function type is
4024   /// being constructed (WIP == work in progress).
4025   ///
4026   /// @param source where the DIE comes from.n
4027   ///
4028   /// @return the map of wip function types.
4029   const die_function_type_map_type&
die_wip_function_types_map(die_source source) const4030   die_wip_function_types_map(die_source source) const
4031   {return const_cast<read_context*>(this)->die_wip_function_types_map(source);}
4032 
4033   /// Getter for a map that associates a die (that represents a
4034   /// function type) whith a function type, while the function type is
4035   /// being constructed (WIP == work in progress).
4036   ///
4037   /// @param source where DIEs of the map come from.
4038   ///
4039   /// @return the map of wip function types.
4040   die_function_type_map_type&
die_wip_function_types_map(die_source source)4041   die_wip_function_types_map(die_source source)
4042   {
4043     switch (source)
4044       {
4045       case PRIMARY_DEBUG_INFO_DIE_SOURCE:
4046 	break;
4047       case ALT_DEBUG_INFO_DIE_SOURCE:
4048 	return alternate_die_wip_function_types_map_;
4049       case TYPE_UNIT_DIE_SOURCE:
4050 	return type_unit_die_wip_function_types_map_;
4051       case NO_DEBUG_INFO_DIE_SOURCE:
4052       case NUMBER_OF_DIE_SOURCES:
4053 	ABG_ASSERT_NOT_REACHED;
4054       }
4055     return die_wip_function_types_map_;
4056   }
4057 
4058   /// Getter for a map that associates a die with a function decl
4059   /// which has a linkage name but no elf symbol yet.
4060   ///
4061   /// This is to fixup function decls with linkage names, but with no
4062   /// link to their underlying elf symbol.  There are some DIEs like
4063   /// that in DWARF sometimes, especially when the compiler optimizes
4064   /// stuff aggressively.
4065   die_function_decl_map_type&
die_function_decl_with_no_symbol_map()4066   die_function_decl_with_no_symbol_map()
4067   {return die_function_with_no_symbol_map_;}
4068 
4069   /// Return true iff a given offset is for the DIE of a class that is
4070   /// being built, but that is not fully built yet.  WIP == "work in
4071   /// progress".
4072   ///
4073   /// @param offset the DIE offset to consider.
4074   ///
4075   /// @param source where the DIE of the map come from.
4076   ///
4077   /// @return true iff @p offset is the offset of the DIE of a class
4078   /// that is being currently built.
4079   bool
is_wip_class_die_offset(Dwarf_Off offset,die_source source) const4080   is_wip_class_die_offset(Dwarf_Off offset, die_source source) const
4081   {
4082     die_class_or_union_map_type::const_iterator i =
4083       die_wip_classes_map(source).find(offset);
4084     return (i != die_wip_classes_map(source).end());
4085   }
4086 
4087   /// Return true iff a given offset is for the DIE of a function type
4088   /// that is being built at the moment, but is not fully built yet.
4089   /// WIP == work in progress.
4090   ///
4091   /// @param offset DIE offset to consider.
4092   ///
4093   /// @param source where the DIE comes from.
4094   ///
4095   /// @return true iff @p offset is the offset of the DIE of a
4096   /// function type that is being currently built.
4097   bool
is_wip_function_type_die_offset(Dwarf_Off offset,die_source source) const4098   is_wip_function_type_die_offset(Dwarf_Off offset, die_source source) const
4099   {
4100     die_function_type_map_type::const_iterator i =
4101       die_wip_function_types_map(source).find(offset);
4102     return (i != die_wip_function_types_map(source).end());
4103   }
4104 
4105   /// Getter for the map of declaration-only classes that are to be
4106   /// resolved to their definition classes by the end of the corpus
4107   /// loading.
4108   ///
4109   /// @return a map of string -> vector of classes where the key is
4110   /// the fully qualified name of the class and the value is the
4111   /// vector of declaration-only class.
4112   const string_classes_map&
declaration_only_classes() const4113   declaration_only_classes() const
4114   {return decl_only_classes_map_;}
4115 
4116   /// Getter for the map of declaration-only classes that are to be
4117   /// resolved to their definition classes by the end of the corpus
4118   /// loading.
4119   ///
4120   /// @return a map of string -> vector of classes where the key is
4121   /// the fully qualified name of the class and the value is the
4122   /// vector of declaration-only class.
4123   string_classes_map&
declaration_only_classes()4124   declaration_only_classes()
4125   {return decl_only_classes_map_;}
4126 
4127   /// If a given class is a declaration-only class then stash it on
4128   /// the side so that at the end of the corpus reading we can resolve
4129   /// it to its definition.
4130   ///
4131   /// @param klass the class to consider.
4132   void
maybe_schedule_declaration_only_class_for_resolution(class_decl_sptr & klass)4133   maybe_schedule_declaration_only_class_for_resolution(class_decl_sptr& klass)
4134   {
4135     if (klass->get_is_declaration_only()
4136 	&& klass->get_definition_of_declaration() == 0)
4137       {
4138 	string qn = klass->get_qualified_name();
4139 	string_classes_map::iterator record =
4140 	  declaration_only_classes().find(qn);
4141 	if (record == declaration_only_classes().end())
4142 	  declaration_only_classes()[qn].push_back(klass);
4143 	else
4144 	  record->second.push_back(klass);
4145       }
4146   }
4147 
4148   /// Test if a given declaration-only class has been scheduled for
4149   /// resolution to a defined class.
4150   ///
4151   /// @param klass the class to consider for the test.
4152   ///
4153   /// @return true iff @p klass is a declaration-only class and if
4154   /// it's been scheduled for resolution to a defined class.
4155   bool
is_decl_only_class_scheduled_for_resolution(class_decl_sptr & klass)4156   is_decl_only_class_scheduled_for_resolution(class_decl_sptr& klass)
4157   {
4158     if (klass->get_is_declaration_only())
4159       return (declaration_only_classes().find(klass->get_qualified_name())
4160 	      != declaration_only_classes().end());
4161 
4162     return false;
4163   }
4164 
4165   /// Compare two ABI artifacts in a context which canonicalization
4166   /// has not be done yet.
4167   ///
4168   /// @param l the left-hand-side operand of the comparison
4169   ///
4170   /// @param r the right-hand-side operand of the comparison.
4171   ///
4172   /// @return true if @p l equals @p r.
4173   bool
compare_before_canonicalisation(const type_or_decl_base_sptr & l,const type_or_decl_base_sptr & r)4174   compare_before_canonicalisation(const type_or_decl_base_sptr &l,
4175 				  const type_or_decl_base_sptr &r)
4176   {
4177     if (!l || !r)
4178       return !!l == !!r;
4179 
4180     const environment* e = l->get_environment();
4181     ABG_ASSERT(!e->canonicalization_is_done());
4182 
4183     bool s = e->decl_only_class_equals_definition();
4184     e->decl_only_class_equals_definition(true);
4185     bool equal = l == r;
4186     e->decl_only_class_equals_definition(s);
4187     return equal;
4188   }
4189 
4190   /// Walk the declaration-only classes that have been found during
4191   /// the building of the corpus and resolve them to their definitions.
4192   void
resolve_declaration_only_classes()4193   resolve_declaration_only_classes()
4194   {
4195     vector<string> resolved_classes;
4196 
4197     for (string_classes_map::iterator i =
4198 	   declaration_only_classes().begin();
4199 	 i != declaration_only_classes().end();
4200 	 ++i)
4201       {
4202 	bool to_resolve = false;
4203 	for (classes_type::iterator j = i->second.begin();
4204 	     j != i->second.end();
4205 	     ++j)
4206 	  if ((*j)->get_is_declaration_only()
4207 	      && ((*j)->get_definition_of_declaration() == 0))
4208 	    to_resolve = true;
4209 
4210 	if (!to_resolve)
4211 	  {
4212 	    resolved_classes.push_back(i->first);
4213 	    continue;
4214 	  }
4215 
4216 	// Now, for each decl-only class that have the current name
4217 	// 'i->first', let's try to poke at the fully defined class
4218 	// that is defined in the same translation unit as the
4219 	// declaration.
4220 	//
4221 	// If we find one class (defined in the TU of the declaration)
4222 	// that defines the declaration, then the declaration can be
4223 	// resolved to that class.
4224 	//
4225 	// If no defining class is found in the TU of the declaration,
4226 	// then there are possibly three cases to consider:
4227 	//
4228 	//   1/ There is exactly one class that defines the
4229 	//   declaration and that class is defined in another TU.  In
4230 	//   this case, the declaration is resolved to that
4231 	//   definition.
4232 	//
4233 	//   2/ There are more than one class that define that
4234 	//   declaration and none of them is defined in the TU of the
4235 	//   declaration.  If those classes are all different, then
4236 	//   the declaration is left unresolved.
4237 	//
4238 	//   3/ No class defines the declaration.  In this case, the
4239 	//   declaration is left unresoved.
4240 
4241 	// So get the classes that might define the current
4242 	// declarations which name is i->first.
4243 	const type_base_wptrs_type *classes =
4244 	  lookup_class_types(i->first, *current_corpus());
4245 	if (!classes)
4246 	  continue;
4247 
4248 	// This is a map that associates the translation unit path to
4249 	// the class (that potentially defines the declarations that
4250 	// we consider) that are defined in that translation unit.  It
4251 	// should stay ordered by using the TU path as key to ensure
4252 	// stability of the order of classe definitions in ABIXML
4253 	// output.
4254 	map<string, class_decl_sptr> per_tu_class_map;
4255 	for (type_base_wptrs_type::const_iterator c = classes->begin();
4256 	     c != classes->end();
4257 	     ++c)
4258 	  {
4259 	    class_decl_sptr klass = is_class_type(type_base_sptr(*c));
4260 	    ABG_ASSERT(klass);
4261 
4262 	    klass = is_class_type(look_through_decl_only_class(klass));
4263 	    if (klass->get_is_declaration_only())
4264 	      continue;
4265 
4266 	    string tu_path = klass->get_translation_unit()->get_absolute_path();
4267 	    if (tu_path.empty())
4268 	      continue;
4269 
4270 	    // Build a map that associates the translation unit path
4271 	    // to the class (that potentially defines the declarations
4272 	    // that we consider) that are defined in that translation unit.
4273 	    per_tu_class_map[tu_path] = klass;
4274 	  }
4275 
4276 	if (!per_tu_class_map.empty())
4277 	  {
4278 	    // Walk the declarations to resolve and resolve them
4279 	    // either to the definitions that are in the same TU as
4280 	    // the declaration, or to the definition found elsewhere,
4281 	    // if there is only one such definition.
4282 	    for (classes_type::iterator j = i->second.begin();
4283 		 j != i->second.end();
4284 		 ++j)
4285 	      {
4286 		if ((*j)->get_is_declaration_only()
4287 		    && ((*j)->get_definition_of_declaration() == 0))
4288 		  {
4289 		    string tu_path =
4290 		      (*j)->get_translation_unit()->get_absolute_path();
4291 		    map<string, class_decl_sptr>::const_iterator e =
4292 		      per_tu_class_map.find(tu_path);
4293 		    if (e != per_tu_class_map.end())
4294 		      (*j)->set_definition_of_declaration(e->second);
4295 		    else if (per_tu_class_map.size() == 1)
4296 		      (*j)->set_definition_of_declaration
4297 			(per_tu_class_map.begin()->second);
4298 		    else if (per_tu_class_map.size() > 1)
4299 		      {
4300 			// We are in case where there are more than
4301 			// one definition for the declaration.  Let's
4302 			// see if they are all equal.  If they are,
4303 			// then the declaration resolves to the
4304 			// definition.  Otherwise, we are in the case
4305 			// 3/ described above.
4306 			map<string,
4307 			    class_decl_sptr>::const_iterator it;
4308 			class_decl_sptr first_class =
4309 			  per_tu_class_map.begin()->second;
4310 			bool all_class_definitions_are_equal = true;
4311 			for (it = per_tu_class_map.begin();
4312 			     it != per_tu_class_map.end();
4313 			     ++it)
4314 			  {
4315 			    if (it == per_tu_class_map.begin())
4316 			      continue;
4317 			    else
4318 			      {
4319 				if (!compare_before_canonicalisation(it->second,
4320 								     first_class))
4321 				  {
4322 				    all_class_definitions_are_equal = false;
4323 				    break;
4324 				  }
4325 			      }
4326 			  }
4327 			if (all_class_definitions_are_equal)
4328 			  (*j)->set_definition_of_declaration(first_class);
4329 		      }
4330 		  }
4331 	      }
4332 	    resolved_classes.push_back(i->first);
4333 	  }
4334       }
4335 
4336     size_t num_decl_only_classes = declaration_only_classes().size(),
4337       num_resolved = resolved_classes.size();
4338     if (show_stats())
4339       cerr << "resolved " << num_resolved
4340 	   << " class declarations out of "
4341 	   << num_decl_only_classes
4342 	   << "\n";
4343 
4344     for (vector<string>::const_iterator i = resolved_classes.begin();
4345 	 i != resolved_classes.end();
4346 	 ++i)
4347       declaration_only_classes().erase(*i);
4348 
4349     for (string_classes_map::iterator i = declaration_only_classes().begin();
4350 	 i != declaration_only_classes().end();
4351 	 ++i)
4352       {
4353 	if (show_stats())
4354 	  {
4355 	    if (i == declaration_only_classes().begin())
4356 	      cerr << "Here are the "
4357 		   << num_decl_only_classes - num_resolved
4358 		   << " unresolved class declarations:\n";
4359 	    else
4360 	      cerr << "    " << i->first << "\n";
4361 	  }
4362       }
4363   }
4364 
4365   /// Getter for the map of declaration-only enums that are to be
4366   /// resolved to their definition enums by the end of the corpus
4367   /// loading.
4368   ///
4369   /// @return a map of string -> vector of enums where the key is
4370   /// the fully qualified name of the enum and the value is the
4371   /// vector of declaration-only enum.
4372   const string_enums_map&
declaration_only_enums() const4373   declaration_only_enums() const
4374   {return decl_only_enums_map_;}
4375 
4376   /// Getter for the map of declaration-only enums that are to be
4377   /// resolved to their definition enums by the end of the corpus
4378   /// loading.
4379   ///
4380   /// @return a map of string -> vector of enums where the key is
4381   /// the fully qualified name of the enum and the value is the
4382   /// vector of declaration-only enum.
4383   string_enums_map&
declaration_only_enums()4384   declaration_only_enums()
4385   {return decl_only_enums_map_;}
4386 
4387   /// If a given enum is a declaration-only enum then stash it on
4388   /// the side so that at the end of the corpus reading we can resolve
4389   /// it to its definition.
4390   ///
4391   /// @param enom the enum to consider.
4392   void
maybe_schedule_declaration_only_enum_for_resolution(enum_type_decl_sptr & enom)4393   maybe_schedule_declaration_only_enum_for_resolution(enum_type_decl_sptr& enom)
4394   {
4395     if (enom->get_is_declaration_only()
4396 	&& enom->get_definition_of_declaration() == 0)
4397       {
4398 	string qn = enom->get_qualified_name();
4399 	string_enums_map::iterator record =
4400 	  declaration_only_enums().find(qn);
4401 	if (record == declaration_only_enums().end())
4402 	  declaration_only_enums()[qn].push_back(enom);
4403 	else
4404 	  record->second.push_back(enom);
4405       }
4406   }
4407 
4408   /// Test if a given declaration-only enum has been scheduled for
4409   /// resolution to a defined enum.
4410   ///
4411   /// @param enom the enum to consider for the test.
4412   ///
4413   /// @return true iff @p enom is a declaration-only enum and if
4414   /// it's been scheduled for resolution to a defined enum.
4415   bool
is_decl_only_enum_scheduled_for_resolution(enum_type_decl_sptr & enom)4416   is_decl_only_enum_scheduled_for_resolution(enum_type_decl_sptr& enom)
4417   {
4418     if (enom->get_is_declaration_only())
4419       return (declaration_only_enums().find(enom->get_qualified_name())
4420 	      != declaration_only_enums().end());
4421 
4422     return false;
4423   }
4424 
4425   /// Walk the declaration-only enums that have been found during
4426   /// the building of the corpus and resolve them to their definitions.
4427   ///
4428   /// TODO: Do away with this function by factorizing it with
4429   /// resolve_declaration_only_classes.  All declaration-only decls
4430   /// could be handled the same way as declaration-only-ness is a
4431   /// property of abigail::ir::decl_base now.
4432   void
resolve_declaration_only_enums()4433   resolve_declaration_only_enums()
4434   {
4435     vector<string> resolved_enums;
4436 
4437     for (string_enums_map::iterator i =
4438 	   declaration_only_enums().begin();
4439 	 i != declaration_only_enums().end();
4440 	 ++i)
4441       {
4442 	bool to_resolve = false;
4443 	for (enums_type::iterator j = i->second.begin();
4444 	     j != i->second.end();
4445 	     ++j)
4446 	  if ((*j)->get_is_declaration_only()
4447 	      && ((*j)->get_definition_of_declaration() == 0))
4448 	    to_resolve = true;
4449 
4450 	if (!to_resolve)
4451 	  {
4452 	    resolved_enums.push_back(i->first);
4453 	    continue;
4454 	  }
4455 
4456 	// Now, for each decl-only enum that have the current name
4457 	// 'i->first', let's try to poke at the fully defined enum
4458 	// that is defined in the same translation unit as the
4459 	// declaration.
4460 	//
4461 	// If we find one enum (defined in the TU of the declaration)
4462 	// that defines the declaration, then the declaration can be
4463 	// resolved to that enum.
4464 	//
4465 	// If no defining enum is found in the TU of the declaration,
4466 	// then there are possibly three cases to consider:
4467 	//
4468 	//   1/ There is exactly one enum that defines the
4469 	//   declaration and that enum is defined in another TU.  In
4470 	//   this case, the declaration is resolved to that
4471 	//   definition.
4472 	//
4473 	//   2/ There are more than one enum that define that
4474 	//   declaration and none of them is defined in the TU of the
4475 	//   declaration.  In this case, the declaration is left
4476 	//   unresolved.
4477 	//
4478 	//   3/ No enum defines the declaration.  In this case, the
4479 	//   declaration is left unresoved.
4480 
4481 	// So get the enums that might define the current
4482 	// declarations which name is i->first.
4483 	const type_base_wptrs_type *enums =
4484 	  lookup_enum_types(i->first, *current_corpus());
4485 	if (!enums)
4486 	  continue;
4487 
4488 	unordered_map<string, enum_type_decl_sptr> per_tu_enum_map;
4489 	for (type_base_wptrs_type::const_iterator c = enums->begin();
4490 	     c != enums->end();
4491 	     ++c)
4492 	  {
4493 	    enum_type_decl_sptr enom = is_enum_type(type_base_sptr(*c));
4494 	    ABG_ASSERT(enom);
4495 
4496 	    enom = is_enum_type(look_through_decl_only_enum(enom));
4497 	    if (enom->get_is_declaration_only())
4498 	      continue;
4499 
4500 	    string tu_path = enom->get_translation_unit()->get_absolute_path();
4501 	    if (tu_path.empty())
4502 	      continue;
4503 
4504 	    // Build a map that associates the translation unit path
4505 	    // to the enum (that potentially defines the declarations
4506 	    // that we consider) that are defined in that translation unit.
4507 	    per_tu_enum_map[tu_path] = enom;
4508 	  }
4509 
4510 	if (!per_tu_enum_map.empty())
4511 	  {
4512 	    // Walk the declarations to resolve and resolve them
4513 	    // either to the definitions that are in the same TU as
4514 	    // the declaration, or to the definition found elsewhere,
4515 	    // if there is only one such definition.
4516 	    for (enums_type::iterator j = i->second.begin();
4517 		 j != i->second.end();
4518 		 ++j)
4519 	      {
4520 		if ((*j)->get_is_declaration_only()
4521 		    && ((*j)->get_definition_of_declaration() == 0))
4522 		  {
4523 		    string tu_path =
4524 		      (*j)->get_translation_unit()->get_absolute_path();
4525 		    unordered_map<string, enum_type_decl_sptr>::const_iterator e =
4526 		      per_tu_enum_map.find(tu_path);
4527 		    if (e != per_tu_enum_map.end())
4528 		      (*j)->set_definition_of_declaration(e->second);
4529 		    else if (per_tu_enum_map.size() == 1)
4530 		      (*j)->set_definition_of_declaration
4531 			(per_tu_enum_map.begin()->second);
4532 		  }
4533 	      }
4534 	    resolved_enums.push_back(i->first);
4535 	  }
4536       }
4537 
4538     size_t num_decl_only_enums = declaration_only_enums().size(),
4539       num_resolved = resolved_enums.size();
4540     if (show_stats())
4541       cerr << "resolved " << num_resolved
4542 	   << " enum declarations out of "
4543 	   << num_decl_only_enums
4544 	   << "\n";
4545 
4546     for (vector<string>::const_iterator i = resolved_enums.begin();
4547 	 i != resolved_enums.end();
4548 	 ++i)
4549       declaration_only_enums().erase(*i);
4550 
4551     for (string_enums_map::iterator i = declaration_only_enums().begin();
4552 	 i != declaration_only_enums().end();
4553 	 ++i)
4554       {
4555 	if (show_stats())
4556 	  {
4557 	    if (i == declaration_only_enums().begin())
4558 	      cerr << "Here are the "
4559 		   << num_decl_only_enums - num_resolved
4560 		   << " unresolved enum declarations:\n";
4561 	    else
4562 	      cerr << "    " << i->first << "\n";
4563 	  }
4564       }
4565   }
4566 
4567   /// Test if a symbol belongs to a function of the current ABI
4568   /// corpus.
4569   ///
4570   /// This is a sub-routine of fixup_functions_with_no_symbols.
4571   ///
4572   /// @param fn the function symbol to consider.
4573   ///
4574   /// @returnt true if @p fn belongs to a function of the current ABI
4575   /// corpus.
4576   bool
symbol_already_belongs_to_a_function(elf_symbol_sptr & fn)4577   symbol_already_belongs_to_a_function(elf_symbol_sptr& fn)
4578   {
4579     corpus_sptr corp = current_corpus();
4580     if (!corp)
4581       return false;
4582 
4583     string id = fn->get_id_string();
4584 
4585     const vector<function_decl*> *fns = corp->lookup_functions(id);
4586     if (!fns)
4587       return false;
4588 
4589     for (vector<function_decl*>::const_iterator i = fns->begin();
4590 	 i != fns->end();
4591 	 ++i)
4592       {
4593 	function_decl* f = *i;
4594 	ABG_ASSERT(f);
4595 	if (f->get_symbol())
4596 	  return true;
4597       }
4598     return false;
4599   }
4600 
4601   /// Some functions described by DWARF may have their linkage name
4602   /// set, but no link to their actual underlying elf symbol.  When
4603   /// these are virtual member functions, comparing the enclosing type
4604   /// against another one which has its underlying symbol properly set
4605   /// might lead to spurious type changes.
4606   ///
4607   /// If the corpus contains a symbol with the same name as the
4608   /// linkage name of the function, then set up the link between the
4609   /// function and its underlying symbol.
4610   ///
4611   /// Note that for the moment, only virtual member functions are
4612   /// fixed up like this.  This is because they really are the only
4613   /// fuctions of functions that can affect types (in spurious ways).
4614   void
fixup_functions_with_no_symbols()4615   fixup_functions_with_no_symbols()
4616   {
4617     corpus_sptr corp = current_corpus();
4618     if (!corp)
4619       return;
4620 
4621     die_function_decl_map_type &fns_with_no_symbol =
4622       die_function_decl_with_no_symbol_map();
4623 
4624     if (do_log())
4625       cerr << fns_with_no_symbol.size()
4626 	   << " functions to fixup, potentially\n";
4627 
4628     for (die_function_decl_map_type::iterator i = fns_with_no_symbol.begin();
4629 	 i != fns_with_no_symbol.end();
4630 	 ++i)
4631       if (elf_symbol_sptr sym =
4632 	  corp->lookup_function_symbol(i->second->get_linkage_name()))
4633 	{
4634 	  // So i->second is a virtual member function that was
4635 	  // previously scheduled to be set a function symbol.
4636 	  //
4637 	  // But if it appears that it now has a symbol already set,
4638 	  // then do not set a symbol to it again.
4639 	  //
4640 	  // Or if it appears that another virtual member function
4641 	  // from the current ABI Corpus, with the same linkage
4642 	  // (mangled) name has already been set a symbol, then do not
4643 	  // set a symbol to this function either.  Otherwise, there
4644 	  // will be two virtual member functions with the same symbol
4645 	  // in the class and that leads to spurious hard-to-debug
4646 	  // change reports later down the road.
4647 	  if (i->second->get_symbol()
4648 	      || symbol_already_belongs_to_a_function(sym))
4649 	    continue;
4650 
4651 	  ABG_ASSERT(is_member_function(i->second));
4652 	  ABG_ASSERT(get_member_function_is_virtual(i->second));
4653 	  i->second->set_symbol(sym);
4654 	  if (do_log())
4655 	    cerr << "fixed up '"
4656 		 << i->second->get_pretty_representation()
4657 		 << "' with symbol '"
4658 		 << sym->get_id_string()
4659 		 << "'\n";
4660 	}
4661 
4662     fns_with_no_symbol.clear();
4663   }
4664 
4665   /// Return a reference to the vector containing the offsets of the
4666   /// types that need late canonicalizing.
4667   ///
4668   /// @param source whe DIEs referred to by the offsets contained in
4669   /// the vector to return are from.
4670   vector<Dwarf_Off>&
types_to_canonicalize(die_source source)4671   types_to_canonicalize(die_source source)
4672   {
4673     switch (source)
4674       {
4675       case PRIMARY_DEBUG_INFO_DIE_SOURCE:
4676 	break;
4677       case ALT_DEBUG_INFO_DIE_SOURCE:
4678 	return alt_types_to_canonicalize_;
4679       case TYPE_UNIT_DIE_SOURCE:
4680 	return type_unit_types_to_canonicalize_;
4681       case NO_DEBUG_INFO_DIE_SOURCE:
4682       case NUMBER_OF_DIE_SOURCES:
4683 	ABG_ASSERT_NOT_REACHED;
4684       }
4685     return types_to_canonicalize_;
4686   }
4687 
4688   /// Return a reference to the vector containing the offsets of the
4689   /// types that need late canonicalizing.
4690   ///
4691   /// @param source where the DIEs referred to by the offset in the
4692   /// returned vector are from.
4693   const vector<Dwarf_Off>&
types_to_canonicalize(die_source source) const4694   types_to_canonicalize(die_source source) const
4695   {return const_cast<read_context*>(this)->types_to_canonicalize(source);}
4696 
4697   /// Return a reference to the vector containing the types created
4698   /// during the binary analysis but that are not tied to a given
4699   /// DWARF DIE.
4700   ///
4701   /// @return reference to the vector containing the types created
4702   /// during the binary analysis but that are not tied to a given
4703   /// DWARF DIE.
4704   const vector<type_base_sptr>&
extra_types_to_canonicalize() const4705   extra_types_to_canonicalize() const
4706   {return extra_types_to_canonicalize_;}
4707 
4708   /// Clear the containers holding types to canonicalize.
4709   void
clear_types_to_canonicalize()4710   clear_types_to_canonicalize()
4711   {
4712     types_to_canonicalize_.clear();
4713     alt_types_to_canonicalize_.clear();
4714     type_unit_types_to_canonicalize_.clear();
4715     extra_types_to_canonicalize_.clear();
4716   }
4717 
4718   /// Put the offset of a DIE representing a type on a side vector so
4719   /// that when the reading of the debug info of the current
4720   /// translation unit is done, we can get back to the type DIE and
4721   /// from there, to the type it's associated to, and then
4722   /// canonicalize it.  This what we call late canonicalization.
4723   ///
4724   /// @param die the type DIE to schedule for late type
4725   /// canonicalization.
4726   void
schedule_type_for_late_canonicalization(const Dwarf_Die * die)4727   schedule_type_for_late_canonicalization(const Dwarf_Die *die)
4728   {
4729     Dwarf_Off o;
4730 
4731     Dwarf_Die equiv_die;
4732     ABG_ASSERT(get_canonical_die(die, equiv_die,
4733 				  /*where=*/0,
4734 				 /*die_as_type=*/true));
4735 
4736     const die_source source = get_die_source(&equiv_die);
4737     o = dwarf_dieoffset(&equiv_die);
4738 
4739     const die_artefact_map_type& m =
4740       type_die_artefact_maps().get_container(*this, die);
4741 
4742     die_artefact_map_type::const_iterator i = m.find(o);
4743     ABG_ASSERT(i != m.end());
4744 
4745     // Then really do the scheduling.
4746     types_to_canonicalize(source).push_back(o);
4747   }
4748 
4749   /// Types that were created but not tied to a particular DIE, must
4750   /// be scheduled for late canonicalization using this method.
4751   ///
4752   /// @param t the type to schedule for late canonicalization.
4753   void
schedule_type_for_late_canonicalization(const type_base_sptr & t)4754   schedule_type_for_late_canonicalization(const type_base_sptr &t)
4755   {
4756     extra_types_to_canonicalize_.push_back(t);
4757   }
4758 
4759   /// Canonicalize types which DIE offsets are stored in vectors on
4760   /// the side.  This is a sub-routine of
4761   /// read_context::perform_late_type_canonicalizing().
4762   ///
4763   /// @param source where the DIE of the types to canonicalize are
4764   /// from.
4765   void
canonicalize_types_scheduled(die_source source)4766   canonicalize_types_scheduled(die_source source)
4767   {
4768     tools_utils::timer cn_timer;
4769     if (do_log())
4770       {
4771 	cerr << "going to canonicalize types";
4772 	corpus_sptr c = current_corpus();
4773 	if (c)
4774 	  cerr << " of corpus " << current_corpus()->get_path();
4775 	cerr << " (DIEs source: " << source << ")\n";
4776 	cn_timer.start();
4777       }
4778 
4779     if (!types_to_canonicalize(source).empty()
4780 	|| !extra_types_to_canonicalize().empty())
4781       {
4782 	tools_utils::timer single_type_cn_timer;
4783 	size_t total = types_to_canonicalize(source).size();
4784 	if (do_log())
4785 	  cerr << total << " types to canonicalize\n";
4786 	for (size_t i = 0; i < total; ++i)
4787 	  {
4788 	    Dwarf_Off element = types_to_canonicalize(source)[i];
4789 	    type_base_sptr t =
4790 	      lookup_type_from_die_offset(element, source);
4791 	    ABG_ASSERT(t);
4792 	    if (do_log())
4793 	      {
4794 		cerr << "canonicalizing type "
4795 		     << get_pretty_representation(t, false)
4796 		     << " [" << i + 1 << "/" << total << "]";
4797 		if (corpus_sptr c = current_corpus())
4798 		  cerr << "@" << c->get_path();
4799 		cerr << " ...";
4800 		single_type_cn_timer.start();
4801 	      }
4802 	    canonicalize(t);
4803 	    if (do_log())
4804 	      {
4805 		cerr << " DONE";
4806 		single_type_cn_timer.stop();
4807 		cerr << ":" <<single_type_cn_timer << "\n";
4808 	      }
4809 	  }
4810 
4811 	// Now canonicalize types that were created but not tied to
4812 	// any DIE.
4813 	if (!extra_types_to_canonicalize().empty())
4814 	  {
4815 	    tools_utils::timer single_type_cn_timer;
4816 	    size_t total = extra_types_to_canonicalize().size();
4817 	    if (do_log())
4818 	      cerr << total << " extra types to canonicalize\n";
4819 	    size_t i = 1;
4820 	    for (vector<type_base_sptr>::const_iterator it =
4821 		   extra_types_to_canonicalize().begin();
4822 		 it != extra_types_to_canonicalize().end();
4823 		 ++it, ++i)
4824 	      {
4825 		if (do_log())
4826 		  {
4827 		    cerr << "canonicalizing extra type "
4828 			 << get_pretty_representation(*it, false)
4829 			 << " [" << i << "/" << total << "]";
4830 		    if (corpus_sptr c = current_corpus())
4831 		      cerr << "@" << c->get_path();
4832 		    cerr << " ...";
4833 		    single_type_cn_timer.start();
4834 		  }
4835 		canonicalize(*it);
4836 		if (do_log())
4837 		  {
4838 		    single_type_cn_timer.stop();
4839 		    cerr << "DONE:"
4840 			 << single_type_cn_timer
4841 			 << "\n";
4842 		  }
4843 	      }
4844 	  }
4845       }
4846 
4847     if (do_log())
4848       {
4849 	cn_timer.stop();
4850 	cerr << "finished canonicalizing types";
4851 	corpus_sptr c = current_corpus();
4852 	if (c)
4853 	  cerr << " of corpus " << current_corpus()->get_path();
4854 	cerr << " (DIEs source: "
4855 	     << source << "):"
4856 	     << cn_timer
4857 	     << "\n";
4858       }
4859   }
4860 
4861   /// Compute the number of canonicalized and missed types in the late
4862   /// canonicalization phase.
4863   ///
4864   /// @param source where the DIEs of the canonicalized types are
4865   /// from.
4866   ///
4867   /// @param canonicalized the number of types that got canonicalized
4868   /// is added to the value already present in this parameter.
4869   ///
4870   /// @param missed the number of types scheduled for late
4871   /// canonicalization and which couldn't be canonicalized (for a
4872   /// reason) is added to the value already present in this parameter.
4873   void
add_late_canonicalized_types_stats(die_source source,size_t & canonicalized,size_t & missed) const4874   add_late_canonicalized_types_stats(die_source	source,
4875 				     size_t&		canonicalized,
4876 				     size_t&		missed) const
4877   {
4878     for (vector<Dwarf_Off>::const_iterator i =
4879 	   types_to_canonicalize(source).begin();
4880 	 i != types_to_canonicalize(source).end();
4881 	 ++i)
4882       {
4883         type_base_sptr t = lookup_type_from_die_offset(*i, source);
4884 	if (t->get_canonical_type())
4885 	  ++canonicalized;
4886 	else
4887 	  ++missed;
4888       }
4889   }
4890 
4891   /// Compute the number of canonicalized and missed types in the late
4892   /// canonicalization phase.
4893   ///
4894   /// @param canonicalized the number of types that got canonicalized
4895   /// is added to the value already present in this parameter.
4896   ///
4897   /// @param missed the number of types scheduled for late
4898   /// canonicalization and which couldn't be canonicalized (for a
4899   /// reason) is added to the value already present in this parameter.
4900   void
add_late_canonicalized_types_stats(size_t & canonicalized,size_t & missed) const4901   add_late_canonicalized_types_stats(size_t& canonicalized,
4902 				     size_t& missed) const
4903   {
4904     for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
4905 	 source < NUMBER_OF_DIE_SOURCES;
4906 	 ++source)
4907       add_late_canonicalized_types_stats(source, canonicalized, missed);
4908   }
4909 
4910   // Look at the types that need to be canonicalized after the
4911   // translation unit has been constructed and canonicalize them.
4912   void
perform_late_type_canonicalizing()4913   perform_late_type_canonicalizing()
4914   {
4915     for (die_source source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
4916 	 source < NUMBER_OF_DIE_SOURCES;
4917 	 ++source)
4918       canonicalize_types_scheduled(source);
4919 
4920     if (show_stats())
4921       {
4922 	size_t num_canonicalized = 0, num_missed = 0, total = 0;
4923 	add_late_canonicalized_types_stats(num_canonicalized,
4924 					   num_missed);
4925 	total = num_canonicalized + num_missed;
4926 	cerr << "binary: "
4927 	     << elf_path()
4928 	     << "\n";
4929 	cerr << "    # late canonicalized types: "
4930              << num_canonicalized;
4931         if (total)
4932           cerr << " (" << num_canonicalized * 100 / total << "%)";
4933         cerr << "\n"
4934 	     << "    # missed canonicalization opportunities: "
4935              << num_missed;
4936         if (total)
4937           cerr << " (" << num_missed * 100 / total << "%)";
4938         cerr << "\n";
4939       }
4940 
4941   }
4942 
4943   const die_tu_map_type&
die_tu_map() const4944   die_tu_map() const
4945   {return die_tu_map_;}
4946 
4947   die_tu_map_type&
die_tu_map()4948   die_tu_map()
4949   {return die_tu_map_;}
4950 
4951   /// Getter for the map that associates a translation unit DIE to the
4952   /// vector of imported unit points that it contains.
4953   ///
4954   /// @param source where the DIEs are from.
4955   ///
4956   /// @return the map.
4957   const tu_die_imported_unit_points_map_type&
tu_die_imported_unit_points_map(die_source source) const4958   tu_die_imported_unit_points_map(die_source source) const
4959   {return const_cast<read_context*>(this)->tu_die_imported_unit_points_map(source);}
4960 
4961   /// Getter for the map that associates a translation unit DIE to the
4962   /// vector of imported unit points that it contains.
4963   ///
4964   /// @param source where the DIEs are from.
4965   ///
4966   /// @return the map.
4967   tu_die_imported_unit_points_map_type&
tu_die_imported_unit_points_map(die_source source)4968   tu_die_imported_unit_points_map(die_source source)
4969   {
4970     switch (source)
4971       {
4972       case PRIMARY_DEBUG_INFO_DIE_SOURCE:
4973 	break;
4974       case ALT_DEBUG_INFO_DIE_SOURCE:
4975 	return alt_tu_die_imported_unit_points_map_;
4976       case TYPE_UNIT_DIE_SOURCE:
4977 	return type_units_tu_die_imported_unit_points_map_;
4978       case NO_DEBUG_INFO_DIE_SOURCE:
4979       case NUMBER_OF_DIE_SOURCES:
4980 	// We cannot reach this point.
4981 	ABG_ASSERT_NOT_REACHED;
4982       }
4983     return tu_die_imported_unit_points_map_;
4984   }
4985 
4986   /// Getter of the current corpus being constructed.
4987   ///
4988   /// @return the current corpus.
4989   const corpus_sptr
current_corpus() const4990   current_corpus() const
4991   {return cur_corpus_;}
4992 
4993   /// Getter of the current corpus being constructed.
4994   ///
4995   /// @return the current corpus.
4996   corpus_sptr
current_corpus()4997   current_corpus()
4998   {return cur_corpus_;}
4999 
5000   /// Setter of the current corpus being constructed.
5001   ///
5002   /// @param c the new corpus.
5003   void
current_corpus(const corpus_sptr & c)5004   current_corpus(const corpus_sptr& c)
5005   {
5006     if (c)
5007       cur_corpus_ = c;
5008   }
5009 
5010   /// Reset the current corpus being constructed.
5011   ///
5012   /// This actually deletes the current corpus being constructed.
5013   void
reset_current_corpus()5014   reset_current_corpus()
5015   {cur_corpus_.reset();}
5016 
5017   /// Getter of the current corpus group being constructed.
5018   ///
5019   /// @return current the current corpus being constructed, if any, or
5020   /// nil.
5021   const corpus_group_sptr
current_corpus_group() const5022   current_corpus_group() const
5023   {return cur_corpus_group_;}
5024 
5025   /// Getter of the current corpus group being constructed.
5026   ///
5027   /// @return current the current corpus being constructed, if any, or
5028   /// nil.
5029   corpus_group_sptr
current_corpus_group()5030   current_corpus_group()
5031   {return cur_corpus_group_;}
5032 
5033   /// Setter of the current corpus group being constructed.
5034   ///
5035   /// @param g the new corpus group.
5036   void
current_corpus_group(const corpus_group_sptr & g)5037   current_corpus_group(const corpus_group_sptr& g)
5038   {
5039     if (g)
5040       cur_corpus_group_ = g;
5041   }
5042 
5043   /// Test if there is a corpus group being built.
5044   ///
5045   /// @return if there is a corpus group being built, false otherwise.
5046   bool
has_corpus_group() const5047   has_corpus_group() const
5048   {return bool(cur_corpus_group_);}
5049 
5050   /// Return the main corpus from the current corpus group, if any.
5051   ///
5052   /// @return the main corpus of the current corpus group, if any, nil
5053   /// if no corpus group is being constructed.
5054   corpus_sptr
main_corpus_from_current_group()5055   main_corpus_from_current_group()
5056   {
5057     if (cur_corpus_group_)
5058       return cur_corpus_group_->get_main_corpus();
5059     return corpus_sptr();
5060   }
5061 
5062   /// Return the main corpus from the current corpus group, if any.
5063   ///
5064   /// @return the main corpus of the current corpus group, if any, nil
5065   /// if no corpus group is being constructed.
5066   const corpus_sptr
main_corpus_from_current_group() const5067   main_corpus_from_current_group() const
5068   {return const_cast<read_context*>(this)->main_corpus_from_current_group();}
5069 
5070   /// Test if the current corpus being built is the main corpus of the
5071   /// current corpus group.
5072   ///
5073   /// @return return true iff the current corpus being built is the
5074   /// main corpus of the current corpus group.
5075   bool
current_corpus_is_main_corpus_from_current_group() const5076   current_corpus_is_main_corpus_from_current_group() const
5077   {
5078     corpus_sptr main_corpus = main_corpus_from_current_group();
5079 
5080     if (main_corpus && main_corpus.get() == cur_corpus_.get())
5081       return true;
5082 
5083     return false;
5084   }
5085 
5086   /// Return true if the current corpus is part of a corpus group
5087   /// being built and if it's not the main corpus of the group.
5088   ///
5089   /// For instance, this would return true if we are loading a linux
5090   /// kernel *module* that is part of the current corpus group that is
5091   /// being built.  In this case, it means we should re-use types
5092   /// coming from the "vmlinux" binary that is the main corpus of the
5093   /// group.
5094   ///
5095   /// @return the corpus group the current corpus belongs to, if the
5096   /// current corpus is part of a corpus group being built. Nil otherwise.
5097   corpus_sptr
should_reuse_type_from_corpus_group() const5098   should_reuse_type_from_corpus_group() const
5099   {
5100     if (has_corpus_group() && is_c_language(cur_transl_unit()->get_language()))
5101       if (corpus_sptr main_corpus = main_corpus_from_current_group())
5102 	if (!current_corpus_is_main_corpus_from_current_group())
5103 	  return current_corpus_group();
5104 
5105     return corpus_sptr();
5106   }
5107 
5108   /// Get the map that associates each DIE to its parent DIE.  This is
5109   /// for DIEs coming from the main debug info sections.
5110   ///
5111   /// @param source where the DIEs in the map come from.
5112   ///
5113   /// @return the DIE -> parent map.
5114   const offset_offset_map_type&
die_parent_map(die_source source) const5115   die_parent_map(die_source source) const
5116   {return const_cast<read_context*>(this)->die_parent_map(source);}
5117 
5118   /// Get the map that associates each DIE to its parent DIE.  This is
5119   /// for DIEs coming from the main debug info sections.
5120   ///
5121   /// @param source where the DIEs in the map come from.
5122   ///
5123   /// @return the DIE -> parent map.
5124   offset_offset_map_type&
die_parent_map(die_source source)5125   die_parent_map(die_source source)
5126   {
5127     switch (source)
5128       {
5129       case PRIMARY_DEBUG_INFO_DIE_SOURCE:
5130 	break;
5131       case ALT_DEBUG_INFO_DIE_SOURCE:
5132 	return alternate_die_parent_map_;
5133       case TYPE_UNIT_DIE_SOURCE:
5134 	return type_section_die_parent_map();
5135       case NO_DEBUG_INFO_DIE_SOURCE:
5136       case NUMBER_OF_DIE_SOURCES:
5137 	ABG_ASSERT_NOT_REACHED;
5138       }
5139     return primary_die_parent_map_;
5140   }
5141 
5142   const offset_offset_map_type&
type_section_die_parent_map() const5143   type_section_die_parent_map() const
5144   {return type_section_die_parent_map_;}
5145 
5146   offset_offset_map_type&
type_section_die_parent_map()5147   type_section_die_parent_map()
5148   {return type_section_die_parent_map_;}
5149 
5150   /// Getter of the current translation unit.
5151   ///
5152   /// @return the current translation unit being constructed.
5153   const translation_unit_sptr&
cur_transl_unit() const5154   cur_transl_unit() const
5155   {return cur_tu_;}
5156 
5157   /// Getter of the current translation unit.
5158   ///
5159   /// @return the current translation unit being constructed.
5160   translation_unit_sptr&
cur_transl_unit()5161   cur_transl_unit()
5162   {return cur_tu_;}
5163 
5164   /// Setter of the current translation unit.
5165   ///
5166   /// @param tu the current translation unit being constructed.
5167   void
cur_transl_unit(translation_unit_sptr tu)5168   cur_transl_unit(translation_unit_sptr tu)
5169   {
5170     if (tu)
5171       cur_tu_ = tu;
5172   }
5173 
5174   /// Return the global scope of the current translation unit.
5175   ///
5176   /// @return the global scope of the current translation unit.
5177   const scope_decl_sptr&
global_scope() const5178   global_scope() const
5179   {return cur_transl_unit()->get_global_scope();}
5180 
5181   /// Return a scope that is nil.
5182   ///
5183   /// @return a scope that is nil.
5184   const scope_decl_sptr&
nil_scope() const5185   nil_scope() const
5186   {return nil_scope_;}
5187 
5188   const scope_stack_type&
scope_stack() const5189   scope_stack() const
5190   {return scope_stack_;}
5191 
5192   scope_stack_type&
scope_stack()5193   scope_stack()
5194   {return scope_stack_;}
5195 
5196   scope_decl*
current_scope()5197   current_scope()
5198   {
5199     if (scope_stack().empty())
5200       {
5201 	if (cur_transl_unit())
5202 	  scope_stack().push(cur_transl_unit()->get_global_scope().get());
5203       }
5204     return scope_stack().top();
5205   }
5206 
5207   list<var_decl_sptr>&
var_decls_to_re_add_to_tree()5208   var_decls_to_re_add_to_tree()
5209   {return var_decls_to_add_;}
5210 
5211   /// The section containing the symbol table from the current ELF
5212   /// file.
5213   ///
5214   /// Note that after it's first invocation, this function caches the
5215   /// symbol table that it found.  Subsequent invocations just return
5216   /// the cached symbol table section.
5217   ///
5218   /// @return the symbol table section if found
5219   Elf_Scn*
find_symbol_table_section() const5220   find_symbol_table_section() const
5221   {
5222     if (!symtab_section_)
5223       symtab_section_ = elf_helpers::find_symbol_table_section(elf_handle());
5224     return symtab_section_;
5225   }
5226 
5227   /// Lookup an elf symbol, referred to by its index, from the .symtab
5228   /// section.
5229   ///
5230   /// The resulting symbol returned is an instance of a GElf_Sym, from
5231   /// the libelf library.
5232   ///
5233   /// @param symbol_index the index of the symbol to look up.
5234   ///
5235   /// @param elf_sym out parameter.  This is set to the resulting ELF
5236   /// symbol iff the function returns TRUE, meaning the symbol was
5237   /// found.
5238   ///
5239   /// @return TRUE iff the symbol was found.
5240   bool
lookup_native_elf_symbol_from_index(size_t symbol_index,GElf_Sym & elf_sym)5241   lookup_native_elf_symbol_from_index(size_t symbol_index, GElf_Sym &elf_sym)
5242   {
5243     Elf_Scn* symtab_section = find_symbol_table_section();
5244     if (!symtab_section)
5245       return false;
5246 
5247     Elf_Data* symtab = elf_getdata(symtab_section, 0);
5248     ABG_ASSERT(symtab);
5249 
5250     if (!gelf_getsym(symtab, symbol_index, &elf_sym))
5251       return false;
5252 
5253     return true;
5254   }
5255 
5256   /// Test if a given function symbol has been exported.
5257   ///
5258   /// @param symbol_address the address of the symbol we are looking
5259   /// for.  Note that this address must be a relative offset from the
5260   /// beginning of the .text section, just like the kind of addresses
5261   /// that are present in the .symtab section.
5262   ///
5263   /// @returnthe elf symbol if found, or nil otherwise.
5264   elf_symbol_sptr
function_symbol_is_exported(GElf_Addr symbol_address) const5265   function_symbol_is_exported(GElf_Addr symbol_address) const
5266   {
5267     elf_symbol_sptr symbol = symtab()->lookup_symbol(symbol_address);
5268     if (!symbol)
5269       return symbol;
5270 
5271     if (!symbol->is_function() || !symbol->is_public())
5272       return elf_symbol_sptr();
5273 
5274     address_set_sptr set;
5275     bool looking_at_linux_kernel_binary =
5276       load_in_linux_kernel_mode() && is_linux_kernel(elf_handle());
5277 
5278     if (looking_at_linux_kernel_binary)
5279       {
5280 	if (symbol->is_in_ksymtab())
5281 	  return symbol;
5282 	return elf_symbol_sptr();
5283       }
5284 
5285     return symbol;
5286   }
5287 
5288   /// Test if a given variable symbol has been exported.
5289   ///
5290   /// @param symbol_address the address of the symbol we are looking
5291   /// for.  Note that this address must be a relative offset from the
5292   /// beginning of the .text section, just like the kind of addresses
5293   /// that are present in the .symtab section.
5294   ///
5295   /// @returnthe elf symbol if found, or nil otherwise.
5296   elf_symbol_sptr
variable_symbol_is_exported(GElf_Addr symbol_address) const5297   variable_symbol_is_exported(GElf_Addr symbol_address) const
5298   {
5299     elf_symbol_sptr symbol = symtab()->lookup_symbol(symbol_address);
5300     if (!symbol)
5301       return symbol;
5302 
5303     if (!symbol->is_variable() || !symbol->is_public())
5304       return elf_symbol_sptr();
5305 
5306     address_set_sptr set;
5307     bool looking_at_linux_kernel_binary =
5308       load_in_linux_kernel_mode() && is_linux_kernel(elf_handle());
5309 
5310     if (looking_at_linux_kernel_binary)
5311       {
5312 	if (symbol->is_in_ksymtab())
5313 	  return symbol;
5314 	return elf_symbol_sptr();
5315       }
5316 
5317     return symbol;
5318   }
5319 
5320   /// Getter for the symtab reader. Will load the symtab from the elf handle if
5321   /// not yet set.
5322   ///
5323   /// @return a shared pointer to the symtab object
5324   const symtab_reader::symtab_sptr&
symtab() const5325   symtab() const
5326   {
5327     if (!symtab_)
5328       symtab_ = symtab_reader::symtab::load
5329 	(elf_handle(), options_.env,
5330 	 [&](const elf_symbol_sptr& symbol)
5331 	 {return is_elf_symbol_suppressed(symbol);});
5332 
5333     if (!symtab_)
5334       std::cerr << "Symbol table of '" << elf_path_
5335 		<< "' could not be loaded\n";
5336     return symtab_;
5337   }
5338 
5339   /// Getter for the ELF dt_needed tag.
5340   const vector<string>&
dt_needed() const5341   dt_needed() const
5342   {return dt_needed_;}
5343 
5344   /// Getter for the ELF dt_soname tag.
5345   const string&
dt_soname() const5346   dt_soname() const
5347   {return dt_soname_;}
5348 
5349   /// Getter for the ELF architecture of the current file.
5350   const string&
elf_architecture() const5351   elf_architecture() const
5352   {return elf_architecture_;}
5353 
5354   /// Test if a given ELF symbol was suppressed by a suppression
5355   /// specification.
5356   ///
5357   /// @param symbol the ELF symbol to consider.
5358   ///
5359   /// @return true iff @p symbol is suppressed.
5360   bool
is_elf_symbol_suppressed(const elf_symbol_sptr & symbol) const5361   is_elf_symbol_suppressed(const elf_symbol_sptr& symbol) const
5362   {
5363     return (symbol
5364 	    && suppr::is_elf_symbol_suppressed(*this,
5365 					       symbol->get_name(),
5366 					       symbol->get_type()));
5367   }
5368 
5369   /// Load the DT_NEEDED and DT_SONAME elf TAGS.
5370   ///
5371   void
load_dt_soname_and_needed()5372   load_dt_soname_and_needed()
5373   {
5374     lookup_data_tag_from_dynamic_segment(elf_handle(), DT_NEEDED, dt_needed_);
5375 
5376     vector<string> dt_tag_data;
5377     lookup_data_tag_from_dynamic_segment(elf_handle(), DT_SONAME, dt_tag_data);
5378     if (!dt_tag_data.empty())
5379       dt_soname_ = dt_tag_data[0];
5380   }
5381 
5382   /// Read the string representing the architecture of the current ELF
5383   /// file.
5384   void
load_elf_architecture()5385   load_elf_architecture()
5386   {
5387     if (!elf_handle())
5388       return;
5389 
5390     GElf_Ehdr eh_mem;
5391     GElf_Ehdr* elf_header = gelf_getehdr(elf_handle(), &eh_mem);
5392 
5393     elf_architecture_ = e_machine_to_string(elf_header->e_machine);
5394   }
5395 
5396   /// Load various ELF data.
5397   ///
5398   /// This function loads ELF data that are not symbol maps or debug
5399   /// info.  That is, things like various tags, elf architecture and
5400   /// so on.
5401   void
load_elf_properties()5402   load_elf_properties()
5403   {
5404     load_dt_soname_and_needed();
5405     load_elf_architecture();
5406   }
5407 
5408   /// This is a sub-routine of maybe_adjust_fn_sym_address and
5409   /// maybe_adjust_var_sym_address.
5410   ///
5411   /// Given an address that we got by looking at some debug
5412   /// information (e.g, a symbol's address referred to by a DWARF
5413   /// TAG), If the ELF file we are interested in is a shared library
5414   /// or an executable, then adjust the address to be coherent with
5415   /// where the executable (or shared library) is loaded.  That way,
5416   /// the address can be used to look for symbols in the executable or
5417   /// shared library.
5418   ///
5419   /// @return the adjusted address, or the same address as @p addr if
5420   /// it didn't need any adjustment.
5421   Dwarf_Addr
maybe_adjust_address_for_exec_or_dyn(Dwarf_Addr addr) const5422   maybe_adjust_address_for_exec_or_dyn(Dwarf_Addr addr) const
5423   {
5424     if (addr == 0)
5425       return addr;
5426 
5427     GElf_Ehdr eh_mem;
5428     GElf_Ehdr *elf_header = gelf_getehdr(elf_handle(), &eh_mem);
5429 
5430     if (elf_header->e_type == ET_DYN || elf_header->e_type == ET_EXEC)
5431       {
5432 	Dwarf_Addr dwarf_elf_load_address = 0, elf_load_address = 0;
5433 	ABG_ASSERT(get_binary_load_address(dwarf_elf_handle(),
5434 					   dwarf_elf_load_address));
5435 	ABG_ASSERT(get_binary_load_address(elf_handle(),
5436 					   elf_load_address));
5437 	if (dwarf_is_splitted()
5438 	    && (dwarf_elf_load_address != elf_load_address))
5439 	  // This means that in theory the DWARF and the executable are
5440 	  // not loaded at the same address.  And addr is meaningful
5441 	  // only in the context of the DWARF.
5442 	  //
5443 	  // So let's transform addr into an offset relative to where
5444 	  // the DWARF is loaded, and let's add that relative offset
5445 	  // to the load address of the executable.  That way, addr
5446 	  // becomes meaningful in the context of the executable and
5447 	  // can thus be used to compare against the address of
5448 	  // symbols of the executable, for instance.
5449 	  addr = addr - dwarf_elf_load_address + elf_load_address;
5450       }
5451 
5452     return addr;
5453   }
5454 
5455   /// For a relocatable (*.o) elf file, this function expects an
5456   /// absolute address, representing a function symbol.  It then
5457   /// extracts the address of the .text section from the symbol
5458   /// absolute address to get the relative address of the function
5459   /// from the beginning of the .text section.
5460   ///
5461   /// For executable or shared library, this function expects an
5462   /// address of a function symbol that was retrieved by looking at a
5463   /// DWARF "file".  The function thus adjusts the address to make it
5464   /// be meaningful in the context of the ELF file.
5465   ///
5466   /// In both cases, the address can then be compared against the
5467   /// st_value field of a function symbol from the ELF file.
5468   ///
5469   /// @param addr an adress for a function symbol that was retrieved
5470   /// from a DWARF file.
5471   ///
5472   /// @return the (possibly) adjusted address, or just @p addr if no
5473   /// adjustment took place.
5474   Dwarf_Addr
maybe_adjust_fn_sym_address(Dwarf_Addr addr) const5475   maybe_adjust_fn_sym_address(Dwarf_Addr addr) const
5476   {
5477     if (addr == 0)
5478       return addr;
5479 
5480     Elf* elf = elf_handle();
5481     GElf_Ehdr eh_mem;
5482     GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
5483 
5484     if (elf_header->e_type == ET_REL)
5485       // We are looking at a relocatable file.  In this case, we don't
5486       // do anything because:
5487       //
5488       // 1/ the addresses from DWARF are absolute (relative to the
5489       // beginning of the relocatable file)
5490       //
5491       // 2/ The ELF symbol addresses that we store in our lookup
5492       // tables are translated from section-related to absolute as
5493       // well.  So we don't have anything to do at this point for
5494       // ET_REL files.
5495       ;
5496     else
5497       addr = maybe_adjust_address_for_exec_or_dyn(addr);
5498 
5499     return addr;
5500   }
5501 
5502   /// For a relocatable (*.o) elf file, this function expects an
5503   /// absolute address, representing a global variable symbol.  It
5504   /// then extracts the address of the {.data,.data1,.rodata,.bss}
5505   /// section from the symbol absolute address to get the relative
5506   /// address of the variable from the beginning of the data section.
5507   ///
5508   /// For executable or shared library, this function expects an
5509   /// address of a variable symbol that was retrieved by looking at a
5510   /// DWARF "file".  The function thus adjusts the address to make it
5511   /// be meaningful in the context of the ELF file.
5512   ///
5513   /// In both cases, the address can then be compared against the
5514   /// st_value field of a function symbol from the ELF file.
5515   ///
5516   /// @param addr an address for a global variable symbol that was
5517   /// retrieved from a DWARF file.
5518   ///
5519   /// @return the (possibly) adjusted address, or just @p addr if no
5520   /// adjustment took place.
5521   Dwarf_Addr
maybe_adjust_var_sym_address(Dwarf_Addr addr) const5522   maybe_adjust_var_sym_address(Dwarf_Addr addr) const
5523   {
5524     Elf* elf = elf_handle();
5525     GElf_Ehdr eh_mem;
5526     GElf_Ehdr* elf_header = gelf_getehdr(elf, &eh_mem);
5527 
5528     if (elf_header->e_type == ET_REL)
5529       // We are looking at a relocatable file.  In this case, we don't
5530       // do anything because:
5531       //
5532       // 1/ the addresses from DWARF are absolute (relative to the
5533       // beginning of the relocatable file)
5534       //
5535       // 2/ The ELF symbol addresses that we store in our lookup
5536       // tables are translated from section-related to absolute as
5537       // well.  So we don't have anything to do at this point for
5538       // ET_REL files.
5539       ;
5540     else
5541       addr = maybe_adjust_address_for_exec_or_dyn(addr);
5542 
5543     return addr;
5544   }
5545 
5546   /// Get the first exported function address in the set of addresses
5547   /// referred to by the DW_AT_ranges attribute of a given DIE.
5548   ///
5549   /// @param die the DIE we are considering.
5550   ///
5551   /// @param address output parameter.  This is set to the first
5552   /// address found in the sequence pointed to by the DW_AT_ranges
5553   /// attribute found on the DIE @p die, iff the function returns
5554   /// true.  Otherwise, no value is set into this output parameter.
5555   ///
5556   /// @return true iff the DIE @p die does have a DW_AT_ranges
5557   /// attribute and an address of an exported function was found in
5558   /// its sequence value.
5559   bool
get_first_exported_fn_address_from_DW_AT_ranges(Dwarf_Die * die,Dwarf_Addr & address) const5560   get_first_exported_fn_address_from_DW_AT_ranges(Dwarf_Die* die,
5561 						  Dwarf_Addr& address) const
5562   {
5563     Dwarf_Addr base;
5564     Dwarf_Addr end_addr;
5565     ptrdiff_t offset = 0;
5566 
5567     do
5568       {
5569 	Dwarf_Addr addr = 0, fn_addr = 0;
5570 	if ((offset = dwarf_ranges(die, offset, &base, &addr, &end_addr)) >= 0)
5571 	  {
5572 	    fn_addr = maybe_adjust_fn_sym_address(addr);
5573 	    if (function_symbol_is_exported(fn_addr))
5574 	      {
5575 		address = fn_addr;
5576 		return true;
5577 	      }
5578 	  }
5579       } while (offset > 0);
5580     return false;
5581   }
5582 
5583   /// Get the address of the function.
5584   ///
5585   /// The address of the function is considered to be the value of the
5586   /// DW_AT_low_pc attribute, possibly adjusted (in relocatable files
5587   /// only) to not point to an absolute address anymore, but rather to
5588   /// the address of the function inside the .text segment.
5589   ///
5590   /// @param function_die the die of the function to consider.
5591   ///
5592   /// @param address the resulting address iff the function returns
5593   /// true.
5594   ///
5595   /// @return true if the function address was found.
5596   bool
get_function_address(Dwarf_Die * function_die,Dwarf_Addr & address) const5597   get_function_address(Dwarf_Die* function_die, Dwarf_Addr& address) const
5598   {
5599     if (!die_address_attribute(function_die, DW_AT_low_pc, address))
5600       // So no DW_AT_low_pc was found.  Let's see if the function DIE
5601       // has got a DW_AT_ranges attribute instead.  If it does, the
5602       // first address of the set of addresses represented by the
5603       // value of that DW_AT_ranges represents the function (symbol)
5604       // address we are looking for.
5605       if (!get_first_exported_fn_address_from_DW_AT_ranges(function_die,
5606 							   address))
5607 	return false;
5608 
5609     address = maybe_adjust_fn_sym_address(address);
5610     return true;
5611   }
5612 
5613   /// Get the address of the global variable.
5614   ///
5615   /// The address of the global variable is considered to be the value
5616   /// of the DW_AT_location attribute, possibly adjusted (in
5617   /// relocatable files only) to not point to an absolute address
5618   /// anymore, but rather to the address of the global variable inside
5619   /// the data segment.
5620   ///
5621   /// @param variable_die the die of the function to consider.
5622   ///
5623   /// @param address the resulting address iff this function returns
5624   /// true.
5625   ///
5626   /// @return true if the variable address was found.
5627   bool
get_variable_address(Dwarf_Die * variable_die,Dwarf_Addr & address) const5628   get_variable_address(Dwarf_Die*	variable_die,
5629 		       Dwarf_Addr&	address) const
5630   {
5631     bool is_tls_address = false;
5632     if (!die_location_address(variable_die, address, is_tls_address))
5633       return false;
5634     if (!is_tls_address)
5635       address = maybe_adjust_var_sym_address(address);
5636     return true;
5637   }
5638 
5639   /// Tests if a suppression specification can match ABI artifacts
5640   /// coming from the binary being analyzed.
5641   ///
5642   /// This tests if the suppression can match the soname of and binary
5643   /// name of the ELF binary being analyzed.  More precisely, if there
5644   /// are any soname or file name property in the suppression and if
5645   /// those do *NOT* match the current binary, then the function
5646   /// returns false.
5647   ///
5648   /// @param s the suppression specification to consider.
5649   ///
5650   /// @return true iff either there are no soname/filename related
5651   /// property on the suppression, or if none of the soname/filename
5652   /// properties of the suppression match the current binary.
5653   bool
suppression_can_match(const suppr::suppression_base & s) const5654   suppression_can_match(const suppr::suppression_base& s) const
5655   {
5656     if (!s.priv_->matches_soname(dt_soname()))
5657       if (s.has_soname_related_property())
5658 	// The suppression has some SONAME related properties, but
5659 	// none of them match the SONAME of the current binary.  So
5660 	// the suppression cannot match the current binary.
5661 	return false;
5662 
5663     if (!s.priv_->matches_binary_name(elf_path()))
5664       if (s.has_file_name_related_property())
5665 	// The suppression has some file_name related properties, but
5666 	// none of them match the file name of the current binary.  So
5667 	// the suppression cannot match the current binary.
5668 	return false;
5669 
5670     return true;
5671   }
5672 
5673   /// Test whether if a given function suppression matches a function
5674   /// designated by a regular expression that describes its linkage
5675   /// name (symbol name).
5676   ///
5677   /// @param s the suppression specification to evaluate to see if it
5678   /// matches a given function linkage name
5679   ///
5680   /// @param fn_linkage_name the linkage name of the function of interest.
5681   ///
5682   /// @return true iff the suppression specification @p s matches the
5683   /// function whose linkage name is @p fn_linkage_name.
5684   bool
suppression_matches_function_sym_name(const suppr::function_suppression & s,const string & fn_linkage_name) const5685   suppression_matches_function_sym_name(const suppr::function_suppression& s,
5686 					const string& fn_linkage_name) const
5687   {
5688     if (!suppression_can_match(s))
5689       return false;
5690 
5691     return suppr::suppression_matches_function_sym_name(s, fn_linkage_name);
5692   }
5693 
5694   /// Test whether if a given function suppression matches a function
5695   /// designated by a regular expression that describes its name.
5696   ///
5697   /// @param s the suppression specification to evaluate to see if it
5698   /// matches a given function name.
5699   ///
5700   /// @param fn_name the name of the function of interest.  Note that
5701   /// this name must be *non* qualified.
5702   ///
5703   /// @return true iff the suppression specification @p s matches the
5704   /// function whose name is @p fn_name.
5705   bool
suppression_matches_function_name(const suppr::function_suppression & s,const string & fn_name) const5706   suppression_matches_function_name(const suppr::function_suppression& s,
5707 				    const string& fn_name) const
5708   {
5709     if (!suppression_can_match(s))
5710       return false;
5711 
5712     return suppr::suppression_matches_function_name(s, fn_name);
5713   }
5714 
5715   /// Test whether if a given variable suppression specification
5716   /// matches a variable denoted by its name.
5717   ///
5718   /// @param s the variable suppression specification to consider.
5719   ///
5720   /// @param var_name the name of the variable to consider.
5721   ///
5722   /// @return true iff the suppression specification @p s matches the
5723   /// variable whose name is @p var_name.
5724   bool
suppression_matches_variable_name(const suppr::variable_suppression & s,const string & var_name) const5725   suppression_matches_variable_name(const suppr::variable_suppression& s,
5726 				    const string& var_name) const
5727   {
5728     if (!suppression_can_match(s))
5729       return false;
5730 
5731     return suppr::suppression_matches_variable_name(s, var_name);
5732   }
5733 
5734   /// Test whether if a given variable suppression specification
5735   /// matches a variable denoted by its linkage name.
5736   ///
5737   /// @param s the variable suppression specification to consider.
5738   ///
5739   /// @param var_linkage_name the linkage name of the variable to consider.
5740   ///
5741   /// @return true iff variable suppression specification @p s matches
5742   /// the variable denoted by linkage name @p var_linkage_name.
5743   bool
suppression_matches_variable_sym_name(const suppr::variable_suppression & s,const string & var_linkage_name) const5744   suppression_matches_variable_sym_name(const suppr::variable_suppression& s,
5745 					const string& var_linkage_name) const
5746   {
5747     if (!suppression_can_match(s))
5748       return false;
5749 
5750     return suppr::suppression_matches_variable_sym_name(s, var_linkage_name);
5751   }
5752 
5753   /// Test if a given type suppression specification matches a type
5754   /// designated by its name and location.
5755   ///
5756   /// @param s the suppression specification to consider.
5757   ///
5758   /// @param type_name the fully qualified type name to consider.
5759   ///
5760   /// @param type_location the type location to consider.
5761   ///
5762   /// @return true iff the type suppression specification matches a
5763   /// type of a given name and location.
5764   bool
suppression_matches_type_name_or_location(const suppr::type_suppression & s,const string & type_name,const location & type_location) const5765   suppression_matches_type_name_or_location(const suppr::type_suppression& s,
5766 					    const string& type_name,
5767 					    const location& type_location) const
5768   {
5769     if (!suppression_can_match(s))
5770       return false;
5771 
5772     return suppr::suppression_matches_type_name_or_location(s, type_name,
5773 							    type_location);
5774   }
5775 
5776   /// Getter of the exported decls builder object.
5777   ///
5778   /// @return the exported decls builder.
5779   corpus::exported_decls_builder*
exported_decls_builder()5780   exported_decls_builder()
5781   {return exported_decls_builder_;}
5782 
5783   /// Setter of the exported decls builder object.
5784   ///
5785   /// Note that this @ref read_context is not responsible for the live
5786   /// time of the exported_decls_builder object.  The corpus is.
5787   ///
5788   /// @param b the new builder.
5789   void
exported_decls_builder(corpus::exported_decls_builder * b)5790   exported_decls_builder(corpus::exported_decls_builder* b)
5791   {exported_decls_builder_ = b;}
5792 
5793   /// Getter of the "load_all_types" flag.  This flag tells if all the
5794   /// types (including those not reachable by public declarations) are
5795   /// to be read and represented in the final ABI corpus.
5796   ///
5797   /// @return the load_all_types flag.
5798   bool
load_all_types() const5799   load_all_types() const
5800   {return options_.load_all_types;}
5801 
5802   /// Setter of the "load_all_types" flag.  This flag tells if all the
5803   /// types (including those not reachable by public declarations) are
5804   /// to be read and represented in the final ABI corpus.
5805   ///
5806   /// @param f the new load_all_types flag.
5807   void
load_all_types(bool f)5808   load_all_types(bool f)
5809   {options_.load_all_types = f;}
5810 
5811   bool
load_in_linux_kernel_mode() const5812   load_in_linux_kernel_mode() const
5813   {return options_.load_in_linux_kernel_mode;}
5814 
5815   void
load_in_linux_kernel_mode(bool f)5816   load_in_linux_kernel_mode(bool f)
5817   {options_.load_in_linux_kernel_mode = f;}
5818 
5819   /// Getter of the "show_stats" flag.
5820   ///
5821   /// This flag tells if we should emit statistics about various
5822   /// internal stuff.
5823   ///
5824   /// @return the value of the flag.
5825   bool
show_stats() const5826   show_stats() const
5827   {return options_.show_stats;}
5828 
5829   /// Setter of the "show_stats" flag.
5830   ///
5831   /// This flag tells if we should emit statistics about various
5832   /// internal stuff.
5833   ///
5834   /// @param f the value of the flag.
5835   void
show_stats(bool f)5836   show_stats(bool f)
5837   {options_.show_stats = f;}
5838 
5839   /// Getter of the "do_log" flag.
5840   ///
5841   /// This flag tells if we should log about various internal
5842   /// details.
5843   ///
5844   /// return the "do_log" flag.
5845   bool
do_log() const5846   do_log() const
5847   {return options_.do_log;}
5848 
5849   /// Setter of the "do_log" flag.
5850   ///
5851   /// This flag tells if we should log about various internal details.
5852   ///
5853   /// @param f the new value of the flag.
5854   void
do_log(bool f)5855   do_log(bool f)
5856   {options_.do_log = f;}
5857 
5858   /// If a given function decl is suitable for the set of exported
5859   /// functions of the current corpus, this function adds it to that
5860   /// set.
5861   ///
5862   /// @param fn the function to consider for inclusion into the set of
5863   /// exported functions of the current corpus.
5864   void
maybe_add_fn_to_exported_decls(function_decl * fn)5865   maybe_add_fn_to_exported_decls(function_decl* fn)
5866   {
5867     if (fn)
5868       if (corpus::exported_decls_builder* b = exported_decls_builder())
5869 	b->maybe_add_fn_to_exported_fns(fn);
5870   }
5871 
5872   /// If a given variable decl is suitable for the set of exported
5873   /// variables of the current corpus, this variable adds it to that
5874   /// set.
5875   ///
5876   /// @param fn the variable to consider for inclusion into the set of
5877   /// exported variables of the current corpus.
5878   void
maybe_add_var_to_exported_decls(var_decl * var)5879   maybe_add_var_to_exported_decls(var_decl* var)
5880   {
5881     if (var)
5882       if (corpus::exported_decls_builder* b = exported_decls_builder())
5883 	b->maybe_add_var_to_exported_vars(var);
5884   }
5885 
5886   /// Walk the DIEs under a given die and for each child, populate the
5887   /// die -> parent map to record the child -> parent relationship
5888   /// that
5889   /// exists between the child and the given die.
5890   ///
5891   /// The function also builds the vector of places where units are
5892   /// imported.
5893   ///
5894   /// This is done recursively as for each child DIE, this function
5895   /// walks its children as well.
5896   ///
5897   /// @param die the DIE whose children to walk recursively.
5898   ///
5899   /// @param source where the DIE @p die comes from.
5900   ///
5901   /// @param imported_units a vector containing all the offsets of the
5902   /// points where unit have been imported, under @p die.
5903   void
build_die_parent_relations_under(Dwarf_Die * die,die_source source,imported_unit_points_type & imported_units)5904   build_die_parent_relations_under(Dwarf_Die*			die,
5905 				   die_source			source,
5906 				   imported_unit_points_type &	imported_units)
5907   {
5908     if (!die)
5909       return;
5910 
5911     offset_offset_map_type& parent_of = die_parent_map(source);
5912 
5913     Dwarf_Die child;
5914     if (dwarf_child(die, &child) != 0)
5915       return;
5916 
5917     do
5918       {
5919 	parent_of[dwarf_dieoffset(&child)] = dwarf_dieoffset(die);
5920 	if (dwarf_tag(&child) == DW_TAG_imported_unit)
5921 	  {
5922 	    Dwarf_Die imported_unit;
5923 	    if (die_die_attribute(&child, DW_AT_import, imported_unit)
5924 		// If the imported_unit has a sub-tree, let's record
5925 		// this point at which the sub-tree is imported into
5926 		// the current debug info.
5927 		//
5928 		// Otherwise, if the imported_unit has no sub-tree,
5929 		// there is no point in recording where a non-existent
5930 		// sub-tree is being imported.
5931 		//
5932 		// Note that the imported_unit_points_type type below
5933 		// expects the imported_unit to have a sub-tree.
5934 		&& die_has_children(&imported_unit))
5935 	      {
5936 		die_source imported_unit_die_source = NO_DEBUG_INFO_DIE_SOURCE;
5937 		ABG_ASSERT(get_die_source(imported_unit, imported_unit_die_source));
5938 		imported_units.push_back
5939 		  (imported_unit_point(dwarf_dieoffset(&child),
5940 				       imported_unit,
5941 				       imported_unit_die_source));
5942 	      }
5943 	  }
5944 	build_die_parent_relations_under(&child, source, imported_units);
5945       }
5946     while (dwarf_siblingof(&child, &child) == 0);
5947 
5948   }
5949 
5950   /// Determine if we do have to build a DIE -> parent map, depending
5951   /// on a given language.
5952   ///
5953   /// Some languages like C++, Ada etc, do have the concept of
5954   /// namespace and yet, the DIE data structure doesn't provide us
5955   /// with a way to get the parent namespace of a given DIE.  So for
5956   /// those languages, we need to build a DIE -> parent map so that we
5957   /// can get the namespace DIE (or more generally the scope DIE) of a given
5958   /// DIE as we need it.
5959   ///
5960   /// But then some more basic languages like C or assembly don't have
5961   /// that need.
5962   ///
5963   /// This function, depending on the language, tells us if we need to
5964   /// build the DIE -> parent map or not.
5965   ///
5966   /// @param lang the language to consider.
5967   ///
5968   /// @return true iff we need to build the DIE -> parent map for this
5969   /// language.
5970   bool
do_we_build_die_parent_maps(translation_unit::language lang)5971   do_we_build_die_parent_maps(translation_unit::language lang)
5972   {
5973     if (is_c_language(lang))
5974       return false;
5975 
5976     switch (lang)
5977       {
5978       case translation_unit::LANG_UNKNOWN:
5979 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
5980       case translation_unit::LANG_Mips_Assembler:
5981 #endif
5982 	return false;
5983       default:
5984 	break;
5985       }
5986     return true;
5987   }
5988 
5989   /// Walk all the DIEs accessible in the debug info (and in the
5990   /// alternate debug info as well) and build maps representing the
5991   /// relationship DIE -> parent.  That is, make it so that we can get
5992   /// the parent for a given DIE.
5993   ///
5994   /// Note that the goal of this map is to be able to get the parent
5995   /// of a given DIE. This is to mainly to handle namespaces.  For instance,
5996   /// when we get a DIE of a type, and we want to build an internal
5997   /// representation for it, we need to get its fully qualified name.
5998   /// For that, we need to know what is the parent DIE of that type
5999   /// DIE, so that we can know what the namespace of that type is.
6000   ///
6001   /// Note that as the C language doesn't have namespaces (all types
6002   /// are defined in the same global namespace), this function doesn't
6003   /// build the DIE -> parent map if the current translation unit
6004   /// comes from C.  This saves time on big C ELF files with a lot of
6005   /// DIEs.
6006   void
build_die_parent_maps()6007   build_die_parent_maps()
6008   {
6009     bool we_do_have_to_build_die_parent_map = false;
6010     uint8_t address_size = 0;
6011     size_t header_size = 0;
6012     // Get the DIE of the current translation unit, look at it to get
6013     // its language. If that language is in C, then all types are in
6014     // the global namespace so we don't need to build the DIE ->
6015     // parent map.  So we dont build it in that case.
6016     for (Dwarf_Off offset = 0, next_offset = 0;
6017 	 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
6018 			  NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
6019 	 offset = next_offset)
6020       {
6021 	Dwarf_Off die_offset = offset + header_size;
6022 	Dwarf_Die cu;
6023 	if (!dwarf_offdie(dwarf(), die_offset, &cu))
6024 	  continue;
6025 
6026 	uint64_t l = 0;
6027 	die_unsigned_constant_attribute(&cu, DW_AT_language, l);
6028 	translation_unit::language lang = dwarf_language_to_tu_language(l);
6029 	if (do_we_build_die_parent_maps(lang))
6030 	  we_do_have_to_build_die_parent_map = true;
6031       }
6032 
6033     if (!we_do_have_to_build_die_parent_map)
6034       return;
6035 
6036     // Build the DIE -> parent relation for DIEs coming from the
6037     // .debug_info section in the alternate debug info file.
6038     die_source source = ALT_DEBUG_INFO_DIE_SOURCE;
6039     for (Dwarf_Off offset = 0, next_offset = 0;
6040 	 (dwarf_next_unit(alt_dwarf(), offset, &next_offset, &header_size,
6041 			  NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
6042 	 offset = next_offset)
6043       {
6044 	Dwarf_Off die_offset = offset + header_size;
6045 	Dwarf_Die cu;
6046 	if (!dwarf_offdie(alt_dwarf(), die_offset, &cu))
6047 	  continue;
6048 	cur_tu_die(&cu);
6049 
6050 	imported_unit_points_type& imported_units =
6051 	  tu_die_imported_unit_points_map(source)[die_offset] =
6052 	  imported_unit_points_type();
6053 	build_die_parent_relations_under(&cu, source, imported_units);
6054       }
6055 
6056     // Build the DIE -> parent relation for DIEs coming from the
6057     // .debug_info section of the main debug info file.
6058     source = PRIMARY_DEBUG_INFO_DIE_SOURCE;
6059     address_size = 0;
6060     header_size = 0;
6061     for (Dwarf_Off offset = 0, next_offset = 0;
6062 	 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
6063 			  NULL, NULL, &address_size, NULL, NULL, NULL) == 0);
6064 	 offset = next_offset)
6065       {
6066 	Dwarf_Off die_offset = offset + header_size;
6067 	Dwarf_Die cu;
6068 	if (!dwarf_offdie(dwarf(), die_offset, &cu))
6069 	  continue;
6070 	cur_tu_die(&cu);
6071 	imported_unit_points_type& imported_units =
6072 	  tu_die_imported_unit_points_map(source)[die_offset] =
6073 	  imported_unit_points_type();
6074 	build_die_parent_relations_under(&cu, source, imported_units);
6075       }
6076 
6077     // Build the DIE -> parent relation for DIEs coming from the
6078     // .debug_types section.
6079     source = TYPE_UNIT_DIE_SOURCE;
6080     address_size = 0;
6081     header_size = 0;
6082     uint64_t type_signature = 0;
6083     Dwarf_Off type_offset;
6084     for (Dwarf_Off offset = 0, next_offset = 0;
6085 	 (dwarf_next_unit(dwarf(), offset, &next_offset, &header_size,
6086 			  NULL, NULL, &address_size, NULL,
6087 			  &type_signature, &type_offset) == 0);
6088 	 offset = next_offset)
6089       {
6090 	Dwarf_Off die_offset = offset + header_size;
6091 	Dwarf_Die cu;
6092 
6093 	if (!dwarf_offdie_types(dwarf(), die_offset, &cu))
6094 	  continue;
6095 	cur_tu_die(&cu);
6096 	imported_unit_points_type& imported_units =
6097 	  tu_die_imported_unit_points_map(source)[die_offset] =
6098 	  imported_unit_points_type();
6099 	build_die_parent_relations_under(&cu, source, imported_units);
6100       }
6101   }
6102 };// end class read_context.
6103 
// Forward declarations of static functions of this translation unit
// that are used before their definition.
//
// Where a default argument is given below, it is part of the
// declaration; C++ does not allow the matching definition to specify
// it again.

static type_or_decl_base_sptr
build_ir_node_from_die(read_context&	ctxt,
		       Dwarf_Die*	die,
		       scope_decl*	scope,
		       bool		called_from_public_decl,
		       size_t		where_offset,
		       bool		is_declaration_only = true,
		       bool		is_required_decl_spec = false);

static type_or_decl_base_sptr
build_ir_node_from_die(read_context&	ctxt,
		       Dwarf_Die*	die,
		       bool		called_from_public_decl,
		       size_t		where_offset);

static class_decl_sptr
add_or_update_class_type(read_context&	 ctxt,
			 Dwarf_Die*	 die,
			 scope_decl*	 scope,
			 bool		 is_struct,
			 class_decl_sptr klass,
			 bool		 called_from_public_decl,
			 size_t		 where_offset,
			 bool		 is_declaration_only);

static union_decl_sptr
add_or_update_union_type(read_context&	 ctxt,
			 Dwarf_Die*	 die,
			 scope_decl*	 scope,
			 union_decl_sptr union_type,
			 bool		 called_from_public_decl,
			 size_t		 where_offset,
			 bool		 is_declaration_only);

static decl_base_sptr
build_ir_node_for_void_type(read_context& ctxt);

static decl_base_sptr
build_ir_node_for_variadic_parameter_type(read_context &ctxt);

static function_decl_sptr
build_function_decl(read_context&	ctxt,
		    Dwarf_Die*		die,
		    size_t		where_offset,
		    function_decl_sptr	fn);

static bool
function_is_suppressed(const read_context& ctxt,
		       const scope_decl* scope,
		       Dwarf_Die *function_die,
		       bool is_declaration_only);

static function_decl_sptr
build_or_get_fn_decl_if_not_suppressed(read_context&	ctxt,
				       scope_decl	*scope,
				       Dwarf_Die	*die,
				       size_t	where_offset,
				       bool is_declaration_only,
				       function_decl_sptr f);

static var_decl_sptr
build_var_decl(read_context&	ctxt,
	       Dwarf_Die	*die,
	       size_t		where_offset,
	       var_decl_sptr	result = var_decl_sptr());

static var_decl_sptr
build_or_get_var_decl_if_not_suppressed(read_context&	ctxt,
					scope_decl	*scope,
					Dwarf_Die	*die,
					size_t	where_offset,
					var_decl_sptr	res = var_decl_sptr(),
					bool is_required_decl_spec = false);
static bool
variable_is_suppressed(const read_context& ctxt,
		       const scope_decl* scope,
		       Dwarf_Die *variable_die,
		       bool is_required_decl_spec = false);

static void
finish_member_function_reading(Dwarf_Die*		 die,
			       const function_decl_sptr& f,
			       const class_or_union_sptr& klass,
			       read_context&		 ctxt);
6188 
6189 /// Setter of the debug info root path for a dwarf reader context.
6190 ///
6191 /// @param ctxt the dwarf reader context to consider.
6192 ///
6193 /// @param path the new debug info root path.  This must be a pointer to a
6194 /// character string which life time should be greater than the life
6195 /// time of the read context.
6196 void
set_debug_info_root_path(read_context & ctxt,char ** path)6197 set_debug_info_root_path(read_context& ctxt, char** path)
6198 {ctxt.offline_callbacks()->debuginfo_path = path;}
6199 
6200 /// Setter of the debug info root path for a dwarf reader context.
6201 ///
6202 /// @param ctxt the dwarf reader context to consider.
6203 ///
6204 /// @return a pointer to the debug info root path.
6205 ///
6206 /// time of the read context.
6207 char**
get_debug_info_root_path(read_context & ctxt)6208 get_debug_info_root_path(read_context& ctxt)
6209 {return ctxt.offline_callbacks()->debuginfo_path;}
6210 
6211 /// Getter of the "show_stats" flag.
6212 ///
6213 /// This flag tells if we should emit statistics about various
6214 /// internal stuff.
6215 ///
6216 /// @param ctx the read context to consider for this flag.
6217 ///
6218 /// @return the value of the flag.
6219 bool
get_show_stats(read_context & ctxt)6220 get_show_stats(read_context& ctxt)
6221 {return ctxt.show_stats();}
6222 
6223 /// Setter of the "show_stats" flag.
6224 ///
6225 /// This flag tells if we should emit statistics about various
6226 /// internal stuff.
6227 ///
6228 /// @param ctxt the read context to consider for this flag.
6229 ///
6230 /// @param f the value of the flag.
6231 void
set_show_stats(read_context & ctxt,bool f)6232 set_show_stats(read_context& ctxt, bool f)
6233 {ctxt.show_stats(f);}
6234 
6235 /// Setter of the "drop_undefined_syms" flag.
6236 ///
6237 /// This flag tells if we should drop functions or variables
6238 /// with undefined symbols.
6239 ///
6240 /// @param ctxt the read context to consider for this flag.
6241 ///
6242 /// @param f the value of the flag.
6243 void
set_drop_undefined_syms(read_context & ctxt,bool f)6244 set_drop_undefined_syms(read_context& ctxt, bool f)
6245 {ctxt.drop_undefined_syms(f);}
6246 
6247 /// Setter of the "merge_translation_units" flag.
6248 ///
6249 /// This flag tells if we should merge translation units.
6250 ///
6251 /// @param ctxt the read context to consider for this flag.
6252 ///
6253 /// @param f the value of the flag.
6254 void
set_merge_translation_units(read_context & ctxt,bool f)6255 set_merge_translation_units(read_context& ctxt, bool f)
6256 {ctxt.merge_translation_units(f);}
6257 
6258 /// Setter of the "do_log" flag.
6259 ///
6260 /// This flag tells if we should emit verbose logs for various
6261 /// internal things related to DWARF reading.
6262 ///
6263 /// @param ctxt the DWARF reading context to consider.
6264 ///
6265 /// @param f the new value of the flag.
6266 void
set_do_log(read_context & ctxt,bool f)6267 set_do_log(read_context& ctxt, bool f)
6268 {ctxt.do_log(f);}
6269 
6270 /// Test if a given DIE is anonymous
6271 ///
6272 /// @param die the DIE to consider.
6273 ///
6274 /// @return true iff @p die is anonymous.
6275 static bool
die_is_anonymous(const Dwarf_Die * die)6276 die_is_anonymous(const Dwarf_Die* die)
6277 {
6278   Dwarf_Attribute attr;
6279   if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), DW_AT_name, &attr))
6280     return true;
6281   return false;
6282 }
6283 
6284 /// Get the value of an attribute that is supposed to be a string, or
6285 /// an empty string if the attribute could not be found.
6286 ///
6287 /// @param die the DIE to get the attribute value from.
6288 ///
6289 /// @param attr_name the attribute name.  Must come from dwarf.h and
6290 /// be an enumerator representing an attribute like, e.g, DW_AT_name.
6291 ///
6292 /// @return the string representing the value of the attribute, or an
6293 /// empty string if no string attribute could be found.
6294 static string
die_string_attribute(const Dwarf_Die * die,unsigned attr_name)6295 die_string_attribute(const Dwarf_Die* die, unsigned attr_name)
6296 {
6297   if (!die)
6298     return "";
6299 
6300   Dwarf_Attribute attr;
6301   if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
6302     return "";
6303 
6304   const char* str = dwarf_formstring(&attr);
6305   return str ? str : "";
6306 }
6307 
6308 /// Get the value of an attribute that is supposed to be an unsigned
6309 /// constant.
6310 ///
6311 /// @param die the DIE to read the information from.
6312 ///
6313 /// @param attr_name the DW_AT_* name of the attribute.  Must come
6314 /// from dwarf.h and be an enumerator representing an attribute like,
6315 /// e.g, DW_AT_decl_line.
6316 ///
6317 ///@param cst the output parameter that is set to the value of the
6318 /// attribute @p attr_name.  This parameter is set iff the function
6319 /// return true.
6320 ///
6321 /// @return true if there was an attribute of the name @p attr_name
6322 /// and with a value that is a constant, false otherwise.
6323 static bool
die_unsigned_constant_attribute(const Dwarf_Die * die,unsigned attr_name,uint64_t & cst)6324 die_unsigned_constant_attribute(const Dwarf_Die*	die,
6325 				unsigned	attr_name,
6326 				uint64_t&	cst)
6327 {
6328   if (!die)
6329     return false;
6330 
6331   Dwarf_Attribute attr;
6332   Dwarf_Word result = 0;
6333   if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
6334       || dwarf_formudata(&attr, &result))
6335     return false;
6336 
6337   cst = result;
6338   return true;
6339 }
6340 
6341 /// Read a signed constant value from a given attribute.
6342 ///
6343 /// The signed constant expected must be of constant form.
6344 ///
6345 /// @param die the DIE to get the attribute from.
6346 ///
6347 /// @param attr_name the attribute name.
6348 ///
6349 /// @param cst the resulting signed constant read.
6350 ///
6351 /// @return true iff a signed constant attribute of the name @p
6352 /// attr_name was found on the DIE @p die.
6353 static bool
die_signed_constant_attribute(const Dwarf_Die * die,unsigned attr_name,int64_t & cst)6354 die_signed_constant_attribute(const Dwarf_Die *die,
6355 			      unsigned	attr_name,
6356 			      int64_t&	cst)
6357 {
6358   if (!die)
6359     return false;
6360 
6361   Dwarf_Attribute attr;
6362   Dwarf_Sword result = 0;
6363   if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
6364       || dwarf_formsdata(&attr, &result))
6365     return false;
6366 
6367   cst = result;
6368   return true;
6369 }
6370 
6371 /// Read the value of a constant attribute that is either signed or
6372 /// unsigned into a array_type_def::subrange_type::bound_value value.
6373 ///
6374 /// The bound_value instance will capture the actual signedness of the
6375 /// read attribute.
6376 ///
6377 /// @param die the DIE from which to read the value of the attribute.
6378 ///
6379 /// @param attr_name the attribute name to consider.
6380 ///
6381 /// @param is_signed true if the attribute value has to read as
6382 /// signed.
6383 ///
6384 /// @param value the resulting value read from attribute @p attr_name
6385 /// on DIE @p die.
6386 ///
6387 /// @return true iff DIE @p die has an attribute named @p attr_name
6388 /// with a constant value.
6389 static bool
die_constant_attribute(const Dwarf_Die * die,unsigned attr_name,bool is_signed,array_type_def::subrange_type::bound_value & value)6390 die_constant_attribute(const Dwarf_Die *die,
6391 		       unsigned attr_name,
6392 		       bool is_signed,
6393 		       array_type_def::subrange_type::bound_value &value)
6394 {
6395   if (!is_signed)
6396     {
6397       uint64_t l = 0;
6398       if (!die_unsigned_constant_attribute(die, attr_name, l))
6399 	return false;
6400       value.set_unsigned(l);
6401     }
6402   else
6403     {
6404       int64_t l = 0;
6405       if (!die_signed_constant_attribute(die, attr_name, l))
6406 	return false;
6407       value.set_signed(l);
6408     }
6409   return true;
6410 }
6411 
/// Test if a given DWARF form is one of DW_FORM_strx1,
/// DW_FORM_strx2, DW_FORM_strx3 or DW_FORM_strx4.
///
/// Unfortunately, these are enumerators of an untagged enum in
/// dwarf.h, so an unsigned int has to be used for the form.
///
/// When the HAVE_DW_FORM_strx{1,2,3,4} macros are not all defined,
/// the function always returns false.
///
/// @param form the form to consider.
///
/// @return true iff @p form is one of DW_FORM_strx{1,2,3,4}.
static bool
form_is_DW_FORM_strx(unsigned form)
{
  // A zero form is never one of the forms looked for.
  if (!form)
    return false;

#if defined HAVE_DW_FORM_strx1		\
  && defined HAVE_DW_FORM_strx2	\
  && defined HAVE_DW_FORM_strx3	\
  && defined HAVE_DW_FORM_strx4
  return (form == DW_FORM_strx1
          || form == DW_FORM_strx2
          || form == DW_FORM_strx3
          || form == DW_FORM_strx4);
#else
  return false;
#endif
}
6439 
/// Test if a given DWARF form is DW_FORM_line_strp.
///
/// Unfortunately, DW_FORM_line_strp is an enumerator of an untagged
/// enum in dwarf.h, so an unsigned int has to be used for the form.
///
/// When the HAVE_DW_FORM_line_strp macro is not defined, the
/// function always returns false.
///
/// @param form the form to consider.
///
/// @return true iff @p form is DW_FORM_line_strp.
static bool
form_is_DW_FORM_line_strp(unsigned form)
{
  // A zero form is never the form looked for.
  if (!form)
    return false;

#if defined HAVE_DW_FORM_line_strp
  return form == DW_FORM_line_strp;
#else
  return false;
#endif
}
6461 
6462 /// Get the value of a DIE attribute; that value is meant to be a
6463 /// flag.
6464 ///
6465 /// @param die the DIE to get the attribute from.
6466 ///
6467 /// @param attr_name the DW_AT_* name of the attribute.  Must come
6468 /// from dwarf.h and be an enumerator representing an attribute like,
6469 /// e.g, DW_AT_external.
6470 ///
6471 /// @param flag the output parameter to store the flag value into.
6472 /// This is set iff the function returns true.
6473 ///
6474 /// @param recursively if true, the function looks through the
6475 /// possible DW_AT_specification and DW_AT_abstract_origin attribute
6476 /// all the way down to the initial DIE that is cloned and look on
6477 /// that DIE to see if it has the @p attr_name attribute.
6478 ///
6479 /// @return true if the DIE has a flag attribute named @p attr_name,
6480 /// false otherwise.
6481 static bool
die_flag_attribute(Dwarf_Die * die,unsigned attr_name,bool & flag,bool recursively=true)6482 die_flag_attribute(Dwarf_Die* die,
6483 		   unsigned attr_name,
6484 		   bool& flag,
6485 		   bool recursively = true)
6486 {
6487   Dwarf_Attribute attr;
6488   if (recursively
6489       ? !dwarf_attr_integrate(die, attr_name, &attr)
6490       : !dwarf_attr(die, attr_name, &attr))
6491     return false;
6492 
6493   bool f = false;
6494   if (dwarf_formflag(&attr, &f))
6495     return false;
6496 
6497   flag = f;
6498   return true;
6499 }
6500 
6501 /// Get the mangled name from a given DIE.
6502 ///
6503 /// @param die the DIE to read the mangled name from.
6504 ///
6505 /// @return the mangled name if it's present in the DIE, or just an
6506 /// empty string if it's not.
6507 static string
die_linkage_name(const Dwarf_Die * die)6508 die_linkage_name(const Dwarf_Die* die)
6509 {
6510   if (!die)
6511     return "";
6512 
6513   string linkage_name = die_string_attribute(die, DW_AT_linkage_name);
6514   if (linkage_name.empty())
6515     linkage_name = die_string_attribute(die, DW_AT_MIPS_linkage_name);
6516   return linkage_name;
6517 }
6518 
6519 /// Get the file path that is the value of the DW_AT_decl_file
6520 /// attribute on a given DIE, if the DIE is a decl DIE having that
6521 /// attribute.
6522 ///
6523 /// @param die the DIE to consider.
6524 ///
6525 /// @return a string containing the file path that is the logical
6526 /// value of the DW_AT_decl_file attribute.  If the DIE @p die
6527 /// doesn't have a DW_AT_decl_file attribute, then the return value is
6528 /// just an empty string.
6529 static string
die_decl_file_attribute(const Dwarf_Die * die)6530 die_decl_file_attribute(const Dwarf_Die* die)
6531 {
6532   if (!die)
6533     return "";
6534 
6535   const char* str = dwarf_decl_file(const_cast<Dwarf_Die*>(die));
6536 
6537   return str ? str : "";
6538 }
6539 
6540 /// Get the value of an attribute which value is supposed to be a
6541 /// reference to a DIE.
6542 ///
6543 /// @param die the DIE to read the value from.
6544 ///
6545 /// @param attr_name the DW_AT_* attribute name to read.
6546 ///
6547 /// @param result the DIE resulting from reading the attribute value.
6548 /// This is set iff the function returns true.
6549 ///
6550 /// @param recursively if true, the function looks through the
6551 /// possible DW_AT_specification and DW_AT_abstract_origin attribute
6552 /// all the way down to the initial DIE that is cloned and look on
6553 /// that DIE to see if it has the @p attr_name attribute.
6554 ///
6555 /// @return true if the DIE @p die contains an attribute named @p
6556 /// attr_name that is a DIE reference, false otherwise.
6557 static bool
die_die_attribute(const Dwarf_Die * die,unsigned attr_name,Dwarf_Die & result,bool recursively)6558 die_die_attribute(const Dwarf_Die* die,
6559 		  unsigned attr_name,
6560 		  Dwarf_Die& result,
6561 		  bool recursively)
6562 {
6563   Dwarf_Attribute attr;
6564   if (recursively
6565       ? !dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr)
6566       : !dwarf_attr(const_cast<Dwarf_Die*>(die), attr_name, &attr))
6567     return false;
6568 
6569   return dwarf_formref_die(&attr, &result);
6570 }
6571 
6572 /// Read and return an addresss class attribute from a given DIE.
6573 ///
6574 /// @param die the DIE to consider.
6575 ///
6576 /// @param attr_name the name of the address class attribute to read
6577 /// the value from.
6578 ///
6579 /// @param the resulting address.
6580 ///
6581 /// @return true iff the attribute could be read, was of the expected
6582 /// address class and could thus be translated into the @p result.
6583 static bool
die_address_attribute(Dwarf_Die * die,unsigned attr_name,Dwarf_Addr & result)6584 die_address_attribute(Dwarf_Die* die, unsigned attr_name, Dwarf_Addr& result)
6585 {
6586   Dwarf_Attribute attr;
6587   if (!dwarf_attr_integrate(die, attr_name, &attr))
6588     return false;
6589   return dwarf_formaddr(&attr, &result) == 0;
6590 }
6591 
6592 /// Returns the source location associated with a decl DIE.
6593 ///
6594 /// @param ctxt the @ref read_context to use.
6595 ///
6596 /// @param die the DIE the read the source location from.
6597 ///
6598 /// @return the location associated with @p die.
6599 static location
die_location(const read_context & ctxt,const Dwarf_Die * die)6600 die_location(const read_context& ctxt, const Dwarf_Die* die)
6601 {
6602   if (!die)
6603     return location();
6604 
6605   string file = die_decl_file_attribute(die);
6606   uint64_t line = 0;
6607   die_unsigned_constant_attribute(die, DW_AT_decl_line, line);
6608 
6609   if (!file.empty() && line != 0)
6610     {
6611       translation_unit_sptr tu = ctxt.cur_transl_unit();
6612       location l = tu->get_loc_mgr().create_new_location(file, line, 1);
6613       return l;
6614     }
6615   return location();
6616 }
6617 
6618 /// Return a copy of the name of a DIE.
6619 ///
6620 /// @param die the DIE to consider.
6621 ///
6622 /// @return a copy of the name of the DIE.
6623 static string
die_name(const Dwarf_Die * die)6624 die_name(const Dwarf_Die* die)
6625 {
6626   string name = die_string_attribute(die, DW_AT_name);
6627   return name;
6628 }
6629 
6630 /// Return the location, the name and the mangled name of a given DIE.
6631 ///
6632 /// @param ctxt the read context to use.
6633 ///
6634 /// @param die the DIE to read location and names from.
6635 ///
6636 /// @param loc the location output parameter to set.
6637 ///
6638 /// @param name the name output parameter to set.
6639 ///
6640 /// @param linkage_name the linkage_name output parameter to set.
6641 static void
die_loc_and_name(const read_context & ctxt,Dwarf_Die * die,location & loc,string & name,string & linkage_name)6642 die_loc_and_name(const read_context&	ctxt,
6643 		 Dwarf_Die*		die,
6644 		 location&		loc,
6645 		 string&		name,
6646 		 string&		linkage_name)
6647 {
6648   loc = die_location(ctxt, die);
6649   name = die_name(die);
6650   linkage_name = die_linkage_name(die);
6651 }
6652 
6653 /// Get the size of a (type) DIE as the value for the parameter
6654 /// DW_AT_byte_size or DW_AT_bit_size.
6655 ///
6656 /// @param die the DIE to read the information from.
6657 ///
6658 /// @param size the resulting size in bits.  This is set iff the
6659 /// function return true.
6660 ///
6661 /// @return true if the size attribute was found.
6662 static bool
die_size_in_bits(const Dwarf_Die * die,uint64_t & size)6663 die_size_in_bits(const Dwarf_Die* die, uint64_t& size)
6664 {
6665   if (!die)
6666     return false;
6667 
6668   uint64_t byte_size = 0, bit_size = 0;
6669 
6670   if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
6671     {
6672       if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
6673 	return false;
6674     }
6675   else
6676     bit_size = byte_size * 8;
6677 
6678   size = bit_size;
6679 
6680   return true;
6681 }
6682 
6683 /// Get the access specifier (from the DW_AT_accessibility attribute
6684 /// value) of a given DIE.
6685 ///
6686 /// @param die the DIE to consider.
6687 ///
6688 /// @param access the resulting access.  This is set iff the function
6689 /// returns true.
6690 ///
6691 /// @return bool if the DIE contains the DW_AT_accessibility die.
6692 static bool
die_access_specifier(Dwarf_Die * die,access_specifier & access)6693 die_access_specifier(Dwarf_Die * die, access_specifier& access)
6694 {
6695   if (!die)
6696     return false;
6697 
6698   uint64_t a = 0;
6699   if (!die_unsigned_constant_attribute(die, DW_AT_accessibility, a))
6700     return false;
6701 
6702   access_specifier result = private_access;
6703 
6704   switch (a)
6705     {
6706     case private_access:
6707       result = private_access;
6708       break;
6709 
6710     case protected_access:
6711       result = protected_access;
6712       break;
6713 
6714     case public_access:
6715       result = public_access;
6716       break;
6717 
6718     default:
6719       break;
6720     }
6721 
6722   access = result;
6723   return true;
6724 }
6725 
6726 /// Test whether a given DIE represents a decl that is public.  That
6727 /// is, one with the DW_AT_external attribute set.
6728 ///
6729 /// @param die the DIE to consider for testing.
6730 ///
6731 /// @return true if a DW_AT_external attribute is present and its
6732 /// value is set to the true; return false otherwise.
6733 static bool
die_is_public_decl(Dwarf_Die * die)6734 die_is_public_decl(Dwarf_Die* die)
6735 {
6736   bool is_public = false;
6737   die_flag_attribute(die, DW_AT_external, is_public);
6738   return is_public;
6739 }
6740 
6741 /// Test whether a given DIE represents a declaration-only DIE.
6742 ///
6743 /// That is, if the DIE has the DW_AT_declaration flag set.
6744 ///
6745 /// @param die the DIE to consider.
6746 //
6747 /// @return true if a DW_AT_declaration is present, false otherwise.
6748 static bool
die_is_declaration_only(Dwarf_Die * die)6749 die_is_declaration_only(Dwarf_Die* die)
6750 {
6751   bool is_declaration_only = false;
6752   die_flag_attribute(die, DW_AT_declaration, is_declaration_only, false);
6753   return is_declaration_only;
6754 }
6755 
6756 /// Tests whether a given DIE is artificial.
6757 ///
6758 /// @param die the test to test for.
6759 ///
6760 /// @return true if the DIE is artificial, false otherwise.
6761 static bool
die_is_artificial(Dwarf_Die * die)6762 die_is_artificial(Dwarf_Die* die)
6763 {
6764   bool is_artificial;
6765   return die_flag_attribute(die, DW_AT_artificial, is_artificial);
6766 }
6767 
6768 ///@return true if a tag represents a type, false otherwise.
6769 ///
6770 ///@param tag the tag to consider.
6771 static bool
is_type_tag(unsigned tag)6772 is_type_tag(unsigned tag)
6773 {
6774   bool result = false;
6775 
6776   switch (tag)
6777     {
6778     case DW_TAG_array_type:
6779     case DW_TAG_class_type:
6780     case DW_TAG_enumeration_type:
6781     case DW_TAG_pointer_type:
6782     case DW_TAG_reference_type:
6783     case DW_TAG_string_type:
6784     case DW_TAG_structure_type:
6785     case DW_TAG_subroutine_type:
6786     case DW_TAG_typedef:
6787     case DW_TAG_union_type:
6788     case DW_TAG_ptr_to_member_type:
6789     case DW_TAG_set_type:
6790     case DW_TAG_subrange_type:
6791     case DW_TAG_base_type:
6792     case DW_TAG_const_type:
6793     case DW_TAG_file_type:
6794     case DW_TAG_packed_type:
6795     case DW_TAG_thrown_type:
6796     case DW_TAG_volatile_type:
6797     case DW_TAG_restrict_type:
6798     case DW_TAG_interface_type:
6799     case DW_TAG_unspecified_type:
6800     case DW_TAG_shared_type:
6801     case DW_TAG_rvalue_reference_type:
6802     case DW_TAG_coarray_type:
6803     case DW_TAG_atomic_type:
6804     case DW_TAG_immutable_type:
6805       result = true;
6806       break;
6807 
6808     default:
6809       result = false;
6810       break;
6811     }
6812 
6813   return result;
6814 }
6815 
6816 /// Test if a given DIE is a type to be canonicalized.  note that a
6817 /// function DIE (DW_TAG_subprogram) is considered to be a
6818 /// canonicalize-able type too because we can consider that DIE as
6819 /// being the type of the function, as well as the function decl
6820 /// itself.
6821 ///
6822 /// @param tag the tag of the DIE to consider.
6823 ///
6824 /// @return true iff the DIE of tag @p tag is a canonicalize-able DIE.
6825 static bool
is_canonicalizeable_type_tag(unsigned tag)6826 is_canonicalizeable_type_tag(unsigned tag)
6827 {
6828   bool result = false;
6829 
6830   switch (tag)
6831     {
6832     case DW_TAG_array_type:
6833     case DW_TAG_class_type:
6834     case DW_TAG_enumeration_type:
6835     case DW_TAG_pointer_type:
6836     case DW_TAG_reference_type:
6837     case DW_TAG_structure_type:
6838     case DW_TAG_subroutine_type:
6839     case DW_TAG_subprogram:
6840     case DW_TAG_typedef:
6841     case DW_TAG_union_type:
6842     case DW_TAG_base_type:
6843     case DW_TAG_const_type:
6844     case DW_TAG_volatile_type:
6845     case DW_TAG_restrict_type:
6846     case DW_TAG_rvalue_reference_type:
6847       result = true;
6848       break;
6849 
6850     default:
6851       result = false;
6852       break;
6853     }
6854 
6855   return result;
6856 }
6857 
6858 /// Test if a DIE tag represents a declaration.
6859 ///
6860 /// @param tag the DWARF tag to consider.
6861 ///
6862 /// @return true iff @p tag is for a declaration.
6863 static bool
is_decl_tag(unsigned tag)6864 is_decl_tag(unsigned tag)
6865 {
6866   switch (tag)
6867     {
6868     case DW_TAG_formal_parameter:
6869     case DW_TAG_imported_declaration:
6870     case DW_TAG_member:
6871     case DW_TAG_unspecified_parameters:
6872     case DW_TAG_subprogram:
6873     case DW_TAG_variable:
6874     case DW_TAG_namespace:
6875     case DW_TAG_GNU_template_template_param:
6876     case DW_TAG_GNU_template_parameter_pack:
6877     case DW_TAG_GNU_formal_parameter_pack:
6878       return true;
6879     }
6880   return false;
6881 }
6882 
6883 /// Test if a DIE represents a type DIE.
6884 ///
6885 /// @param die the DIE to consider.
6886 ///
6887 /// @return true if @p die represents a type, false otherwise.
6888 static bool
die_is_type(const Dwarf_Die * die)6889 die_is_type(const Dwarf_Die* die)
6890 {
6891   if (!die)
6892     return false;
6893   return is_type_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
6894 }
6895 
6896 /// Test if a DIE represents a declaration.
6897 ///
6898 /// @param die the DIE to consider.
6899 ///
6900 /// @return true if @p die represents a decl, false otherwise.
6901 static bool
die_is_decl(const Dwarf_Die * die)6902 die_is_decl(const Dwarf_Die* die)
6903 {
6904   if (!die)
6905     return false;
6906   return is_decl_tag(dwarf_tag(const_cast<Dwarf_Die*>(die)));
6907 }
6908 
6909 /// Test if a DIE represents a namespace.
6910 ///
6911 /// @param die the DIE to consider.
6912 ///
6913 /// @return true if @p die represents a namespace, false otherwise.
6914 static bool
die_is_namespace(const Dwarf_Die * die)6915 die_is_namespace(const Dwarf_Die* die)
6916 {
6917   if (!die)
6918     return false;
6919   return (dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_namespace);
6920 }
6921 
6922 /// Test if a DIE has tag DW_TAG_unspecified_type.
6923 ///
6924 /// @param die the DIE to consider.
6925 ///
6926 /// @return true if @p die has tag DW_TAG_unspecified_type.
6927 static bool
die_is_unspecified(Dwarf_Die * die)6928 die_is_unspecified(Dwarf_Die* die)
6929 {
6930   if (!die)
6931     return false;
6932   return (dwarf_tag(die) == DW_TAG_unspecified_type);
6933 }
6934 
6935 /// Test if a DIE represents a void type.
6936 ///
6937 /// @param die the DIE to consider.
6938 ///
6939 /// @return true if @p die represents a void type, false otherwise.
6940 static bool
die_is_void_type(Dwarf_Die * die)6941 die_is_void_type(Dwarf_Die* die)
6942 {
6943   if (!die || dwarf_tag(die) != DW_TAG_base_type)
6944     return false;
6945 
6946   string name = die_name(die);
6947   if (name == "void")
6948     return true;
6949 
6950   return false;
6951 }
6952 
6953 /// Test if a DIE represents a pointer type.
6954 ///
6955 /// @param die the die to consider.
6956 ///
6957 /// @return true iff @p die represents a pointer type.
6958 static bool
die_is_pointer_type(const Dwarf_Die * die)6959 die_is_pointer_type(const Dwarf_Die* die)
6960 {
6961   if (!die)
6962     return false;
6963 
6964   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
6965   if (tag == DW_TAG_pointer_type)
6966     return true;
6967 
6968   return false;
6969 }
6970 
6971 /// Test if a DIE is for a pointer, reference or qualified type to
6972 /// anonymous class or struct.
6973 ///
6974 /// @param die the DIE to consider.
6975 ///
6976 /// @return true iff @p is for a pointer, reference or qualified type
6977 /// to anonymous class or struct.
6978 static bool
pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die * die)6979 pointer_or_qual_die_of_anonymous_class_type(const Dwarf_Die* die)
6980 {
6981   if (!die_is_pointer_or_reference_type(die)
6982       && !die_is_qualified_type(die))
6983     return false;
6984 
6985   Dwarf_Die underlying_type_die;
6986   if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
6987     return false;
6988 
6989   if (!die_is_class_type(&underlying_type_die))
6990     return false;
6991 
6992   string name = die_name(&underlying_type_die);
6993 
6994   return name.empty();
6995 }
6996 
6997 /// Test if a DIE represents a reference type.
6998 ///
6999 /// @param die the die to consider.
7000 ///
7001 /// @return true iff @p die represents a reference type.
7002 static bool
die_is_reference_type(const Dwarf_Die * die)7003 die_is_reference_type(const Dwarf_Die* die)
7004 {
7005   if (!die)
7006     return false;
7007 
7008   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7009   if (tag == DW_TAG_reference_type || tag == DW_TAG_rvalue_reference_type)
7010     return true;
7011 
7012   return false;
7013 }
7014 
7015 /// Test if a DIE represents an array type.
7016 ///
7017 /// @param die the die to consider.
7018 ///
7019 /// @return true iff @p die represents an array type.
7020 static bool
die_is_array_type(const Dwarf_Die * die)7021 die_is_array_type(const Dwarf_Die* die)
7022 {
7023   if (!die)
7024     return false;
7025 
7026   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7027   if (tag == DW_TAG_array_type)
7028     return true;
7029 
7030   return false;
7031 }
7032 
7033 /// Test if a DIE represents a pointer, reference or array type.
7034 ///
7035 /// @param die the die to consider.
7036 ///
7037 /// @return true iff @p die represents a pointer or reference type.
7038 static bool
die_is_pointer_or_reference_type(const Dwarf_Die * die)7039 die_is_pointer_or_reference_type(const Dwarf_Die* die)
7040 {return (die_is_pointer_type(die)
7041 	 || die_is_reference_type(die)
7042 	 || die_is_array_type(die));}
7043 
7044 /// Test if a DIE represents a pointer, a reference or a typedef type.
7045 ///
7046 /// @param die the die to consider.
7047 ///
7048 /// @return true iff @p die represents a pointer, a reference or a
7049 /// typedef type.
7050 static bool
die_is_pointer_reference_or_typedef_type(const Dwarf_Die * die)7051 die_is_pointer_reference_or_typedef_type(const Dwarf_Die* die)
7052 {return (die_is_pointer_or_reference_type(die)
7053 	 || dwarf_tag(const_cast<Dwarf_Die*>(die)) == DW_TAG_typedef);}
7054 
7055 /// Test if a DIE represents a class type.
7056 ///
7057 /// @param die the die to consider.
7058 ///
7059 /// @return true iff @p die represents a class type.
7060 static bool
die_is_class_type(const Dwarf_Die * die)7061 die_is_class_type(const Dwarf_Die* die)
7062 {
7063   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7064 
7065   if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
7066     return true;
7067 
7068   return false;
7069 }
7070 
7071 /// Test if a DIE is for a qualified type.
7072 ///
7073 /// @param die the DIE to consider.
7074 ///
7075 /// @return true iff @p die is for a qualified type.
7076 static bool
die_is_qualified_type(const Dwarf_Die * die)7077 die_is_qualified_type(const Dwarf_Die* die)
7078 {
7079   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7080     if (tag == DW_TAG_const_type
7081 	|| tag == DW_TAG_volatile_type
7082 	|| tag == DW_TAG_restrict_type)
7083       return true;
7084 
7085     return false;
7086 }
7087 
7088 /// Test if a DIE is for a function type.
7089 ///
7090 /// @param die the DIE to consider.
7091 ///
7092 /// @return true iff @p die is for a function type.
7093 static bool
die_is_function_type(const Dwarf_Die * die)7094 die_is_function_type(const Dwarf_Die *die)
7095 {
7096   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7097   if (tag == DW_TAG_subprogram || tag == DW_TAG_subroutine_type)
7098     return true;
7099 
7100   return false;
7101 }
7102 
7103 /// Test if a DIE for a function pointer or member function has an
7104 /// DW_AT_object_pointer attribute.
7105 ///
7106 /// @param die the DIE to consider.
7107 ///
7108 /// @param object_pointer out parameter.  It's set to the DIE for the
7109 /// object pointer iff the function returns true.
7110 ///
7111 /// @return true iff the DIE @p die has an object pointer.  In that
7112 /// case, the parameter @p object_pointer is set to the DIE of that
7113 /// object pointer.
7114 static bool
die_has_object_pointer(const Dwarf_Die * die,Dwarf_Die & object_pointer)7115 die_has_object_pointer(const Dwarf_Die* die, Dwarf_Die& object_pointer)
7116 {
7117   if (!die)
7118     return false;
7119 
7120   if (die_die_attribute(die, DW_AT_object_pointer, object_pointer))
7121     return true;
7122 
7123   return false;
7124 }
7125 
7126 /// Test if a DIE has children DIEs.
7127 ///
7128 /// @param die the DIE to consider.
7129 ///
7130 /// @return true iff @p DIE has at least one child node.
7131 static bool
die_has_children(const Dwarf_Die * die)7132 die_has_children(const Dwarf_Die* die)
7133 {
7134   if (!die)
7135     return false;
7136 
7137   Dwarf_Die child;
7138   if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
7139     return true;
7140 
7141   return false;
7142 }
7143 
7144 /// When given the object pointer DIE of a function type or member
7145 /// function DIE, this function returns the "this" pointer that points
7146 /// to the associated class.
7147 ///
7148 /// @param die the DIE of the object pointer of the function or member
7149 /// function to consider.
7150 ///
7151 /// @param this_pointer_die out parameter.  This is set to the DIE of
7152 /// the "this" pointer iff the function returns true.
7153 ///
7154 /// @return true iff the function found the "this" pointer from the
7155 /// object pointer DIE @p die.  In that case, the parameter @p
7156 /// this_pointer_die is set to the DIE of that "this" pointer.
7157 static bool
die_this_pointer_from_object_pointer(Dwarf_Die * die,Dwarf_Die & this_pointer_die)7158 die_this_pointer_from_object_pointer(Dwarf_Die* die,
7159 				     Dwarf_Die& this_pointer_die)
7160 {
7161   ABG_ASSERT(die);
7162   ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
7163 
7164   if (die_die_attribute(die, DW_AT_type, this_pointer_die))
7165     return true;
7166 
7167   return false;
7168 }
7169 
7170 /// Test if a given "this" pointer that points to a particular class
7171 /// type is for a const class or not.  If it's for a const class, then
7172 /// it means the function type or the member function associated to
7173 /// that "this" pointer is const.
7174 ///
7175 /// @param die the DIE of the "this" pointer to consider.
7176 ///
7177 /// @return true iff @p die points to a const class type.
7178 static bool
die_this_pointer_is_const(Dwarf_Die * die)7179 die_this_pointer_is_const(Dwarf_Die* die)
7180 {
7181   ABG_ASSERT(die);
7182 
7183   if (dwarf_tag(die) == DW_TAG_pointer_type)
7184     {
7185       Dwarf_Die pointed_to_type_die;
7186       if (die_die_attribute(die, DW_AT_type, pointed_to_type_die))
7187 	if (dwarf_tag(&pointed_to_type_die) == DW_TAG_const_type)
7188 	  return true;
7189     }
7190 
7191   return false;
7192 }
7193 
7194 /// Test if an object pointer (referred-to via a DW_AT_object_pointer
7195 /// attribute) points to a const implicit class and so is for a const
7196 /// method or or a const member function type.
7197 ///
7198 /// @param die the DIE of the object pointer to consider.
7199 ///
7200 /// @return true iff the object pointer represented by @p die is for a
7201 /// a const method or const member function type.
7202 static bool
die_object_pointer_is_for_const_method(Dwarf_Die * die)7203 die_object_pointer_is_for_const_method(Dwarf_Die* die)
7204 {
7205   ABG_ASSERT(die);
7206   ABG_ASSERT(dwarf_tag(die) == DW_TAG_formal_parameter);
7207 
7208   Dwarf_Die this_pointer_die;
7209   if (die_this_pointer_from_object_pointer(die, this_pointer_die))
7210     if (die_this_pointer_is_const(&this_pointer_die))
7211       return true;
7212 
7213   return false;
7214 }
7215 
7216 /// Test if a DIE represents an entity that is at class scope.
7217 ///
7218 /// @param ctxt the read context to use.
7219 ///
7220 /// @param die the DIE to consider.
7221 ///
7222 /// @param where_offset where we are logically at in the DIE stream.
7223 ///
7224 /// @param class_scope_die out parameter.  Set to the DIE of the
7225 /// containing class iff @p die happens to be at class scope; that is,
7226 /// iff the function returns true.
7227 ///
7228 /// @return true iff @p die is at class scope.  In that case, @p
7229 /// class_scope_die is set to the DIE of the class that contains @p
7230 /// die.
7231 static bool
die_is_at_class_scope(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,Dwarf_Die & class_scope_die)7232 die_is_at_class_scope(const read_context& ctxt,
7233 		      const Dwarf_Die* die,
7234 		      size_t where_offset,
7235 		      Dwarf_Die& class_scope_die)
7236 {
7237   if (!get_scope_die(ctxt, die, where_offset, class_scope_die))
7238     return false;
7239 
7240   int tag = dwarf_tag(&class_scope_die);
7241 
7242   return (tag == DW_TAG_structure_type
7243 	  || tag == DW_TAG_class_type
7244 	  || tag == DW_TAG_union_type);
7245 }
7246 
7247 /// Return the leaf object under a pointer, reference or qualified
7248 /// type DIE.
7249 ///
7250 /// @param die the DIE of the type to consider.
7251 ///
7252 /// @param peeled_die out parameter.  Set to the DIE of the leaf
7253 /// object iff the function actually peeled anything.
7254 ///
7255 /// @return true upon successful completion.
7256 static bool
die_peel_qual_ptr(Dwarf_Die * die,Dwarf_Die & peeled_die)7257 die_peel_qual_ptr(Dwarf_Die *die, Dwarf_Die& peeled_die)
7258 {
7259   if (!die)
7260     return false;
7261 
7262   int tag = dwarf_tag(die);
7263 
7264   if (tag == DW_TAG_const_type
7265       || tag == DW_TAG_volatile_type
7266       || tag == DW_TAG_restrict_type
7267       || tag == DW_TAG_pointer_type
7268       || tag == DW_TAG_reference_type
7269       || tag == DW_TAG_rvalue_reference_type)
7270     {
7271       if (!die_die_attribute(die, DW_AT_type, peeled_die))
7272 	return false;
7273     }
7274   else
7275     return false;
7276 
7277   while (tag == DW_TAG_const_type
7278 	 || tag == DW_TAG_volatile_type
7279 	 || tag == DW_TAG_restrict_type
7280 	 || tag == DW_TAG_pointer_type
7281 	 || tag == DW_TAG_reference_type
7282 	 || tag == DW_TAG_rvalue_reference_type)
7283     {
7284       if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
7285 	break;
7286       tag = dwarf_tag(&peeled_die);
7287     }
7288 
7289   return true;
7290 }
7291 
7292 /// Return the leaf object under a typedef type DIE.
7293 ///
7294 /// @param die the DIE of the type to consider.
7295 ///
7296 /// @param peeled_die out parameter.  Set to the DIE of the leaf
7297 /// object iff the function actually peeled anything.
7298 ///
7299 /// @return true upon successful completion.
7300 static bool
die_peel_typedef(Dwarf_Die * die,Dwarf_Die & peeled_die)7301 die_peel_typedef(Dwarf_Die *die, Dwarf_Die& peeled_die)
7302 {
7303   if (!die)
7304     return false;
7305 
7306   int tag = dwarf_tag(die);
7307 
7308   if (tag == DW_TAG_typedef)
7309     {
7310       if (!die_die_attribute(die, DW_AT_type, peeled_die))
7311 	return false;
7312     }
7313   else
7314     return false;
7315 
7316   while (tag == DW_TAG_typedef)
7317     {
7318       if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
7319 	break;
7320       tag = dwarf_tag(&peeled_die);
7321     }
7322 
7323   return true;
7324 
7325 }
7326 
7327 /// Return the leaf DIE under a pointer, a reference or a typedef DIE.
7328 ///
7329 /// @param die the DIE to consider.
7330 ///
7331 /// @param peeled_die the resulting peeled (or leaf) DIE.  This is set
7332 /// iff the function returned true.
7333 ///
7334 /// @return true iff the function could peel @p die.
7335 static bool
die_peel_pointer_and_typedef(const Dwarf_Die * die,Dwarf_Die & peeled_die)7336 die_peel_pointer_and_typedef(const Dwarf_Die *die, Dwarf_Die& peeled_die)
7337 {
7338   if (!die)
7339     return false;
7340 
7341   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7342 
7343   if (tag == DW_TAG_pointer_type
7344       || tag == DW_TAG_reference_type
7345       || tag == DW_TAG_rvalue_reference_type
7346       || tag == DW_TAG_typedef)
7347     {
7348       if (!die_die_attribute(die, DW_AT_type, peeled_die))
7349 	return false;
7350     }
7351   else
7352     return false;
7353 
7354   while (tag == DW_TAG_pointer_type
7355 	 || tag == DW_TAG_reference_type
7356 	 || tag == DW_TAG_rvalue_reference_type
7357 	 || tag == DW_TAG_typedef)
7358     {
7359       if (!die_die_attribute(&peeled_die, DW_AT_type, peeled_die))
7360 	break;
7361       tag = dwarf_tag(&peeled_die);
7362     }
7363   return true;
7364 }
7365 
7366 /// Test if a DIE for a function type represents a method type.
7367 ///
7368 /// @param ctxt the read context.
7369 ///
7370 /// @param die the DIE to consider.
7371 ///
7372 /// @param where_offset where we logically are in the stream of DIEs.
7373 ///
7374 /// @param object_pointer_die out parameter.  This is set by the
7375 /// function to the DIE that refers to the formal function parameter
7376 /// which holds the implicit "this" pointer of the method.  That die
7377 /// is called the object pointer DIE. This is set iff the function
7378 ///
7379 /// @param class_die out parameter.  This is set by the function to
7380 /// the DIE that represents the class of the method type.  This is set
7381 /// iff the function returns true.
7382 ///
7383 /// @param is_static out parameter.  This is set to true by the
7384 /// function if @p die is a static method.  This is set iff the
7385 /// function returns true.
7386 ///
7387 /// @return true iff @p die is a DIE for a method type.
7388 static bool
die_function_type_is_method_type(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,Dwarf_Die & object_pointer_die,Dwarf_Die & class_die,bool & is_static)7389 die_function_type_is_method_type(const read_context& ctxt,
7390 				 const Dwarf_Die *die,
7391 				 size_t where_offset,
7392 				 Dwarf_Die& object_pointer_die,
7393 				 Dwarf_Die& class_die,
7394 				 bool& is_static)
7395 {
7396   if (!die)
7397     return false;
7398 
7399   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
7400   ABG_ASSERT(tag == DW_TAG_subroutine_type || tag == DW_TAG_subprogram);
7401 
7402   bool has_object_pointer = false;
7403   is_static = false;
7404   if (tag == DW_TAG_subprogram)
7405     {
7406       Dwarf_Die spec_or_origin_die;
7407       if (die_die_attribute(die, DW_AT_specification,
7408 			    spec_or_origin_die)
7409 	  || die_die_attribute(die, DW_AT_abstract_origin,
7410 			       spec_or_origin_die))
7411 	{
7412 	  if (die_has_object_pointer(&spec_or_origin_die,
7413 				     object_pointer_die))
7414 	    has_object_pointer = true;
7415 	  else
7416 	    {
7417 	      if (die_is_at_class_scope(ctxt, &spec_or_origin_die,
7418 					where_offset, class_die))
7419 		is_static = true;
7420 	      else
7421 		return false;
7422 	    }
7423 	}
7424       else
7425 	{
7426 	  if (die_has_object_pointer(die, object_pointer_die))
7427 	    has_object_pointer = true;
7428 	  else
7429 	    {
7430 	      if (die_is_at_class_scope(ctxt, die, where_offset, class_die))
7431 		is_static = true;
7432 	      else
7433 		return false;
7434 	    }
7435 	}
7436     }
7437   else
7438     {
7439       if (die_has_object_pointer(die, object_pointer_die))
7440 	has_object_pointer = true;
7441       else
7442 	return false;
7443     }
7444 
7445   if (!is_static)
7446     {
7447       ABG_ASSERT(has_object_pointer);
7448       // The object pointer die points to a DW_TAG_formal_parameter which
7449       // is the "this" parameter.  The type of the "this" parameter is a
7450       // pointer.  Let's get that pointer type.
7451       Dwarf_Die this_type_die;
7452       if (!die_die_attribute(&object_pointer_die, DW_AT_type, this_type_die))
7453 	return false;
7454 
7455       // So the class type is the type pointed to by the type of the "this"
7456       // parameter.
7457       if (!die_peel_qual_ptr(&this_type_die, class_die))
7458 	return false;
7459 
7460       // And make we return a class type, rather than a typedef to a
7461       // class.
7462       die_peel_typedef(&class_die, class_die);
7463     }
7464 
7465   return true;
7466 }
7467 
/// The virtual-ness of a DIE, as reported by die_virtuality from the
/// value of the DW_AT_virtuality attribute.
enum virtuality
{
  /// The DIE is neither virtual nor pure virtual.
  VIRTUALITY_NOT_VIRTUAL,
  /// The attribute value is DW_VIRTUALITY_virtual.
  VIRTUALITY_VIRTUAL,
  /// The attribute value is DW_VIRTUALITY_pure_virtual.
  VIRTUALITY_PURE_VIRTUAL
};
7474 
7475 /// Get the virtual-ness of a given DIE, that is, the value of the
7476 /// DW_AT_virtuality attribute.
7477 ///
7478 /// @param die the DIE to read from.
7479 ///
7480 /// @param virt the resulting virtuality attribute.  This is set iff
7481 /// the function returns true.
7482 ///
7483 /// @return true if the virtual-ness could be determined.
7484 static bool
die_virtuality(const Dwarf_Die * die,virtuality & virt)7485 die_virtuality(const Dwarf_Die* die, virtuality& virt)
7486 {
7487   if (!die)
7488     return false;
7489 
7490   uint64_t v = 0;
7491   die_unsigned_constant_attribute(die, DW_AT_virtuality, v);
7492 
7493   if (v == DW_VIRTUALITY_virtual)
7494     virt = VIRTUALITY_VIRTUAL;
7495   else if (v == DW_VIRTUALITY_pure_virtual)
7496     virt = VIRTUALITY_PURE_VIRTUAL;
7497   else
7498     virt = VIRTUALITY_NOT_VIRTUAL;
7499 
7500   return true;
7501 }
7502 
7503 /// Test whether the DIE represent either a virtual base or function.
7504 ///
7505 /// @param die the DIE to consider.
7506 ///
7507 /// @return bool if the DIE represents a virtual base or function,
7508 /// false othersise.
7509 static bool
die_is_virtual(const Dwarf_Die * die)7510 die_is_virtual(const Dwarf_Die* die)
7511 {
7512   virtuality v;
7513   if (!die_virtuality(die, v))
7514     return false;
7515 
7516   return v == VIRTUALITY_PURE_VIRTUAL || v == VIRTUALITY_VIRTUAL;
7517 }
7518 
7519 /// Test if the DIE represents an entity that was declared inlined.
7520 ///
7521 /// @param die the DIE to test for.
7522 ///
7523 /// @return true if the DIE represents an entity that was declared
7524 /// inlined.
7525 static bool
die_is_declared_inline(Dwarf_Die * die)7526 die_is_declared_inline(Dwarf_Die* die)
7527 {
7528   uint64_t inline_value = 0;
7529   if (!die_unsigned_constant_attribute(die, DW_AT_inline, inline_value))
7530     return false;
7531   return inline_value == DW_INL_declared_inlined;
7532 }
7533 
7534 /// This function is a fast routine (optimization) to compare the
7535 /// values of two string attributes of two DIEs.
7536 ///
7537 /// @param l the first DIE to consider.
7538 ///
7539 /// @param r the second DIE to consider.
7540 ///
7541 /// @param attr_name the name of the attribute to compare, on the two
7542 /// DIEs above.
7543 ///
7544 /// @param result out parameter.  This is set to the result of the
7545 /// comparison.  If the value of attribute @p attr_name on DIE @p l
7546 /// equals the value of attribute @p attr_name on DIE @p r, then the
7547 /// the argument of this parameter is set to true.  Otherwise, it's
7548 /// set to false.  Note that the argument of this parameter is set iff
7549 /// the function returned true.
7550 ///
7551 /// @return true iff the comparison could be performed.  There are
7552 /// cases in which the comparison cannot be performed.  For instance,
7553 /// if one of the DIEs does not have the attribute @p attr_name.  In
7554 /// any case, if this function returns true, then the parameter @p
7555 /// result is set to the result of the comparison.
7556 static bool
compare_dies_string_attribute_value(const Dwarf_Die * l,const Dwarf_Die * r,unsigned attr_name,bool & result)7557 compare_dies_string_attribute_value(const Dwarf_Die *l, const Dwarf_Die *r,
7558 				    unsigned attr_name,
7559 				    bool &result)
7560 {
7561   Dwarf_Attribute l_attr, r_attr;
7562   if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(l), attr_name, &l_attr)
7563       || !dwarf_attr_integrate(const_cast<Dwarf_Die*>(r), attr_name, &r_attr))
7564     return false;
7565 
7566   ABG_ASSERT(l_attr.form == DW_FORM_strp
7567 	     || l_attr.form == DW_FORM_string
7568 	     || l_attr.form == DW_FORM_GNU_strp_alt
7569 	     || form_is_DW_FORM_strx(l_attr.form)
7570 	     || form_is_DW_FORM_line_strp(l_attr.form));
7571 
7572   ABG_ASSERT(r_attr.form == DW_FORM_strp
7573 	     || r_attr.form == DW_FORM_string
7574 	     || r_attr.form == DW_FORM_GNU_strp_alt
7575 	     || form_is_DW_FORM_strx(r_attr.form)
7576 	     || form_is_DW_FORM_line_strp(r_attr.form));
7577 
7578   if ((l_attr.form == DW_FORM_strp
7579        && r_attr.form == DW_FORM_strp)
7580       || (l_attr.form == DW_FORM_GNU_strp_alt
7581 	  && r_attr.form == DW_FORM_GNU_strp_alt)
7582       || (form_is_DW_FORM_strx(l_attr.form)
7583 	  && form_is_DW_FORM_strx(r_attr.form))
7584       || (form_is_DW_FORM_line_strp(l_attr.form)
7585 	  && form_is_DW_FORM_line_strp(r_attr.form)))
7586     {
7587       // So these string attributes are actually pointers into a
7588       // string table.  The string table is most likely de-duplicated
7589       // so comparing the *values* of the pointers should be enough.
7590       //
7591       // This is the fast path.
7592       if (l_attr.valp == r_attr.valp)
7593 	  result = true;
7594       else if (l_attr.valp && r_attr.valp)
7595 	result = *l_attr.valp == *r_attr.valp;
7596       else
7597 	result = false;
7598       return true;
7599     }
7600 
7601   // If we reached this point it means we couldn't use the fast path
7602   // because the string atttributes are strings that are "inline" in
7603   // the debug info section.  Let's just compare them the slow and
7604   // obvious way.
7605   string l_str = die_string_attribute(l, attr_name),
7606     r_str = die_string_attribute(r, attr_name);
7607   result = l_str == r_str;
7608 
7609   return true;
7610 }
7611 
7612 /// Compare the file path of the compilation units (aka CUs)
7613 /// associated to two DIEs.
7614 ///
7615 /// If the DIEs are for pointers or typedefs, this function also
7616 /// compares the file paths of the CUs of the leaf DIEs (underlying
7617 /// DIEs of the pointer or the typedef).
7618 ///
7619 /// @param l the first type DIE to consider.
7620 ///
7621 /// @param r the second type DIE to consider.
7622 ///
7623 /// @return true iff the file paths of the DIEs of the two types are
7624 /// equal.
7625 static bool
compare_dies_cu_decl_file(const Dwarf_Die * l,const Dwarf_Die * r,bool & result)7626 compare_dies_cu_decl_file(const Dwarf_Die* l, const Dwarf_Die *r, bool &result)
7627 {
7628   Dwarf_Die l_cu, r_cu;
7629   if (!dwarf_diecu(const_cast<Dwarf_Die*>(l), &l_cu, 0, 0)
7630       ||!dwarf_diecu(const_cast<Dwarf_Die*>(r), &r_cu, 0, 0))
7631     return false;
7632 
7633   bool compared =
7634     compare_dies_string_attribute_value(&l_cu, &r_cu,
7635 					DW_AT_name,
7636 					result);
7637   if (compared)
7638     {
7639       Dwarf_Die peeled_l, peeled_r;
7640       if (die_is_pointer_reference_or_typedef_type(l)
7641 	  && die_is_pointer_reference_or_typedef_type(r)
7642 	  && die_peel_pointer_and_typedef(l, peeled_l)
7643 	  && die_peel_pointer_and_typedef(r, peeled_r))
7644 	{
7645 	  if (!dwarf_diecu(&peeled_l, &l_cu, 0, 0)
7646 	      ||!dwarf_diecu(&peeled_r, &r_cu, 0, 0))
7647 	    return false;
7648 	  compared =
7649 	    compare_dies_string_attribute_value(&l_cu, &r_cu,
7650 						DW_AT_name,
7651 						result);
7652 	}
7653     }
7654 
7655   return  compared;
7656 }
7657 
7658 // -----------------------------------
7659 // <location expression evaluation>
7660 // -----------------------------------
7661 
7662 /// Get the value of a given DIE attribute, knowing that it must be a
7663 /// location expression.
7664 ///
7665 /// @param die the DIE to read the attribute from.
7666 ///
7667 /// @param attr_name the name of the attribute to read the value for.
7668 ///
7669 /// @param expr the pointer to allocate and fill with the resulting
7670 /// array of operators + operands forming a dwarf expression.  This is
7671 /// set iff the function returns true.
7672 ///
7673 /// @param expr_len the length of the resulting dwarf expression.
7674 /// This is set iff the function returns true.
7675 ///
7676 /// @return true if the attribute exists and has a non-empty dwarf expression
7677 /// as value.  In that case the expr and expr_len arguments are set to the
7678 /// resulting dwarf expression.
7679 static bool
die_location_expr(const Dwarf_Die * die,unsigned attr_name,Dwarf_Op ** expr,uint64_t * expr_len)7680 die_location_expr(const Dwarf_Die* die,
7681 		  unsigned attr_name,
7682 		  Dwarf_Op** expr,
7683 		  uint64_t* expr_len)
7684 {
7685   if (!die)
7686     return false;
7687 
7688   Dwarf_Attribute attr;
7689   if (!dwarf_attr_integrate(const_cast<Dwarf_Die*>(die), attr_name, &attr))
7690     return false;
7691 
7692   size_t len = 0;
7693   bool result = (dwarf_getlocation(&attr, expr, &len) == 0);
7694 
7695   // Ignore location expressions where reading them succeeded but
7696   // their length is 0.
7697   result &= len > 0;
7698 
7699   if (result)
7700     *expr_len = len;
7701 
7702   return result;
7703 }
7704 
7705 /// If the current operation in the dwarf expression represents a push
7706 /// of a constant value onto the dwarf expr virtual machine (aka
7707 /// DEVM), perform the operation and update the DEVM.
7708 ///
7709 /// If the result of the operation is a constant, update the DEVM
7710 /// accumulator with its value.  Otherwise, the DEVM accumulator is
7711 /// left with its previous value.
7712 ///
7713 /// @param ops the array of the dwarf expression operations to consider.
7714 ///
7715 /// @param ops_len the lengths of @p ops array above.
7716 ///
7717 /// @param index the index of the operation to interpret, in @p ops.
7718 ///
7719 /// @param next_index the index of the operation to interpret at the
7720 /// next step, after this function completed and returned.  This is
7721 /// set an output parameter that is set iff the function returns true.
7722 ///
7723 /// @param ctxt the DEVM evaluation context.
7724 ///
7725 /// @return true if the current operation actually pushes a constant
7726 /// value onto the DEVM stack, false otherwise.
7727 static bool
op_pushes_constant_value(Dwarf_Op * ops,uint64_t ops_len,uint64_t index,uint64_t & next_index,dwarf_expr_eval_context & ctxt)7728 op_pushes_constant_value(Dwarf_Op*			ops,
7729 			 uint64_t			ops_len,
7730 			 uint64_t			index,
7731 			 uint64_t&			next_index,
7732 			 dwarf_expr_eval_context&	ctxt)
7733 {
7734   ABG_ASSERT(index < ops_len);
7735 
7736   Dwarf_Op& op = ops[index];
7737   int64_t value = 0;
7738 
7739   switch (op.atom)
7740     {
7741     case DW_OP_addr:
7742       value = ops[index].number;
7743       break;
7744 
7745     case DW_OP_const1u:
7746     case DW_OP_const1s:
7747     case DW_OP_const2u:
7748     case DW_OP_const2s:
7749     case DW_OP_const4u:
7750     case DW_OP_const4s:
7751     case DW_OP_const8u:
7752     case DW_OP_const8s:
7753     case DW_OP_constu:
7754     case DW_OP_consts:
7755       value = ops[index].number;
7756       break;
7757 
7758     case DW_OP_lit0:
7759       value = 0;
7760       break;
7761     case DW_OP_lit1:
7762       value = 1;
7763       break;
7764     case DW_OP_lit2:
7765       value = 2;
7766       break;
7767     case DW_OP_lit3:
7768       value = 3;
7769       break;
7770     case DW_OP_lit4:
7771       value = 4;
7772       break;
7773     case DW_OP_lit5:
7774       value = 5;
7775       break;
7776     case DW_OP_lit6:
7777       value = 6;
7778       break;
7779     case DW_OP_lit7:
7780       value = 7;
7781       break;
7782     case DW_OP_lit8:
7783       value = 8;
7784       break;
7785     case DW_OP_lit9:
7786       value = 9;
7787       break;
7788     case DW_OP_lit10:
7789       value = 10;
7790       break;
7791     case DW_OP_lit11:
7792       value = 11;
7793       break;
7794     case DW_OP_lit12:
7795       value = 12;
7796       break;
7797     case DW_OP_lit13:
7798       value = 13;
7799       break;
7800     case DW_OP_lit14:
7801       value = 14;
7802       break;
7803     case DW_OP_lit15:
7804       value = 15;
7805       break;
7806     case DW_OP_lit16:
7807       value = 16;
7808       break;
7809     case DW_OP_lit17:
7810       value = 17;
7811       break;
7812     case DW_OP_lit18:
7813       value = 18;
7814       break;
7815     case DW_OP_lit19:
7816       value = 19;
7817       break;
7818     case DW_OP_lit20:
7819       value = 20;
7820       break;
7821     case DW_OP_lit21:
7822       value = 21;
7823       break;
7824     case DW_OP_lit22:
7825       value = 22;
7826       break;
7827     case DW_OP_lit23:
7828       value = 23;
7829       break;
7830     case DW_OP_lit24:
7831       value = 24;
7832       break;
7833     case DW_OP_lit25:
7834       value = 25;
7835       break;
7836     case DW_OP_lit26:
7837       value = 26;
7838       break;
7839     case DW_OP_lit27:
7840       value = 27;
7841       break;
7842     case DW_OP_lit28:
7843       value = 28;
7844       break;
7845     case DW_OP_lit29:
7846       value = 29;
7847       break;
7848     case DW_OP_lit30:
7849       value = 30;
7850       break;
7851     case DW_OP_lit31:
7852       value = 31;
7853       break;
7854 
7855     default:
7856       return false;
7857     }
7858 
7859   expr_result r(value);
7860   ctxt.push(r);
7861   ctxt.accum = r;
7862   next_index = index + 1;
7863 
7864   return true;
7865 }
7866 
7867 /// If the current operation in the dwarf expression represents a push
7868 /// of a non-constant value onto the dwarf expr virtual machine (aka
7869 /// DEVM), perform the operation and update the DEVM.  A non-constant
7870 /// is namely a quantity for which we need inferior (a running program
7871 /// image) state to know the exact value.
7872 ///
7873 /// Upon successful completion, as the result of the operation is a
7874 /// non-constant the DEVM accumulator value is left to its state as of
7875 /// before the invocation of this function.
7876 ///
7877 /// @param ops the array of the dwarf expression operations to consider.
7878 ///
7879 /// @param ops_len the lengths of @p ops array above.
7880 ///
7881 /// @param index the index of the operation to interpret, in @p ops.
7882 ///
7883 /// @param next_index the index of the operation to interpret at the
7884 /// next step, after this function completed and returned.  This is
7885 /// set an output parameter that is set iff the function returns true.
7886 ///
7887 /// @param ctxt the DEVM evaluation context.
7888 ///
7889 /// @return true if the current operation actually pushes a
7890 /// non-constant value onto the DEVM stack, false otherwise.
7891 static bool
op_pushes_non_constant_value(Dwarf_Op * ops,uint64_t ops_len,uint64_t index,uint64_t & next_index,dwarf_expr_eval_context & ctxt)7892 op_pushes_non_constant_value(Dwarf_Op* ops,
7893 			     uint64_t ops_len,
7894 			     uint64_t index,
7895 			     uint64_t& next_index,
7896 			     dwarf_expr_eval_context& ctxt)
7897 {
7898   ABG_ASSERT(index < ops_len);
7899   Dwarf_Op& op = ops[index];
7900 
7901   switch (op.atom)
7902     {
7903     case DW_OP_reg0:
7904     case DW_OP_reg1:
7905     case DW_OP_reg2:
7906     case DW_OP_reg3:
7907     case DW_OP_reg4:
7908     case DW_OP_reg5:
7909     case DW_OP_reg6:
7910     case DW_OP_reg7:
7911     case DW_OP_reg8:
7912     case DW_OP_reg9:
7913     case DW_OP_reg10:
7914     case DW_OP_reg11:
7915     case DW_OP_reg12:
7916     case DW_OP_reg13:
7917     case DW_OP_reg14:
7918     case DW_OP_reg15:
7919     case DW_OP_reg16:
7920     case DW_OP_reg17:
7921     case DW_OP_reg18:
7922     case DW_OP_reg19:
7923     case DW_OP_reg20:
7924     case DW_OP_reg21:
7925     case DW_OP_reg22:
7926     case DW_OP_reg23:
7927     case DW_OP_reg24:
7928     case DW_OP_reg25:
7929     case DW_OP_reg26:
7930     case DW_OP_reg27:
7931     case DW_OP_reg28:
7932     case DW_OP_reg29:
7933     case DW_OP_reg30:
7934     case DW_OP_reg31:
7935       next_index = index + 1;
7936       break;
7937 
7938     case DW_OP_breg0:
7939     case DW_OP_breg1:
7940     case DW_OP_breg2:
7941     case DW_OP_breg3:
7942     case DW_OP_breg4:
7943     case DW_OP_breg5:
7944     case DW_OP_breg6:
7945     case DW_OP_breg7:
7946     case DW_OP_breg8:
7947     case DW_OP_breg9:
7948     case DW_OP_breg10:
7949     case DW_OP_breg11:
7950     case DW_OP_breg12:
7951     case DW_OP_breg13:
7952     case DW_OP_breg14:
7953     case DW_OP_breg15:
7954     case DW_OP_breg16:
7955     case DW_OP_breg17:
7956     case DW_OP_breg18:
7957     case DW_OP_breg19:
7958     case DW_OP_breg20:
7959     case DW_OP_breg21:
7960     case DW_OP_breg22:
7961     case DW_OP_breg23:
7962     case DW_OP_breg24:
7963     case DW_OP_breg25:
7964     case DW_OP_breg26:
7965     case DW_OP_breg27:
7966     case DW_OP_breg28:
7967     case DW_OP_breg29:
7968     case DW_OP_breg30:
7969     case DW_OP_breg31:
7970       next_index = index + 1;
7971       break;
7972 
7973     case DW_OP_regx:
7974       next_index = index + 2;
7975       break;
7976 
7977     case DW_OP_fbreg:
7978       next_index = index + 1;
7979       break;
7980 
7981     case DW_OP_bregx:
7982       next_index = index + 1;
7983       break;
7984 
7985     default:
7986       return false;
7987     }
7988 
7989   expr_result r(false);
7990   ctxt.push(r);
7991 
7992   return true;
7993 }
7994 
7995 /// If the current operation in the dwarf expression represents a
7996 /// manipulation of the stack of the DWARF Expression Virtual Machine
7997 /// (aka DEVM), this function performs the operation and updates the
7998 /// state of the DEVM.  If the result of the operation represents a
7999 /// constant value, then the accumulator of the DEVM is set to that
8000 /// result's value, Otherwise, the DEVM accumulator is left with its
8001 /// previous value.
8002 ///
8003 /// @param expr the array of the dwarf expression operations to consider.
8004 ///
8005 /// @param expr_len the lengths of @p ops array above.
8006 ///
8007 /// @param index the index of the operation to interpret, in @p ops.
8008 ///
8009 /// @param next_index the index of the operation to interpret at the
8010 /// next step, after this function completed and returned.  This is
8011 /// set an output parameter that is set iff the function returns true.
8012 ///
8013 /// @param ctxt the DEVM evaluation context.
8014 ///
8015 /// @return true if the current operation actually manipulates the
8016 /// DEVM stack, false otherwise.
8017 static bool
op_manipulates_stack(Dwarf_Op * expr,uint64_t expr_len,uint64_t index,uint64_t & next_index,dwarf_expr_eval_context & ctxt)8018 op_manipulates_stack(Dwarf_Op* expr,
8019 		     uint64_t expr_len,
8020 		     uint64_t index,
8021 		     uint64_t& next_index,
8022 		     dwarf_expr_eval_context& ctxt)
8023 {
8024   Dwarf_Op& op = expr[index];
8025   expr_result v;
8026 
8027   switch (op.atom)
8028     {
8029     case DW_OP_dup:
8030       v = ctxt.stack.front();
8031       ctxt.push(v);
8032       break;
8033 
8034     case DW_OP_drop:
8035       v = ctxt.stack.front();
8036       ctxt.pop();
8037       break;
8038 
8039     case DW_OP_over:
8040       ABG_ASSERT(ctxt.stack.size() > 1);
8041       v = ctxt.stack[1];
8042       ctxt.push(v);
8043       break;
8044 
8045     case DW_OP_pick:
8046       ABG_ASSERT(index + 1 < expr_len);
8047       v = op.number;
8048       ctxt.push(v);
8049       break;
8050 
8051     case DW_OP_swap:
8052       ABG_ASSERT(ctxt.stack.size() > 1);
8053       v = ctxt.stack[1];
8054       ctxt.stack.erase(ctxt.stack.begin() + 1);
8055       ctxt.push(v);
8056       break;
8057 
8058     case DW_OP_rot:
8059       ABG_ASSERT(ctxt.stack.size() > 2);
8060       v = ctxt.stack[2];
8061       ctxt.stack.erase(ctxt.stack.begin() + 2);
8062       ctxt.push(v);
8063       break;
8064 
8065     case DW_OP_deref:
8066     case DW_OP_deref_size:
8067       ABG_ASSERT(ctxt.stack.size() > 0);
8068       ctxt.pop();
8069       v.is_const(false);
8070       ctxt.push(v);
8071       break;
8072 
8073     case DW_OP_xderef:
8074     case DW_OP_xderef_size:
8075       ABG_ASSERT(ctxt.stack.size() > 1);
8076       ctxt.pop();
8077       ctxt.pop();
8078       v.is_const(false);
8079       ctxt.push(v);
8080       break;
8081 
8082     case DW_OP_push_object_address:
8083       v.is_const(false);
8084       ctxt.push(v);
8085       break;
8086 
8087     case DW_OP_form_tls_address:
8088     case DW_OP_GNU_push_tls_address:
8089       ABG_ASSERT(ctxt.stack.size() > 0);
8090       v = ctxt.pop();
8091       if (op.atom == DW_OP_form_tls_address)
8092 	v.is_const(false);
8093       ctxt.push(v);
8094       break;
8095 
8096     case DW_OP_call_frame_cfa:
8097       v.is_const(false);
8098       ctxt.push(v);
8099       break;
8100 
8101     default:
8102       return false;
8103     }
8104 
8105   if (v.is_const())
8106     ctxt.accum = v;
8107 
8108   if (op.atom == DW_OP_form_tls_address
8109       || op.atom == DW_OP_GNU_push_tls_address)
8110     ctxt.set_tls_address(true);
8111   else
8112     ctxt.set_tls_address(false);
8113 
8114   next_index = index + 1;
8115 
8116   return true;
8117 }
8118 
8119 /// If the current operation in the dwarf expression represents a push
8120 /// of an arithmetic or logic operation onto the dwarf expr virtual
8121 /// machine (aka DEVM), perform the operation and update the DEVM.
8122 ///
8123 /// If the result of the operation is a constant, update the DEVM
8124 /// accumulator with its value.  Otherwise, the DEVM accumulator is
8125 /// left with its previous value.
8126 ///
8127 /// @param expr the array of the dwarf expression operations to consider.
8128 ///
8129 /// @param expr_len the lengths of @p expr array above.
8130 ///
8131 /// @param index the index of the operation to interpret, in @p expr.
8132 ///
8133 /// @param next_index the index of the operation to interpret at the
8134 /// next step, after this function completed and returned.  This is
8135 /// set an output parameter that is set iff the function returns true.
8136 ///
8137 /// @param ctxt the DEVM evaluation context.
8138 ///
8139 /// @return true if the current operation actually represent an
8140 /// arithmetic or logic operation.
8141 static bool
op_is_arith_logic(Dwarf_Op * expr,uint64_t expr_len,uint64_t index,uint64_t & next_index,dwarf_expr_eval_context & ctxt)8142 op_is_arith_logic(Dwarf_Op* expr,
8143 		  uint64_t expr_len,
8144 		  uint64_t index,
8145 		  uint64_t& next_index,
8146 		  dwarf_expr_eval_context& ctxt)
8147 {
8148   ABG_ASSERT(index < expr_len);
8149 
8150   Dwarf_Op& op = expr[index];
8151   expr_result val1, val2;
8152 
8153   switch (op.atom)
8154     {
8155     case DW_OP_abs:
8156       val1 = ctxt.pop();
8157       val1 = val1.abs();
8158       ctxt.push(val1);
8159       break;
8160 
8161     case DW_OP_and:
8162       ABG_ASSERT(ctxt.stack.size() > 1);
8163       val1 = ctxt.pop();
8164       val2 = ctxt.pop();
8165       ctxt.push(val1 & val2);
8166       break;
8167 
8168     case DW_OP_div:
8169       val1 = ctxt.pop();
8170       val2 = ctxt.pop();
8171       if (!val1.is_const())
8172 	val1 = 1;
8173       ctxt.push(val2 / val1);
8174       break;
8175 
8176     case DW_OP_minus:
8177       val1 = ctxt.pop();
8178       val2 = ctxt.pop();
8179       ctxt.push(val2 - val1);
8180       break;
8181 
8182     case DW_OP_mod:
8183       val1 = ctxt.pop();
8184       val2 = ctxt.pop();
8185       ctxt.push(val2 % val1);
8186       break;
8187 
8188     case DW_OP_mul:
8189       val1 = ctxt.pop();
8190       val2 = ctxt.pop();
8191       ctxt.push(val2 * val1);
8192       break;
8193 
8194     case DW_OP_neg:
8195       val1 = ctxt.pop();
8196       ctxt.push(-val1);
8197       break;
8198 
8199     case DW_OP_not:
8200       val1 = ctxt.pop();
8201       ctxt.push(~val1);
8202       break;
8203 
8204     case DW_OP_or:
8205       val1 = ctxt.pop();
8206       val2 = ctxt.pop();
8207       ctxt.push(val1 | val2);
8208       break;
8209 
8210     case DW_OP_plus:
8211       val1 = ctxt.pop();
8212       val2 = ctxt.pop();
8213       ctxt.push(val2 + val1);
8214       break;
8215 
8216     case DW_OP_plus_uconst:
8217       val1 = ctxt.pop();
8218       val1 += op.number;
8219       ctxt.push(val1);
8220       break;
8221 
8222     case DW_OP_shl:
8223       val1 = ctxt.pop();
8224       val2 = ctxt.pop();
8225       ctxt.push(val2 << val1);
8226       break;
8227 
8228     case DW_OP_shr:
8229     case DW_OP_shra:
8230       val1 = ctxt.pop();
8231       val2 = ctxt.pop();
8232       ctxt.push(val2 >> val1);
8233       break;
8234 
8235     case DW_OP_xor:
8236       val1 = ctxt.pop();
8237       val2 = ctxt.pop();
8238       ctxt.push(val2 ^ val1);
8239       break;
8240 
8241     default:
8242       return false;
8243     }
8244 
8245   if (ctxt.stack.front().is_const())
8246     ctxt.accum = ctxt.stack.front();
8247 
8248   next_index = index + 1;
8249   return true;
8250 }
8251 
8252 /// If the current operation in the dwarf expression represents a push
8253 /// of a control flow operation onto the dwarf expr virtual machine
8254 /// (aka DEVM), perform the operation and update the DEVM.
8255 ///
8256 /// If the result of the operation is a constant, update the DEVM
8257 /// accumulator with its value.  Otherwise, the DEVM accumulator is
8258 /// left with its previous value.
8259 ///
8260 /// @param expr the array of the dwarf expression operations to consider.
8261 ///
8262 /// @param expr_len the lengths of @p expr array above.
8263 ///
8264 /// @param index the index of the operation to interpret, in @p expr.
8265 ///
8266 /// @param next_index the index of the operation to interpret at the
8267 /// next step, after this function completed and returned.  This is
8268 /// set an output parameter that is set iff the function returns true.
8269 ///
8270 /// @param ctxt the DEVM evaluation context.
8271 ///
8272 /// @return true if the current operation actually represents a
8273 /// control flow operation, false otherwise.
8274 static bool
op_is_control_flow(Dwarf_Op * expr,uint64_t expr_len,uint64_t index,uint64_t & next_index,dwarf_expr_eval_context & ctxt)8275 op_is_control_flow(Dwarf_Op* expr,
8276 		   uint64_t expr_len,
8277 		   uint64_t index,
8278 		   uint64_t& next_index,
8279 		   dwarf_expr_eval_context& ctxt)
8280 {
8281   ABG_ASSERT(index < expr_len);
8282 
8283   Dwarf_Op& op = expr[index];
8284   expr_result val1, val2;
8285 
8286   switch (op.atom)
8287     {
8288     case DW_OP_eq:
8289     case DW_OP_ge:
8290     case DW_OP_gt:
8291     case DW_OP_le:
8292     case DW_OP_lt:
8293     case DW_OP_ne:
8294       {
8295 	bool value = true;
8296 	val1 = ctxt.pop();
8297 	val2 = ctxt.pop();
8298 	if (op.atom == DW_OP_eq)
8299 	  value = val2 == val1;
8300 	else if (op.atom == DW_OP_ge)
8301 	  value = val2 >= val1;
8302 	else if (op.atom == DW_OP_gt)
8303 	  value = val2 > val1;
8304 	else if (op.atom == DW_OP_le)
8305 	  value = val2 <= val1;
8306 	else if (op.atom == DW_OP_lt)
8307 	  value = val2 < val1;
8308 	else if (op.atom == DW_OP_ne)
8309 	  value = val2 != val1;
8310 
8311 	val1 = value ? 1 : 0;
8312 	ctxt.push(val1);
8313       }
8314       break;
8315 
8316     case DW_OP_skip:
8317       if (op.number > 0)
8318 	index += op.number - 1;
8319       break;
8320 
8321     case DW_OP_bra:
8322       val1 = ctxt.pop();
8323       if (val1 != 0)
8324 	index += val1.const_value() - 1;
8325       break;
8326 
8327     case DW_OP_call2:
8328     case DW_OP_call4:
8329     case DW_OP_call_ref:
8330     case DW_OP_nop:
8331       break;
8332 
8333     default:
8334       return false;
8335     }
8336 
8337   if (ctxt.stack.front().is_const())
8338     ctxt.accum = ctxt.stack.front();
8339 
8340   next_index = index + 1;
8341   return true;
8342 }
8343 
8344 /// This function quickly evaluates a DWARF expression that is a
8345 /// constant.
8346 ///
8347 /// This is a "fast path" function that quickly evaluates a DWARF
8348 /// expression that is only made of a DW_OP_plus_uconst operator.
8349 ///
8350 /// This is a sub-routine of die_member_offset.
8351 ///
8352 /// @param expr the DWARF expression to evaluate.
8353 ///
8354 /// @param expr_len the length of the expression @p expr.
8355 ///
8356 /// @param value out parameter.  This is set to the result of the
8357 /// evaluation of @p expr, iff this function returns true.
8358 ///
8359 /// @return true iff the evaluation of @p expr went OK.
8360 static bool
eval_quickly(Dwarf_Op * expr,uint64_t expr_len,int64_t & value)8361 eval_quickly(Dwarf_Op*	expr,
8362 	     uint64_t	expr_len,
8363 	     int64_t&	value)
8364 {
8365   if (expr_len == 1 && (expr[0].atom == DW_OP_plus_uconst))
8366     {
8367       value = expr[0].number;
8368       return true;
8369     }
8370   return false;
8371 }
8372 
8373 /// Evaluate the value of the last sub-expression that is a constant,
8374 /// inside a given DWARF expression.
8375 ///
8376 /// @param expr the DWARF expression to consider.
8377 ///
8378 /// @param expr_len the length of the expression to consider.
8379 ///
8380 /// @param value the resulting value of the last constant
8381 /// sub-expression of the DWARF expression.  This is set iff the
8382 /// function returns true.
8383 ///
8384 /// @param is_tls_address out parameter.  This is set to true iff
8385 /// the resulting value of the evaluation is a TLS (thread local
8386 /// storage) address.
8387 ///
8388 /// @param eval_ctxt the evaluation context to (re)use.  Note that
8389 /// this function initializes this context before using it.
8390 ///
8391 /// @return true if the function could find a constant sub-expression
8392 /// to evaluate, false otherwise.
8393 static bool
eval_last_constant_dwarf_sub_expr(Dwarf_Op * expr,uint64_t expr_len,int64_t & value,bool & is_tls_address,dwarf_expr_eval_context & eval_ctxt)8394 eval_last_constant_dwarf_sub_expr(Dwarf_Op*	expr,
8395 				  uint64_t	expr_len,
8396 				  int64_t&	value,
8397 				  bool&	is_tls_address,
8398 				  dwarf_expr_eval_context &eval_ctxt)
8399 {
8400   // Reset the evaluation context before evaluating the constant sub
8401   // expression contained in the DWARF expression 'expr'.
8402   eval_ctxt.reset();
8403 
8404   uint64_t index = 0, next_index = 0;
8405   do
8406     {
8407       if (op_is_arith_logic(expr, expr_len, index,
8408 			    next_index, eval_ctxt)
8409 	  || op_pushes_constant_value(expr, expr_len, index,
8410 				      next_index, eval_ctxt)
8411 	  || op_manipulates_stack(expr, expr_len, index,
8412 				  next_index, eval_ctxt)
8413 	  || op_pushes_non_constant_value(expr, expr_len, index,
8414 					  next_index, eval_ctxt)
8415 	  || op_is_control_flow(expr, expr_len, index,
8416 				next_index, eval_ctxt))
8417 	;
8418       else
8419 	next_index = index + 1;
8420 
8421       ABG_ASSERT(next_index > index);
8422       index = next_index;
8423     } while (index < expr_len);
8424 
8425   is_tls_address = eval_ctxt.set_tls_address();
8426   if (eval_ctxt.accum.is_const())
8427     {
8428       value = eval_ctxt.accum;
8429       return true;
8430     }
8431   return false;
8432 }
8433 
8434 /// Evaluate the value of the last sub-expression that is a constant,
8435 /// inside a given DWARF expression.
8436 ///
8437 /// @param expr the DWARF expression to consider.
8438 ///
8439 /// @param expr_len the length of the expression to consider.
8440 ///
8441 /// @param value the resulting value of the last constant
8442 /// sub-expression of the DWARF expression.  This is set iff the
8443 /// function returns true.
8444 ///
8445 /// @return true if the function could find a constant sub-expression
8446 /// to evaluate, false otherwise.
8447 static bool
eval_last_constant_dwarf_sub_expr(Dwarf_Op * expr,uint64_t expr_len,int64_t & value,bool & is_tls_address)8448 eval_last_constant_dwarf_sub_expr(Dwarf_Op*	expr,
8449 				  uint64_t	expr_len,
8450 				  int64_t&	value,
8451 				  bool&	is_tls_address)
8452 {
8453   dwarf_expr_eval_context eval_ctxt;
8454   return eval_last_constant_dwarf_sub_expr(expr, expr_len, value,
8455 					   is_tls_address, eval_ctxt);
8456 }
8457 
8458 // -----------------------------------
8459 // </location expression evaluation>
8460 // -----------------------------------
8461 
8462 /// Convert the value of the DW_AT_bit_offset attribute into the value
8463 /// of the DW_AT_data_bit_offset attribute.
8464 ///
8465 /// On big endian machines, the value of the DW_AT_bit_offset
8466 /// attribute is the same as the value of the DW_AT_data_bit_offset
8467 /// attribute.
8468 ///
8469 /// On little endian machines however, the situation is different.
8470 /// The DW_AT_bit_offset value for a bit field is the number of bits
8471 /// to the left of the most significant bit of the bit field.
8472 ///
8473 /// The DW_AT_data_bit_offset offset value is the number of bits to
8474 /// the right of the least significant bit of the bit field.
8475 ///
8476 /// In other words, DW_AT_data_bit_offset is what everybody would
8477 /// instinctively think of as being the "offset of the bit
8478 /// field". DW_AT_bit_offset however is very counter-intuitive on
8479 /// little endian machines.
8480 ///
8481 /// This function thus reads the value of a DW_AT_bit_offset property
8482 /// of a DIE and converts it into what the DW_AT_data_bit_offset would
8483 /// have been if it was present.
8484 ///
8485 /// Note that DW_AT_bit_offset has been made obsolete starting from
8486 /// DWARF5.
8487 ///
8488 /// If you like coffee and it's not too late, now might be a good time
8489 /// to have a coffee break.  Otherwise if it's late at night, you
8490 /// might want to consider an herbal tea break.  Then come back to
8491 /// read this.
8492 ///
8493 ///
8494 /// Okay, to have a better idea of what DW_AT_bit_offset and
8495 /// DW_AT_data_bit_offset represent, let's consider a struct 'S' which
8496 /// have bit fields data members defined as:
8497 ///
8498 ///      struct S
8499 ///      {
8500 ///        int j:5;
8501 ///        int k:6;
8502 ///        int m:5;
8503 ///        int n:8;
8504 ///      };
8505 ///
8506 /// The below wonderful (at least!) ASCII art sketch describes the
8507 /// layout of the bitfields of 'struct S' on a little endian machine.
8508 /// You need to read the sketch from the bottom-up.
8509 ///
8510 /// So please scroll down to its bottom.  Note how the 32 bits integer
8511 /// word containing the bit fields is laid out with its least
8512 /// significant bit starting on the right hand side, at index 0.
8513 ///
8514 /// Then slowly scroll up starting from there, and take the time to
8515 /// read each line and see how the bit fields are laid out and what
8516 /// DW_AT_bit_offset and DW_AT_data_bit_offset represent for each of
8517 /// the bit fields.
8518 ///
8519 /// DW_AT_bit_offset(n)
8520 /// <   - - - - - - >
8521 /// |               |       n      |
8522 /// ^               ^< - -   - -  >^
8523 ///                                           DW_AT_data_bit_offset(n)
8524 ///                                <  - - - - -  - - - - - - - - - - >
8525 ///                                |                                 |
8526 ///                                ^                                 ^
8527 ///                 DW_AT_bit_offset(m)
8528 /// <--------------------------------->
8529 /// |                                 |   m   |
8530 /// ^                                 ^<  -  >^
8531 ///                                           DW_AT_data_bit_offset(m)
8532 ///                                           <  - - - - - - - - - - >
8533 ///                                           |                      |
8534 ///                                           ^                      ^
8535 ///                           DW_AT_bit_offset(k)
8536 /// <-------------------------------------------->
8537 /// |                                            |    k    |
8538 /// ^                                            ^<  - -  >^
8539 ///                                                     DW_AT_data_bit_offset(k)
8540 ///                                                        < - - - - >
8541 ///                                                        |         |
8542 ///                                                        ^         ^
8543 ///                                      DW_AT_bit_offset(j)
8544 /// <-------------------------------------------------------->
8545 /// |                                                        |
8546 /// ^                                                        ^
8547 ///                       n               m          k          j
8548 ///                 <  - - - - - - >  < - - - >  < - - - - > < - - - >
8549 ///
8550 /// | | | | | | | | |  | | | | | | |  | | | | |  | | | | | | | | | | |
8551 /// ^       ^       ^              ^  ^       ^  ^       ^ ^ ^       ^
8552 /// 31      27      23             16 15      11 10      6 5 4       0
8553 ///
8554 /// So, the different bit fields all fit in one 32 bits word, assuming
8555 /// the bit fields are tightly packed.
8556 ///
8557 /// Let's look at what DW_AT_bit_offset of the 'j' bit field would be
8558 /// on this little endian machine and let's see how it relates to
8559 /// DW_AT_data_bit_offset of j.
8560 ///
8561 /// DW_AT_bit_offset(j) would be equal to the number of bits from the
8562 /// left of the 32 bits word (i.e from bit number 31) to the most
8563 /// significant bit of the j bit field (i.e, bit number 4).  Thus:
8564 ///
8565 ///       DW_AT_bit_offset(j) =
8566 ///         sizeof_in_bits(int) - size_in_bits_of(j) = 32 - 5 = 27.
8567 ///
/// DW_AT_data_bit_offset(j) is the number of bits from the right of the
/// 32 bits word (i.e, bit number 0) to the least significant bit of
/// the 'j' bit field (i.e, bit number 0).  Thus:
8571 ///
8572 ///       DW_AT_data_bit_offset(j) = 0.
8573 ///
8574 /// More generally, we can notice that:
8575 ///
8576 ///       sizeof_in_bits(int) =
8577 ///         DW_AT_bit_offset(j) + sizeof_in_bits(j) + DW_AT_data_bit_offset(j).
8578 ///
8579 /// It follows that:
8580 ///
8581 ///       DW_AT_data_bit_offset(j) =
8582 ///          sizeof_in_bits(int) - sizeof_in_bits(j) - DW_AT_bit_offset(j);
8583 ///
8584 /// Thus:
8585 ///
8586 ///       DW_AT_data_bit_offset(j) = 32 - 27 - 5 = 0;
8587 ///
8588 /// Note that DW_AT_data_bit_offset(j) is the offset of 'j' starting
8589 /// from the right hand side of the word.  It is what we would
8590 /// intuitively think it is.  DW_AT_bit_offset however is super
8591 /// counter-intuitive, pfff.
8592 ///
8593 /// Anyway, this general equation holds true for all bit fields.
8594 ///
8595 /// Similarly, it follows that:
8596 ///
8597 ///       DW_AT_bit_offset(k) =
8598 ///         sizeof_in_bits(int) - sizeof_in_bits(k) - DW_AT_data_bit_offset(k);
8599 ///
8600 /// Thus:
8601 ///       DW_AT_bit_offset(k) = 32 - 6 - 5 = 21.
8602 ///
8603 ///
8604 /// Likewise:
8605 ///
8606 ///      DW_AT_bit_offset(m) =
8607 ///        sizeof_in_bits(int) - sizeof_in_bits(m) - DW_AT_data_bit_offset(m);
8608 ///
8609 ///
8610 /// Thus:
8611 ///      DW_AT_bit_offset(m) = 32 - 5 - (5 + 6) = 16.
8612 ///
/// Lastly:
///
///      DW_AT_bit_offset(n) =
///        sizeof_in_bits(int) - sizeof_in_bits(n) - DW_AT_data_bit_offset(n);
8620 ///
8621 /// Thus:
8622 ///      DW_AT_bit_offset(n) = 32 - 8 - (5 + 6 + 5) = 8.
8623 ///
8624 /// Luckily, the body of the function is much smaller than this
8625 /// comment.  Enjoy!
8626 ///
8627 /// @param die the DIE to consider.
8628 ///
8629 /// @param is_big_endian this is true iff the machine we are looking at
8630 /// is big endian.
8631 ///
8632 /// @param offset this is the output parameter into which the value of
8633 /// the DW_AT_bit_offset is put, converted as if it was the value of
8634 /// the DW_AT_data_bit_offset parameter.  This parameter is set iff
8635 /// the function returns true.
8636 ///
8637 /// @return true if DW_AT_bit_offset was found on @p die.
8638 static bool
read_and_convert_DW_at_bit_offset(const Dwarf_Die * die,bool is_big_endian,uint64_t & offset)8639 read_and_convert_DW_at_bit_offset(const Dwarf_Die* die,
8640 				  bool is_big_endian,
8641 				  uint64_t &offset)
8642 {
8643   uint64_t off = 0;
8644   if (!die_unsigned_constant_attribute(die, DW_AT_bit_offset, off))
8645     return false;
8646 
8647   if (is_big_endian)
8648     {
8649       offset = off;
8650       return true;
8651     }
8652 
8653   // Okay, we are looking at a little endian machine.  We need to
8654   // convert DW_AT_bit_offset into what DW_AT_data_bit_offset would
8655   // have been.  To understand this, you really need to read the
8656   // preliminary comment of this function.
8657   uint64_t containing_anonymous_object_size = 0;
8658   ABG_ASSERT(die_unsigned_constant_attribute(die, DW_AT_byte_size,
8659 					     containing_anonymous_object_size));
8660   containing_anonymous_object_size *= 8;
8661 
8662   uint64_t bitfield_size = 0;
8663   ABG_ASSERT(die_unsigned_constant_attribute(die, DW_AT_bit_size,
8664 					     bitfield_size));
8665 
8666   // As noted in the the preliminary comment of this function if we
8667   // want to get the DW_AT_data_bit_offset of a bit field 'k' from the
8668   // its DW_AT_bit_offset value, the equation is:
8669   //
8670   //     DW_AT_data_bit_offset(k) =
8671   //       sizeof_in_bits(containing_anonymous_object_size)
8672   //       - DW_AT_data_bit_offset(k)
8673   //       - sizeof_in_bits(k)
8674   offset = containing_anonymous_object_size - off - bitfield_size;
8675 
8676   return true;
8677 }
8678 
8679 /// Get the offset of a struct/class member as represented by the
8680 /// value of the DW_AT_data_member_location attribute.
8681 ///
8682 /// There is a huge gotcha in here.  The value of the
8683 /// DW_AT_data_member_location is not necessarily a constant that one
8684 /// would just read and be done with it.  Rather, it can be a DWARF
8685 /// expression that one has to interpret.  In general, the offset can
8686 /// be given by the DW_AT_bit_offset or DW_AT_data_bit_offset
8687 /// attribute.  In that case the offset is a constant.  But it can
8688 /// also be given by the DW_AT_data_member_location attribute.  In
8689 /// that case it's a DWARF location expression.
8690 ///
/// When it is the DW_AT_data_member_location attribute that is present,
8692 /// there are three cases to possibly take into account:
8693 ///
8694 ///     1/ The offset in the vtable where the offset of a virtual base
8695 ///        can be found, aka vptr offset.  Given the address of a
8696 ///        given object O, the vptr offset for B is given by the
8697 ///        (DWARF) expression:
8698 ///
8699 ///            address(O) + *(*address(0) - VIRTUAL_OFFSET)
8700 ///
8701 ///        where VIRTUAL_OFFSET is a constant value; In this case,
8702 ///        this function returns the constant VIRTUAL_OFFSET, as this
8703 ///        is enough to detect changes in a given virtual base
8704 ///        relative to the other virtual bases.
8705 ///
8706 ///     2/ The offset of a regular data member.  Given the address of
8707 ///        a struct object named O, the memory location for a
8708 ///        particular data member is given by the (DWARF) expression:
8709 ///
8710 ///            address(O) + OFFSET
8711 ///
8712 ///       where OFFSET is a constant.  In this case, this function
8713 ///       returns the OFFSET constant.
8714 ///
8715 ///     3/ The offset of a virtual member function in the virtual
8716 ///     pointer.  The DWARF expression is a constant that designates
8717 ///     the offset of the function in the vtable.  In this case this
8718 ///     function returns that constant.
8719 ///
8720 ///@param ctxt the read context to consider.
8721 ///
8722 ///@param die the DIE to read the information from.
8723 ///
8724 ///@param offset the resulting constant offset, in bits.  This
8725 ///argument is set iff the function returns true.
8726 static bool
die_member_offset(const read_context & ctxt,const Dwarf_Die * die,int64_t & offset)8727 die_member_offset(const read_context& ctxt,
8728 		  const Dwarf_Die* die,
8729 		  int64_t& offset)
8730 {
8731   Dwarf_Op* expr = NULL;
8732   uint64_t expr_len = 0;
8733   uint64_t off = 0;
8734 
8735   // First let's see if the DW_AT_data_bit_offset attribute is
8736   // present.
8737   if (die_unsigned_constant_attribute(die, DW_AT_data_bit_offset, off))
8738     {
8739       offset = off;
8740       return true;
8741     }
8742 
8743   // Otherwise, let's see if the DW_AT_bit_offset attribute is
8744   // present.  On little endian machines, we need to convert this
8745   // attribute into what it would have been if the
8746   // DW_AT_data_bit_offset was used instead.  In other words,
8747   // DW_AT_bit_offset needs to be converted into a
8748   // human-understandable form that represents the offset of the
8749   // bitfield data member it describes.  For details about the
8750   // conversion, please read the extensive comments of
8751   // read_and_convert_DW_at_bit_offset.
8752   bool is_big_endian = architecture_is_big_endian(ctxt.elf_handle());
8753   if (read_and_convert_DW_at_bit_offset(die, is_big_endian, off))
8754     {
8755       offset = off;
8756       return true;
8757     }
8758 
8759   if (!die_location_expr(die, DW_AT_data_member_location, &expr, &expr_len))
8760     return false;
8761 
8762   // Otherwise, the DW_AT_data_member_location attribute is present.
8763   // In that case, let's evaluate it and get its constant
8764   // sub-expression and return that one.
8765 
8766   if (!eval_quickly(expr, expr_len, offset))
8767     {
8768       bool is_tls_address = false;
8769       if (!eval_last_constant_dwarf_sub_expr(expr, expr_len,
8770 					     offset, is_tls_address,
8771 					     ctxt.dwarf_expr_eval_ctxt()))
8772 	return false;
8773     }
8774 
8775   offset *= 8;
8776   return true;
8777 }
8778 
8779 /// Read the value of the DW_AT_location attribute from a DIE,
8780 /// evaluate the resulting DWARF expression and, if it's a constant
8781 /// expression, return it.
8782 ///
8783 /// @param die the DIE to consider.
8784 ///
8785 /// @param address the resulting constant address.  This is set iff
8786 /// the function returns true.
8787 ///
8788 /// @return true iff the whole sequence of action described above
8789 /// could be completed normally.
8790 static bool
die_location_address(Dwarf_Die * die,Dwarf_Addr & address,bool & is_tls_address)8791 die_location_address(Dwarf_Die*	die,
8792 		     Dwarf_Addr&	address,
8793 		     bool&		is_tls_address)
8794 {
8795   Dwarf_Op* expr = NULL;
8796   uint64_t expr_len = 0;
8797 
8798   is_tls_address = false;
8799   if (!die_location_expr(die, DW_AT_location, &expr, &expr_len))
8800     return false;
8801 
8802   int64_t addr = 0;
8803   if (!eval_last_constant_dwarf_sub_expr(expr, expr_len, addr, is_tls_address))
8804     return false;
8805 
8806   address = addr;
8807   return true;
8808 }
8809 
8810 
8811 /// Return the index of a function in its virtual table.  That is,
8812 /// return the value of the DW_AT_vtable_elem_location attribute.
8813 ///
8814 /// @param die the DIE of the function to consider.
8815 ///
8816 /// @param vindex the resulting index.  This is set iff the function
8817 /// returns true.
8818 ///
8819 /// @return true if the DIE has a DW_AT_vtable_elem_location
8820 /// attribute.
8821 static bool
die_virtual_function_index(Dwarf_Die * die,int64_t & vindex)8822 die_virtual_function_index(Dwarf_Die* die,
8823 			   int64_t& vindex)
8824 {
8825   if (!die)
8826     return false;
8827 
8828   Dwarf_Op* expr = NULL;
8829   uint64_t expr_len = 0;
8830   if (!die_location_expr(die, DW_AT_vtable_elem_location,
8831 			 &expr, &expr_len))
8832     return false;
8833 
8834   int64_t i = 0;
8835   bool is_tls_addr = false;
8836   if (!eval_last_constant_dwarf_sub_expr(expr, expr_len, i, is_tls_addr))
8837     return false;
8838 
8839   vindex = i;
8840   return true;
8841 }
8842 
8843 /// Test if a given DIE represents an anonymous type.
8844 ///
8845 /// Anonymous types we are interested in are classes, unions and
8846 /// enumerations.
8847 ///
8848 /// @param die the DIE to consider.
8849 ///
8850 /// @return true iff @p die represents an anonymous type.
8851 bool
is_anonymous_type_die(Dwarf_Die * die)8852 is_anonymous_type_die(Dwarf_Die *die)
8853 {
8854   int tag = dwarf_tag(die);
8855 
8856   if (tag == DW_TAG_class_type
8857       || tag == DW_TAG_structure_type
8858       || tag == DW_TAG_union_type
8859       || tag == DW_TAG_enumeration_type)
8860     return die_is_anonymous(die);
8861 
8862   return false;
8863 }
8864 
8865 /// Return the base of the internal name to represent an anonymous
8866 /// type.
8867 ///
8868 /// Typically, anonymous enums would be named
8869 /// __anonymous_enum__<number>, anonymous struct or classes would be
8870 /// named __anonymous_struct__<number> and anonymous unions would be
8871 /// named __anonymous_union__<number>.  The first part of these
8872 /// anonymous names (i.e, __anonymous_{enum,struct,union}__ is called
8873 /// the base name.  This function returns that base name, depending on
8874 /// the kind of type DIE we are looking at.
8875 ///
8876 /// @param die the type DIE to look at.  This function expects a type
8877 /// DIE with an empty DW_AT_name property value (anonymous).
8878 ///
8879 /// @return a string representing the base of the internal anonymous
8880 /// name.
8881 static string
get_internal_anonymous_die_prefix_name(const Dwarf_Die * die)8882 get_internal_anonymous_die_prefix_name(const Dwarf_Die *die)
8883 {
8884   ABG_ASSERT(die_is_type(die));
8885   ABG_ASSERT(die_string_attribute(die, DW_AT_name) == "");
8886 
8887   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
8888   string type_name;
8889   if (tag == DW_TAG_class_type || tag == DW_TAG_structure_type)
8890     type_name = tools_utils::get_anonymous_struct_internal_name_prefix();
8891   else if (tag == DW_TAG_union_type)
8892     type_name = tools_utils::get_anonymous_union_internal_name_prefix();
8893   else if (tag == DW_TAG_enumeration_type)
8894     type_name = tools_utils::get_anonymous_enum_internal_name_prefix();
8895 
8896   return type_name;
8897 }
8898 
/// Build a full internal anonymous type name from its base name and
/// its index.
///
/// @param base_name this is the base name as returned by the function
/// @ref get_internal_anonymous_die_prefix_name.
///
/// @param anonymous_type_index this is the index of the anonymous
/// type in its scope.  That is, if there are more than one anonymous
/// types of a given kind in a scope, this index is what tells them
/// apart, starting from 0.
///
/// @return @p base_name followed by @p anonymous_type_index.  Note
/// that when the index is zero or when @p base_name is empty, @p
/// base_name is returned unchanged.
static string
build_internal_anonymous_die_name(const string &base_name,
				  size_t anonymous_type_index)
{
  // The first anonymous type of a scope (index zero) carries no
  // numeric suffix, and an empty base name is never decorated.
  if (anonymous_type_index == 0 || base_name.empty())
    return base_name;

  std::ostringstream out;
  out << base_name << anonymous_type_index;
  return out.str();
}
8924 
/// Build the internal name of the underlying type of an enum.
///
/// The name has the form "enum-<base_name>-underlying-type-<size>"
/// for a named underlying type and
/// "unnamed-enum-underlying-type-<size>" for an anonymous one.
///
/// @param base_name the (unqualified) name of the enum the underlying
/// type is destined to.  It is not used when @p is_anonymous is true.
///
/// @param is_anonymous true if the underlying type of the enum is to
/// be anonymous.
///
/// @param size the size that ends up at the end of the built name.
///
/// @return the built internal name.
static string
build_internal_underlying_enum_type_name(const string &base_name,
					 bool is_anonymous,
					 uint64_t size)
{
  const string prefix =
    is_anonymous ? string("unnamed-enum") : "enum-" + base_name;

  std::ostringstream out;
  out << prefix << "-underlying-type-" << size;
  return out.str();
}
8948 
8949 /// Build a full internal anonymous type name.
8950 ///
8951 /// @param die the DIE representing the anonymous type to consider.
8952 ///
8953 /// @param anonymous_type_index the index of the anonymous type
8954 /// represented by @p DIE, in its scope.  That is, if there are
8955 /// several different anonymous types of the same kind as @p die, this
8956 /// index is what tells them appart.
8957 ///
8958 /// @return the internal name of the anonymous type represented by @p
8959 /// DIE.
8960 static string
get_internal_anonymous_die_name(Dwarf_Die * die,size_t anonymous_type_index)8961 get_internal_anonymous_die_name(Dwarf_Die *die,
8962 				size_t anonymous_type_index)
8963 {
8964   string name = get_internal_anonymous_die_prefix_name(die);
8965   name = build_internal_anonymous_die_name(name, anonymous_type_index);
8966   return name;
8967 }
8968 
8969 // ------------------------------------
8970 // <DIE pretty printer>
8971 // ------------------------------------
8972 
8973 /// Compute the qualified name of a DIE that represents a type.
8974 ///
8975 /// For instance, if the DIE tag is DW_TAG_subprogram then this
8976 /// function computes the name of the function *type*.
8977 ///
8978 /// @param ctxt the read context.
8979 ///
8980 /// @param die the DIE to consider.
8981 ///
8982 /// @param where_offset where in the are logically are in the DIE
8983 /// stream.
8984 ///
8985 /// @return a copy of the qualified name of the type.
8986 static string
die_qualified_type_name(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset)8987 die_qualified_type_name(const read_context& ctxt,
8988 			const Dwarf_Die* die,
8989 			size_t where_offset)
8990 {
8991   if (!die)
8992     return "";
8993 
8994   int tag = dwarf_tag (const_cast<Dwarf_Die*>(die));
8995   if (tag == DW_TAG_compile_unit
8996       || tag == DW_TAG_partial_unit
8997       || tag == DW_TAG_type_unit)
8998     return "";
8999 
9000   string name = die_name(die);
9001 
9002   Dwarf_Die scope_die;
9003   if (!get_scope_die(ctxt, die, where_offset, scope_die))
9004     return "";
9005 
9006   string parent_name = die_qualified_name(ctxt, &scope_die, where_offset);
9007   bool colon_colon = die_is_type(die) || die_is_namespace(die);
9008   string separator = colon_colon ? "::" : ".";
9009 
9010   string repr;
9011 
9012   switch (tag)
9013     {
9014     case DW_TAG_unspecified_type:
9015       break;
9016 
9017     case DW_TAG_base_type:
9018       {
9019 	abigail::ir::integral_type int_type;
9020 	if (parse_integral_type(name, int_type))
9021 	  repr = int_type;
9022 	else
9023 	  repr = name;
9024       }
9025       break;
9026 
9027     case DW_TAG_typedef:
9028     case DW_TAG_enumeration_type:
9029     case DW_TAG_structure_type:
9030     case DW_TAG_class_type:
9031     case DW_TAG_union_type:
9032       {
9033 	if (tag == DW_TAG_typedef)
9034 	  {
9035 	    // If the underlying type of the typedef is unspecified,
9036 	    // bail out as we don't support that yet.
9037 	    Dwarf_Die underlying_type_die;
9038 	    if (die_die_attribute(die, DW_AT_type, underlying_type_die))
9039 	      {
9040 		string n = die_qualified_type_name(ctxt, &underlying_type_die,
9041 						   where_offset);
9042 		if (die_is_unspecified(&underlying_type_die)
9043 		    || n.empty())
9044 		  break;
9045 	      }
9046 	  }
9047 
9048 	if (name.empty())
9049 	  // TODO: handle cases where there are more than one
9050 	  // anonymous type of the same kind in the same scope.  In
9051 	  // that case, their name must be built with the function
9052 	  // get_internal_anonymous_die_name or something of the same
9053 	  // kind.
9054 	  name = get_internal_anonymous_die_prefix_name(die);
9055 
9056 	ABG_ASSERT(!name.empty());
9057 	repr = parent_name.empty() ? name : parent_name + separator + name;
9058       }
9059       break;
9060 
9061     case DW_TAG_const_type:
9062     case DW_TAG_volatile_type:
9063     case DW_TAG_restrict_type:
9064       {
9065 	Dwarf_Die underlying_type_die;
9066 	bool has_underlying_type_die =
9067 	  die_die_attribute(die, DW_AT_type, underlying_type_die);
9068 
9069 	if (has_underlying_type_die && die_is_unspecified(&underlying_type_die))
9070 	  break;
9071 
9072 	if (tag == DW_TAG_const_type)
9073 	  {
9074 	    if (has_underlying_type_die
9075 		&& die_is_reference_type(&underlying_type_die))
9076 	      // A reference is always const.  So, to lower false
9077 	      // positive reports in diff computations, we consider a
9078 	      // const reference just as a reference.  But we need to
9079 	      // keep the qualified-ness of the type.  So we introduce
9080 	      // a 'no-op' qualifier here.  Please remember that this
9081 	      // has to be kept in sync with what is done in
9082 	      // get_name_of_qualified_type.  So if you change this
9083 	      // here, you have to change that code there too.
9084 	      repr = "";
9085 	    else if (!has_underlying_type_die
9086 		     || die_is_void_type(&underlying_type_die))
9087 	      {
9088 		repr = "void";
9089 		break;
9090 	      }
9091 	    else
9092 	      repr = "const";
9093 	  }
9094 	else if (tag == DW_TAG_volatile_type)
9095 	  repr = "volatile";
9096 	else if (tag == DW_TAG_restrict_type)
9097 	  repr = "restrict";
9098 	else
9099 	  ABG_ASSERT_NOT_REACHED;
9100 
9101 	string underlying_type_repr;
9102 	if (has_underlying_type_die)
9103 	  underlying_type_repr =
9104 	    die_qualified_type_name(ctxt, &underlying_type_die, where_offset);
9105 	else
9106 	  underlying_type_repr = "void";
9107 
9108 	if (underlying_type_repr.empty())
9109 	  repr.clear();
9110 	else
9111 	  {
9112 	    if (has_underlying_type_die
9113 		&& die_is_pointer_or_reference_type(&underlying_type_die))
9114 	      repr = underlying_type_repr + " " + repr;
9115 	    else
9116 	      repr += " " + underlying_type_repr;
9117 	  }
9118       }
9119       break;
9120 
9121     case DW_TAG_pointer_type:
9122     case DW_TAG_reference_type:
9123     case DW_TAG_rvalue_reference_type:
9124       {
9125 	Dwarf_Die pointed_to_type_die;
9126 	if (!die_die_attribute(die, DW_AT_type, pointed_to_type_die))
9127 	  {
9128 	    if (tag == DW_TAG_pointer_type)
9129 	      repr = "void*";
9130 	    break;
9131 	  }
9132 
9133 	if (die_is_unspecified(&pointed_to_type_die))
9134 	  break;
9135 
9136 	string pointed_type_repr =
9137 	  die_qualified_type_name(ctxt, &pointed_to_type_die, where_offset);
9138 
9139 	repr = pointed_type_repr;
9140 	if (repr.empty())
9141 	  break;
9142 
9143 	if (tag == DW_TAG_pointer_type)
9144 	  repr += "*";
9145 	else if (tag == DW_TAG_reference_type)
9146 	  repr += "&";
9147 	else if (tag == DW_TAG_rvalue_reference_type)
9148 	  repr += "&&";
9149 	else
9150 	  ABG_ASSERT_NOT_REACHED;
9151       }
9152       break;
9153 
9154     case DW_TAG_subrange_type:
9155       {
9156 	// In Ada, this one can be generated on its own, that is, not
9157 	// as a sub-type of an array.  So we need to support it on its
9158 	// own.  Note that when it's emitted as the sub-type of an
9159 	// array like in C and C++, this is handled differently, for
9160 	// now.  But we try to make this usable by other languages
9161 	// that are not Ada, even if we modelled it after Ada.
9162 
9163 	// So we build a subrange type for the sole purpose of using
9164 	// the ::as_string() method of that type.  So we don't add
9165 	// that type to the current type tree being built.
9166 	array_type_def::subrange_sptr s =
9167 	  build_subrange_type(const_cast<read_context&>(ctxt),
9168 			      die, where_offset,
9169 			      /*associate_die_to_type=*/false);
9170 	repr += s->as_string();
9171 	break;
9172       }
9173 
9174     case DW_TAG_array_type:
9175       {
9176 	Dwarf_Die element_type_die;
9177 	if (!die_die_attribute(die, DW_AT_type, element_type_die))
9178 	  break;
9179 	string element_type_name =
9180 	  die_qualified_type_name(ctxt, &element_type_die, where_offset);
9181 	if (element_type_name.empty())
9182 	  break;
9183 
9184 	array_type_def::subranges_type subranges;
9185 	build_subranges_from_array_type_die(const_cast<read_context&>(ctxt),
9186 					    die, subranges, where_offset,
9187 					    /*associate_type_to_die=*/false);
9188 
9189 	repr = element_type_name;
9190 	repr += array_type_def::subrange_type::vector_as_string(subranges);
9191       }
9192       break;
9193 
9194     case DW_TAG_subroutine_type:
9195     case DW_TAG_subprogram:
9196       {
9197 	string return_type_name;
9198 	string class_name;
9199 	vector<string> parm_names;
9200 	bool is_const = false;
9201 	bool is_static = false;
9202 
9203 	die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
9204 						   /*pretty_print=*/true,
9205 						   return_type_name, class_name,
9206 						   parm_names, is_const,
9207 						   is_static);
9208 	if (return_type_name.empty())
9209 	  return_type_name = "void";
9210 
9211 	repr = return_type_name;
9212 
9213 	if (!class_name.empty())
9214 	  {
9215 	    // This is a method, so print the class name.
9216 	    repr += " (" + class_name + "::*)";
9217 	  }
9218 
9219 	// Now parameters.
9220 	repr += " (";
9221 	for (vector<string>::const_iterator i = parm_names.begin();
9222 	     i != parm_names.end();
9223 	     ++i)
9224 	  {
9225 	    if (i != parm_names.begin())
9226 	      repr += ", ";
9227 	    repr += *i;
9228 	  }
9229 	repr += ")";
9230 
9231       }
9232       break;
9233 
9234     case DW_TAG_string_type:
9235     case DW_TAG_ptr_to_member_type:
9236     case DW_TAG_set_type:
9237     case DW_TAG_file_type:
9238     case DW_TAG_packed_type:
9239     case DW_TAG_thrown_type:
9240     case DW_TAG_interface_type:
9241     case DW_TAG_shared_type:
9242       break;
9243     }
9244 
9245   return repr;
9246 }
9247 
9248 /// Compute the qualified name of a decl represented by a given DIE.
9249 ///
9250 /// For instance, for a DIE of tag DW_TAG_subprogram this function
9251 /// computes the signature of the function *declaration*.
9252 ///
9253 /// @param ctxt the read context.
9254 ///
9255 /// @param die the DIE to consider.
9256 ///
9257 /// @param where_offset where we are logically at in the DIE stream.
9258 ///
9259 /// @return a copy of the computed name.
9260 static string
die_qualified_decl_name(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9261 die_qualified_decl_name(const read_context& ctxt,
9262 			const Dwarf_Die* die,
9263 			size_t where_offset)
9264 {
9265   if (!die || !die_is_decl(die))
9266     return "";
9267 
9268   string name = die_name(die);
9269 
9270   Dwarf_Die scope_die;
9271   if (!get_scope_die(ctxt, die, where_offset, scope_die))
9272     return "";
9273 
9274   string scope_name = die_qualified_name(ctxt, &scope_die, where_offset);
9275   string separator = "::";
9276 
9277   string repr;
9278 
9279   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9280   switch (tag)
9281     {
9282     case DW_TAG_namespace:
9283     case DW_TAG_member:
9284     case DW_TAG_variable:
9285       repr = scope_name.empty() ? name : scope_name + separator + name;
9286       break;
9287     case DW_TAG_subprogram:
9288       repr = die_function_signature(ctxt, die, where_offset);
9289       break;
9290 
9291     case DW_TAG_unspecified_parameters:
9292       repr = "...";
9293       break;
9294 
9295     case DW_TAG_formal_parameter:
9296     case DW_TAG_imported_declaration:
9297     case DW_TAG_GNU_template_template_param:
9298     case DW_TAG_GNU_template_parameter_pack:
9299     case DW_TAG_GNU_formal_parameter_pack:
9300       break;
9301     }
9302   return repr;
9303 }
9304 
9305 /// Compute the qualified name of the artifact represented by a given
9306 /// DIE.
9307 ///
9308 /// If the DIE represents a type, then the function computes the name
9309 /// of the type.  Otherwise, if the DIE represents a decl then the
9310 /// function computes the name of the decl.  Note that a DIE of tag
9311 /// DW_TAG_subprogram is going to be considered as a "type" -- just
9312 /// like if it was a DW_TAG_subroutine_type.
9313 ///
9314 /// @param ctxt the read context.
9315 ///
9316 /// @param die the DIE to consider.
9317 ///
9318 /// @param where_offset where we are logically at in the DIE stream.
9319 ///
9320 /// @return a copy of the computed name.
9321 static string
die_qualified_name(const read_context & ctxt,const Dwarf_Die * die,size_t where)9322 die_qualified_name(const read_context& ctxt, const Dwarf_Die* die, size_t where)
9323 {
9324   if (die_is_type(die))
9325     return die_qualified_type_name(ctxt, die, where);
9326   else if (die_is_decl(die))
9327     return die_qualified_decl_name(ctxt, die, where);
9328   return "";
9329 }
9330 
9331 /// Test if the qualified name of a given type should be empty.
9332 ///
9333 /// The reason why the name of a DIE with a given tag would be empty
9334 /// is that libabigail's internal representation doesn't yet support
9335 /// that tag; or if the DIE's qualified name is built from names of
9336 /// sub-types DIEs whose tags are not yet supported.
9337 ///
9338 /// @param ctxt the reading context.
9339 ///
9340 /// @param die the DIE to consider.
9341 ///
9342 /// @param where where we are logically at, in the DIE stream.
9343 ///
9344 /// @param qualified_name the qualified name of the DIE.  This is set
9345 /// only iff the function returns false.
9346 ///
9347 /// @return true if the qualified name of the DIE is empty.
9348 static bool
die_qualified_type_name_empty(const read_context & ctxt,const Dwarf_Die * die,size_t where,string & qualified_name)9349 die_qualified_type_name_empty(const read_context& ctxt,
9350 			      const Dwarf_Die* die,
9351 			      size_t where, string &qualified_name)
9352 {
9353   if (!die)
9354     return true;
9355 
9356   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9357 
9358   string qname;
9359   if (tag == DW_TAG_typedef
9360       || tag == DW_TAG_pointer_type
9361       || tag == DW_TAG_reference_type
9362       || tag == DW_TAG_rvalue_reference_type
9363       || tag == DW_TAG_array_type
9364       || tag == DW_TAG_const_type
9365       || tag == DW_TAG_volatile_type
9366       || tag == DW_TAG_restrict_type)
9367     {
9368       Dwarf_Die underlying_type_die;
9369       if (die_die_attribute(die, DW_AT_type, underlying_type_die))
9370 	{
9371 	  string name =
9372 	    die_qualified_type_name(ctxt, &underlying_type_die, where);
9373 	  if (name.empty())
9374 	    return true;
9375 	}
9376     }
9377   else
9378     {
9379       string name = die_qualified_type_name(ctxt, die, where);
9380       if (name.empty())
9381 	return true;
9382     }
9383 
9384   qname = die_qualified_type_name(ctxt, die, where);
9385   if (qname.empty())
9386     return true;
9387 
9388   qualified_name = qname;
9389   return false;
9390 }
9391 
9392 /// Given the DIE that represents a function type, compute the names
9393 /// of the following properties the function's type:
9394 ///
9395 ///   - return type
9396 ///   - enclosing class (if the function is a member function)
9397 ///   - function parameter types
9398 ///
9399 /// When the function we are looking at is a member function, it also
9400 /// tells if it's const.
9401 ///
9402 /// @param ctxt the reading context.
9403 ///
9404 /// @param die the DIE of the function or function type we are looking
9405 /// at.
9406 ///
9407 /// @param where_offset where we are logically at in the DIE stream.
9408 ///
9409 /// @param pretty_print if set to yes, the type names are going to be
9410 /// pretty-printed names; otherwise, they are just qualified type
9411 /// names.
9412 ///
9413 /// @param return_type_name out parameter.  This contains the name of
9414 /// the return type of the function.
9415 ///
9416 /// @param class_name out parameter.  If the function is a member
9417 /// function, this contains the name of the enclosing class.
9418 ///
9419 /// @param parm_names out parameter.  This vector is set to the names
9420 /// of the types of the parameters of the function.
9421 ///
9422 /// @param is_const out parameter.  If the function is a member
9423 /// function, this is set to true iff the member function is const.
9424 ///
9425 /// @param is_static out parameter.  If the function is a static
9426 /// member function, then this is set to true.
9427 static void
die_return_and_parm_names_from_fn_type_die(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,bool pretty_print,string & return_type_name,string & class_name,vector<string> & parm_names,bool & is_const,bool & is_static)9428 die_return_and_parm_names_from_fn_type_die(const read_context& ctxt,
9429 					   const Dwarf_Die* die,
9430 					   size_t where_offset,
9431 					   bool pretty_print,
9432 					   string &return_type_name,
9433 					   string &class_name,
9434 					   vector<string>& parm_names,
9435 					   bool& is_const,
9436 					   bool& is_static)
9437 {
9438   Dwarf_Die child;
9439   Dwarf_Die ret_type_die;
9440   if (!die_die_attribute(die, DW_AT_type, ret_type_die))
9441     return_type_name = "void";
9442   else
9443     return_type_name =
9444       pretty_print
9445       ? ctxt.get_die_pretty_representation(&ret_type_die, where_offset)
9446       : ctxt.get_die_qualified_type_name(&ret_type_die, where_offset);
9447 
9448   if (return_type_name.empty())
9449     return_type_name = "void";
9450 
9451   Dwarf_Die object_pointer_die, class_die;
9452   bool is_method_type =
9453     die_function_type_is_method_type(ctxt, die, where_offset,
9454 				     object_pointer_die,
9455 				     class_die, is_static);
9456 
9457   is_const = false;
9458   if (is_method_type)
9459     {
9460       class_name = ctxt.get_die_qualified_type_name(&class_die, where_offset);
9461 
9462       Dwarf_Die this_pointer_die;
9463       Dwarf_Die pointed_to_die;
9464       if (!is_static
9465 	  && die_die_attribute(&object_pointer_die, DW_AT_type,
9466 			       this_pointer_die))
9467 	if (die_die_attribute(&this_pointer_die, DW_AT_type, pointed_to_die))
9468 	  if (dwarf_tag(&pointed_to_die) == DW_TAG_const_type)
9469 	    is_const = true;
9470 
9471       string fn_name = die_name(die);
9472       string non_qualified_class_name = die_name(&class_die);
9473       bool is_ctor = fn_name == non_qualified_class_name;
9474       bool is_dtor = !fn_name.empty() && fn_name[0] == '~';
9475 
9476       if (is_ctor || is_dtor)
9477 	return_type_name.clear();
9478     }
9479 
9480   if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
9481     do
9482       {
9483 	int child_tag = dwarf_tag(&child);
9484 	if (child_tag == DW_TAG_formal_parameter)
9485 	  {
9486 	    Dwarf_Die parm_type_die;
9487 	    if (!die_die_attribute(&child, DW_AT_type, parm_type_die))
9488 	      continue;
9489 	    string qualified_name =
9490 	      pretty_print
9491 	      ? ctxt.get_die_pretty_representation(&parm_type_die, where_offset)
9492 	      : ctxt.get_die_qualified_type_name(&parm_type_die, where_offset);
9493 
9494 	    if (qualified_name.empty())
9495 	      continue;
9496 	    parm_names.push_back(qualified_name);
9497 	  }
9498 	else if (child_tag == DW_TAG_unspecified_parameters)
9499 	  {
9500 	    // This is a variadic function parameter.
9501 	    parm_names.push_back("variadic parameter type");
9502 	    // After a DW_TAG_unspecified_parameters tag, we shouldn't
9503 	    // keep reading for parameters.  The
9504 	    // unspecified_parameters TAG should be the last parameter
9505 	    // that we record. For instance, if there are multiple
9506 	    // DW_TAG_unspecified_parameters DIEs then we should care
9507 	    // only for the first one.
9508 	    break;
9509 	  }
9510       }
9511     while (dwarf_siblingof(&child, &child) == 0);
9512 
9513   if (class_name.empty())
9514     {
9515       Dwarf_Die parent_die;
9516       if (get_parent_die(ctxt, die, parent_die, where_offset))
9517 	{
9518 	  if (die_is_class_type(&parent_die))
9519 	    class_name =
9520 	      ctxt.get_die_qualified_type_name(&parent_die, where_offset);
9521 	}
9522     }
9523 }
9524 
9525 /// This computes the signature of the a function declaration
9526 /// represented by a DIE.
9527 ///
9528 /// @param ctxt the reading context.
9529 ///
9530 /// @param fn_die the DIE of the function to consider.
9531 ///
9532 /// @param where_offset where we are logically at in the stream of
9533 /// DIEs.
9534 ///
9535 /// @return a copy of the computed function signature string.
9536 static string
die_function_signature(const read_context & ctxt,const Dwarf_Die * fn_die,size_t where_offset)9537 die_function_signature(const read_context& ctxt,
9538 		       const Dwarf_Die *fn_die,
9539 		       size_t where_offset)
9540 {
9541 
9542   translation_unit::language lang;
9543   bool has_lang = false;
9544   if ((has_lang = ctxt.get_die_language(fn_die, lang)))
9545     {
9546       // In a binary originating from the C language, it's OK to use
9547       // the linkage name of the function as a key for the map which
9548       // is meant to reduce the number of DIE comparisons involved
9549       // during DIE canonicalization computation.
9550       if (is_c_language(lang))
9551 	{
9552 	  string fn_name = die_linkage_name(fn_die);
9553 	  if (fn_name.empty())
9554 	    fn_name = die_name(fn_die);
9555 	  return fn_name;
9556 	}
9557     }
9558 
9559   // TODO: When we can structurally compare DIEs originating from C++
9560   // as well, we can use the linkage name of functions in C++ too, to
9561   // reduce the number of comparisons involved during DIE
9562   // canonicalization.
9563 
9564   string return_type_name;
9565   Dwarf_Die ret_type_die;
9566   if (die_die_attribute(fn_die, DW_AT_type, ret_type_die))
9567     return_type_name = ctxt.get_die_qualified_type_name(&ret_type_die,
9568 							where_offset);
9569 
9570   if (return_type_name.empty())
9571     return_type_name = "void";
9572 
9573   Dwarf_Die scope_die;
9574   string scope_name;
9575   if (get_scope_die(ctxt, fn_die, where_offset, scope_die))
9576     scope_name = ctxt.get_die_qualified_name(&scope_die, where_offset);
9577   string fn_name = die_name(fn_die);
9578   if (!scope_name.empty())
9579     fn_name  = scope_name + "::" + fn_name;
9580 
9581   string class_name;
9582   vector<string> parm_names;
9583   bool is_const = false;
9584   bool is_static = false;
9585 
9586   die_return_and_parm_names_from_fn_type_die(ctxt, fn_die, where_offset,
9587 					     /*pretty_print=*/false,
9588 					     return_type_name, class_name,
9589 					     parm_names, is_const, is_static);
9590 
9591   bool is_virtual = die_is_virtual(fn_die);
9592 
9593   string repr = class_name.empty() ? "function" : "method";
9594   if (is_virtual)
9595     repr += " virtual";
9596 
9597   if (!return_type_name.empty())
9598     repr += " " + return_type_name;
9599 
9600   repr += " " + fn_name;
9601 
9602   // Now parameters.
9603   repr += "(";
9604   bool some_parm_emitted = false;
9605   for (vector<string>::const_iterator i = parm_names.begin();
9606        i != parm_names.end();
9607        ++i)
9608     {
9609       if (i != parm_names.begin())
9610 	{
9611 	  if (some_parm_emitted)
9612 	    repr += ", ";
9613 	}
9614       else
9615 	if (!is_static && !class_name.empty())
9616 	  // We are printing a non-static method name, skip the implicit "this"
9617 	  // parameter type.
9618 	  continue;
9619       repr += *i;
9620       some_parm_emitted = true;
9621     }
9622   repr += ")";
9623 
9624   if (is_const)
9625     {
9626       ABG_ASSERT(!class_name.empty());
9627       repr += " const";
9628     }
9629 
9630   return repr;
9631 }
9632 
9633 /// Return a pretty string representation of a type, for internal purposes.
9634 ///
9635 /// By internal purpose, we mean things like key-ing types for lookup
9636 /// purposes and so on.
9637 ///
9638 /// Note that this function is also used to pretty print functions.
9639 /// For functions, it prints the *type* of the function.
9640 ///
9641 /// @param ctxt the context to use.
9642 ///
9643 /// @param the DIE of the type to pretty print.
9644 ///
9645 /// @param where_offset where we logically are placed when calling
9646 /// this.  It's useful to handle inclusion of DW_TAG_compile_unit
9647 /// entries.
9648 ///
9649 /// @return the resulting pretty representation.
9650 static string
die_pretty_print_type(read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9651 die_pretty_print_type(read_context& ctxt,
9652 		      const Dwarf_Die* die,
9653 		      size_t where_offset)
9654 {
9655   if (!die
9656       || (!die_is_type(die)
9657 	  && dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_subprogram))
9658     return "";
9659 
9660   string repr;
9661 
9662   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9663   switch (tag)
9664     {
9665     case DW_TAG_string_type:
9666       // For now, we won't try to go get the actual representation of
9667       // the string because this would make things more complicated;
9668       // for that we'd need to interpret some location expressions to
9669       // get the length of the string.  And for dynamically allocated
9670       // strings, the result of the location expression evaluation
9671       // might not even be a constant.  So at the moment I consider
9672       // this to be a lot of hassle for no great return.  Until proven
9673       // otherwise, of course.
9674       repr = "string type";
9675 
9676     case DW_TAG_unspecified_type:
9677     case DW_TAG_ptr_to_member_type:
9678       break;
9679 
9680     case DW_TAG_namespace:
9681       repr = "namespace " + ctxt.get_die_qualified_type_name(die, where_offset);
9682       break;
9683 
9684     case DW_TAG_base_type:
9685       repr = ctxt.get_die_qualified_type_name(die, where_offset);
9686       break;
9687 
9688     case DW_TAG_typedef:
9689       {
9690 	string qualified_name;
9691 	if (!die_qualified_type_name_empty(ctxt, die,
9692 					   where_offset,
9693 					   qualified_name))
9694 	  repr = "typedef " + qualified_name;
9695       }
9696       break;
9697 
9698     case DW_TAG_const_type:
9699     case DW_TAG_volatile_type:
9700     case DW_TAG_restrict_type:
9701     case DW_TAG_pointer_type:
9702     case DW_TAG_reference_type:
9703     case DW_TAG_rvalue_reference_type:
9704       repr = ctxt.get_die_qualified_type_name(die, where_offset);
9705       break;
9706 
9707     case DW_TAG_enumeration_type:
9708       {
9709 	string qualified_name =
9710 	  ctxt.get_die_qualified_type_name(die, where_offset);
9711 	repr = "enum " + qualified_name;
9712       }
9713       break;
9714 
9715     case DW_TAG_structure_type:
9716     case DW_TAG_class_type:
9717       {
9718 	string qualified_name =
9719 	  ctxt.get_die_qualified_type_name(die, where_offset);
9720 	repr = "class " + qualified_name;
9721       }
9722       break;
9723 
9724     case DW_TAG_union_type:
9725       {
9726 	string qualified_name =
9727 	  ctxt.get_die_qualified_type_name(die, where_offset);
9728 	repr = "union " + qualified_name;
9729       }
9730       break;
9731 
9732     case DW_TAG_array_type:
9733       {
9734 	Dwarf_Die element_type_die;
9735 	if (!die_die_attribute(die, DW_AT_type, element_type_die))
9736 	  break;
9737 	string element_type_name =
9738 	  ctxt.get_die_qualified_type_name(&element_type_die, where_offset);
9739 	if (element_type_name.empty())
9740 	  break;
9741 
9742 	array_type_def::subranges_type subranges;
9743 	build_subranges_from_array_type_die(ctxt, die, subranges, where_offset,
9744 					    /*associate_type_to_die=*/false);
9745 
9746 	repr = element_type_name;
9747 	repr += array_type_def::subrange_type::vector_as_string(subranges);
9748       }
9749       break;
9750 
9751     case DW_TAG_subrange_type:
9752       {
9753 	// So this can be generated by Ada, on its own; that is, not
9754 	// as a subtype of an array.  In that case we need to handle
9755 	// it properly.
9756 
9757 	// For now, we consider that the pretty printed name of the
9758 	// subrange type is its name.  We might need something more
9759 	// advance, should the needs of the users get more
9760 	// complicated.
9761 	repr += die_qualified_type_name(ctxt, die, where_offset);
9762       }
9763       break;
9764 
9765     case DW_TAG_subroutine_type:
9766     case DW_TAG_subprogram:
9767       {
9768 	string return_type_name;
9769 	string class_name;
9770 	vector<string> parm_names;
9771 	bool is_const = false;
9772 	bool is_static = false;
9773 
9774 	die_return_and_parm_names_from_fn_type_die(ctxt, die, where_offset,
9775 						   /*pretty_print=*/true,
9776 						   return_type_name, class_name,
9777 						   parm_names, is_const,
9778 						   is_static);
9779 	if (class_name.empty())
9780 	  repr = "function type";
9781 	else
9782 	  repr = "method type";
9783 	repr += " " + ctxt.get_die_qualified_type_name(die, where_offset);
9784       }
9785       break;
9786 
9787     case DW_TAG_set_type:
9788     case DW_TAG_file_type:
9789     case DW_TAG_packed_type:
9790     case DW_TAG_thrown_type:
9791     case DW_TAG_interface_type:
9792     case DW_TAG_shared_type:
9793       ABG_ASSERT_NOT_REACHED;
9794     }
9795 
9796   return repr;
9797 }
9798 
9799 /// Return a pretty string representation of a declaration, for
9800 /// internal purposes.
9801 ///
9802 /// By internal purpose, we mean things like key-ing declarations for
9803 /// lookup purposes and so on.
9804 ///
9805 /// Note that this function is also used to pretty print functions.
9806 /// For functions, it prints the signature of the function.
9807 ///
9808 /// @param ctxt the context to use.
9809 ///
9810 /// @param the DIE of the declaration to pretty print.
9811 ///
9812 /// @param where_offset where we logically are placed when calling
9813 /// this.  It's useful to handle inclusion of DW_TAG_compile_unit
9814 /// entries.
9815 ///
9816 /// @return the resulting pretty representation.
9817 static string
die_pretty_print_decl(read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9818 die_pretty_print_decl(read_context& ctxt,
9819 		      const Dwarf_Die* die,
9820 		      size_t where_offset)
9821 {
9822   if (!die || !die_is_decl(die))
9823     return "";
9824 
9825   string repr;
9826 
9827   int tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
9828   switch (tag)
9829     {
9830     case DW_TAG_namespace:
9831       repr = "namespace " + die_qualified_name(ctxt, die, where_offset);
9832       break;
9833 
9834     case DW_TAG_member:
9835     case DW_TAG_variable:
9836       {
9837 	string type_repr = "void";
9838 	Dwarf_Die type_die;
9839 	if (die_die_attribute(die, DW_AT_type, type_die))
9840 	  type_repr = die_qualified_type_name(ctxt, &type_die, where_offset);
9841 	repr = die_qualified_name(ctxt, die, where_offset);
9842 	if (!repr.empty())
9843 	  repr = type_repr + " " + repr;
9844       }
9845       break;
9846 
9847     case DW_TAG_subprogram:
9848       repr = die_function_signature(ctxt, die, where_offset);
9849       break;
9850 
9851     default:
9852       break;
9853     }
9854   return repr;
9855 }
9856 
9857 /// Compute the pretty printed representation of an artifact
9858 /// represented by a DIE.
9859 ///
9860 /// If the DIE is a type, compute the its pretty representation as a
9861 /// type; otherwise, if it's a declaration, compute its pretty
9862 /// representation as a declaration.  Note for For instance, that a
9863 /// DW_TAG_subprogram DIE is going to be represented as a function
9864 /// *type*.
9865 ///
9866 /// @param ctxt the reading context.
9867 ///
9868 /// @param die the DIE to consider.
9869 ///
9870 /// @param where_offset we in the DIE stream we are logically at.
9871 ///
9872 /// @return a copy of the pretty printed artifact.
9873 static string
die_pretty_print(read_context & ctxt,const Dwarf_Die * die,size_t where_offset)9874 die_pretty_print(read_context& ctxt, const Dwarf_Die* die, size_t where_offset)
9875 {
9876   if (die_is_type(die))
9877     return die_pretty_print_type(ctxt, die, where_offset);
9878   else if (die_is_decl(die))
9879     return die_pretty_print_decl(ctxt, die, where_offset);
9880   return "";
9881 }
9882 
9883 // -----------------------------------
9884 // </die pretty printer>
9885 // -----------------------------------
9886 
9887 
9888 // ----------------------------------
9889 // <die comparison engine>
9890 // ---------------------------------
9891 
9892 /// Compares two decls DIEs
9893 ///
9894 /// This works only for DIEs emitted by the C language.
9895 ///
9896 /// This implementation doesn't yet support namespaces.
9897 ///
9898 /// This is a subroutine of compare_dies.
9899 ///
9900 /// @return true iff @p l equals @p r.
9901 static bool
compare_as_decl_dies(const Dwarf_Die * l,const Dwarf_Die * r)9902 compare_as_decl_dies(const Dwarf_Die *l, const Dwarf_Die *r)
9903 {
9904   ABG_ASSERT(l && r);
9905 
9906   int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
9907   int r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
9908   if (l_tag != r_tag)
9909     return false;
9910 
9911   bool result = false;
9912 
9913   if (l_tag == DW_TAG_subprogram || l_tag == DW_TAG_variable)
9914     {
9915       // Fast path for functions and global variables.
9916       if (compare_dies_string_attribute_value(l, r, DW_AT_linkage_name,
9917 					      result)
9918 	  || compare_dies_string_attribute_value(l, r, DW_AT_MIPS_linkage_name,
9919 						 result))
9920 	{
9921 	  if (!result)
9922 	    return false;
9923 	}
9924 
9925       if (compare_dies_string_attribute_value(l, r, DW_AT_name,
9926 					      result))
9927 	{
9928 	  if (!result)
9929 	    return false;
9930 	}
9931       return true;
9932     }
9933 
9934   // Fast path for types.
9935   if (compare_dies_string_attribute_value(l, r, DW_AT_name,
9936 					  result))
9937     return result;
9938   return true;
9939 }
9940 
9941 /// Compares two type DIEs
9942 ///
9943 /// This is a subroutine of compare_dies.
9944 ///
9945 /// @param l the left operand of the comparison operator.
9946 ///
9947 /// @param r the right operand of the comparison operator.
9948 ///
9949 /// @return true iff @p l equals @p r.
9950 static bool
compare_as_type_dies(const Dwarf_Die * l,const Dwarf_Die * r)9951 compare_as_type_dies(const Dwarf_Die *l, const Dwarf_Die *r)
9952 {
9953   ABG_ASSERT(l && r);
9954   ABG_ASSERT(die_is_type(l));
9955   ABG_ASSERT(die_is_type(r));
9956 
9957   if (dwarf_tag(const_cast<Dwarf_Die*>(l)) == DW_TAG_string_type
9958       && dwarf_tag(const_cast<Dwarf_Die*>(r)) == DW_TAG_string_type
9959       && (dwarf_dieoffset(const_cast<Dwarf_Die*>(l))
9960 	  != dwarf_dieoffset(const_cast<Dwarf_Die*>(r))))
9961     // For now, we cannot compare DW_TAG_string_type because of its
9962     // string_length attribute that is a location descriptor that is
9963     // not necessarily a constant.  So it's super hard to evaluate it
9964     // in a libabigail context.  So for now, we just say that all
9965     // DW_TAG_string_type DIEs are different, by default.
9966     return false;
9967 
9968   uint64_t l_size = 0, r_size = 0;
9969   die_size_in_bits(l, l_size);
9970   die_size_in_bits(r, r_size);
9971 
9972   return l_size == r_size;
9973 }
9974 
9975 /// Test if two DIEs representing function declarations have the same
9976 /// linkage name, and thus are considered equal if they are C or C++,
9977 /// because the two DIEs represent functions in the same binary.
9978 ///
9979 /// If the DIEs don't have a linkage name, the function compares their
9980 /// name.  But in that case, the caller of the function must know that
9981 /// in C++ for instance, that doesn't imply that the two functions are
9982 /// equal.
9983 ///
9984 /// @param ctxt the @ref read_context to consider.
9985 ///
9986 /// @param l the first function DIE to consider.
9987 ///
9988 /// @param r the second function DIE to consider.
9989 ///
9990 /// @return true iff the function represented by @p l have the same
9991 /// linkage name as the function represented by @p r.
9992 static bool
fn_die_equal_by_linkage_name(const read_context & ctxt,const Dwarf_Die * l,const Dwarf_Die * r)9993 fn_die_equal_by_linkage_name(const read_context &ctxt,
9994 			     const Dwarf_Die *l,
9995 			     const Dwarf_Die *r)
9996 {
9997   if (!!l != !!r)
9998     return false;
9999 
10000   if (!l)
10001     return false;
10002 
10003   int tag = dwarf_tag(const_cast<Dwarf_Die*>(l));
10004   ABG_ASSERT(tag == DW_TAG_subprogram);
10005   tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
10006   ABG_ASSERT(tag == DW_TAG_subprogram);
10007 
10008   string lname = die_name(l), rname = die_name(r);
10009   string llinkage_name = die_linkage_name(l),
10010     rlinkage_name = die_linkage_name(r);
10011 
10012   if (ctxt.die_is_in_c_or_cplusplus(l)
10013       && ctxt.die_is_in_c_or_cplusplus(r))
10014     {
10015       if (!llinkage_name.empty() && !rlinkage_name.empty())
10016 	return llinkage_name == rlinkage_name;
10017       else if (!!llinkage_name.empty() != !!rlinkage_name.empty())
10018 	return false;
10019       else
10020 	return lname == rname;
10021     }
10022 
10023   return (!llinkage_name.empty()
10024 	  && !rlinkage_name.empty()
10025 	  && llinkage_name == rlinkage_name);
10026 }
10027 
10028 /// Compare two DIEs emitted by a C compiler.
10029 ///
10030 /// @param ctxt the read context used to load the DWARF information.
10031 ///
10032 /// @param l the left-hand-side argument of this comparison operator.
10033 ///
10034 /// @param r the righ-hand-side argument of this comparison operator.
10035 ///
10036 /// @param aggregates_being_compared this holds the names of the set
10037 /// of aggregates being compared.  It's used by the comparison
10038 /// function to avoid recursing infinitely when faced with types
10039 /// referencing themselves through pointers or references.  By
10040 /// default, just pass an empty instance of @ref istring_set_type to
10041 /// it.
10042 ///
10043 /// @param update_canonical_dies_on_the_fly if true, when two
10044 /// sub-types compare equal (during the comparison of @p l and @p r)
10045 /// update their canonical type.  That way, two types of the same name
10046 /// are structurally compared to each other only once.  So the
10047 /// non-linear structural comparison of two types of the same name
10048 /// only happen once.
10049 ///
10050 /// @return true iff @p l equals @p r.
10051 static bool
compare_dies(const read_context & ctxt,const Dwarf_Die * l,const Dwarf_Die * r,istring_set_type & aggregates_being_compared,bool update_canonical_dies_on_the_fly)10052 compare_dies(const read_context& ctxt,
10053 	     const Dwarf_Die *l, const Dwarf_Die *r,
10054 	     istring_set_type& aggregates_being_compared,
10055 	     bool update_canonical_dies_on_the_fly)
10056 {
10057   ABG_ASSERT(l);
10058   ABG_ASSERT(r);
10059 
10060   int l_tag = dwarf_tag(const_cast<Dwarf_Die*>(l)),
10061     r_tag = dwarf_tag(const_cast<Dwarf_Die*>(r));
10062 
10063   if (l_tag != r_tag)
10064     return false;
10065 
10066   Dwarf_Off l_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(l)),
10067     r_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(r));
10068   Dwarf_Off l_canonical_die_offset = 0, r_canonical_die_offset = 0;
10069   const die_source l_die_source = ctxt.get_die_source(l);
10070   const die_source r_die_source = ctxt.get_die_source(r);
10071 
10072   // If 'l' and 'r' already have canonical DIEs, then just compare the
10073   // offsets of their canonical DIEs.
10074   bool l_has_canonical_die_offset =
10075     (l_canonical_die_offset =
10076      ctxt.get_canonical_die_offset(l_offset, l_die_source,
10077 				   /*die_as_type=*/true));
10078 
10079   bool r_has_canonical_die_offset =
10080     (r_canonical_die_offset =
10081      ctxt.get_canonical_die_offset(r_offset, r_die_source,
10082 				   /*die_as_type=*/true));
10083 
10084   if (l_has_canonical_die_offset && r_has_canonical_die_offset)
10085     return l_canonical_die_offset == r_canonical_die_offset;
10086 
10087   bool result = true;
10088 
10089   switch (l_tag)
10090     {
10091     case DW_TAG_base_type:
10092     case DW_TAG_string_type:
10093       if (!compare_as_type_dies(l, r)
10094 	  || !compare_as_decl_dies(l, r))
10095 	result = false;
10096       break;
10097 
10098     case DW_TAG_typedef:
10099     case DW_TAG_pointer_type:
10100     case DW_TAG_reference_type:
10101     case DW_TAG_rvalue_reference_type:
10102     case DW_TAG_const_type:
10103     case DW_TAG_volatile_type:
10104     case DW_TAG_restrict_type:
10105       {
10106 	if (!compare_as_type_dies(l, r))
10107 	  {
10108 	    result = false;
10109 	    break;
10110 	  }
10111 
10112 	bool from_the_same_tu = false;
10113 	if (!pointer_or_qual_die_of_anonymous_class_type(l)
10114 	    && compare_dies_cu_decl_file(l, r, from_the_same_tu)
10115 	    && from_the_same_tu)
10116 	  {
10117 	    // These two typedefs, pointer, reference, or qualified
10118 	    // types have the same name and are defined in the same TU.
10119 	    // They thus ought to be the same.
10120 	    //
10121 	    // Note that pointers, reference or qualified types to
10122 	    // anonymous types are not taking into account here because
10123 	    // those always need to be structurally compared.
10124 	    result = true;
10125 	    break;
10126 	  }
10127       }
10128 
10129       {
10130 	// No fancy optimization in this case.  We need to
10131 	// structurally compare the two DIEs.
10132 	Dwarf_Die lu_type_die, ru_type_die;
10133 	bool lu_is_void, ru_is_void;
10134 
10135 	lu_is_void = !die_die_attribute(l, DW_AT_type, lu_type_die);
10136 	ru_is_void = !die_die_attribute(r, DW_AT_type, ru_type_die);
10137 
10138 	if (lu_is_void && ru_is_void)
10139 	  result = true;
10140 	else if (lu_is_void != ru_is_void)
10141 	  result = false;
10142 	else
10143 	  result = compare_dies(ctxt, &lu_type_die, &ru_type_die,
10144 				aggregates_being_compared,
10145 				update_canonical_dies_on_the_fly);
10146       }
10147       break;
10148 
10149     case DW_TAG_enumeration_type:
10150       if (!compare_as_type_dies(l, r)
10151 	  || !compare_as_decl_dies(l, r))
10152 	result = false;
10153       else
10154 	{
10155 	  // Walk the enumerators.
10156 	  Dwarf_Die l_enumtor, r_enumtor;
10157 	  bool found_l_enumtor, found_r_enumtor;
10158 
10159 	  for (found_l_enumtor = dwarf_child(const_cast<Dwarf_Die*>(l),
10160 					     &l_enumtor) == 0,
10161 		 found_r_enumtor = dwarf_child(const_cast<Dwarf_Die*>(r),
10162 					       &r_enumtor) == 0;
10163 	       found_l_enumtor && found_r_enumtor;
10164 	       found_l_enumtor = dwarf_siblingof(&l_enumtor, &l_enumtor) == 0,
10165 		 found_r_enumtor = dwarf_siblingof(&r_enumtor, &r_enumtor) == 0)
10166 	    {
10167 	      int l_tag = dwarf_tag(&l_enumtor), r_tag = dwarf_tag(&r_enumtor);
10168 	      if ( l_tag != r_tag)
10169 		{
10170 		  result = false;
10171 		  break;
10172 		}
10173 
10174 	      if (l_tag != DW_TAG_enumerator)
10175 		continue;
10176 
10177 	      uint64_t l_val = 0, r_val = 0;
10178 	      die_unsigned_constant_attribute(&l_enumtor,
10179 					      DW_AT_const_value,
10180 					      l_val);
10181 	      die_unsigned_constant_attribute(&r_enumtor,
10182 					      DW_AT_const_value,
10183 					      r_val);
10184 	      if (l_val != r_val)
10185 		{
10186 		  result = false;
10187 		  break;
10188 		}
10189 	    }
10190 	  if (found_l_enumtor != found_r_enumtor )
10191 	    result = false;
10192 
10193 	}
10194       break;
10195 
10196     case DW_TAG_structure_type:
10197     case DW_TAG_union_type:
10198       {
10199 	interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
10200 	interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
10201 
10202 	if ((aggregates_being_compared.find(ln)
10203 	     != aggregates_being_compared.end())
10204 	    || (aggregates_being_compared.find(rn)
10205 		!= aggregates_being_compared.end()))
10206 	  result = true;
10207 	else if (!compare_as_decl_dies(l, r))
10208 	  result = false;
10209 	else if (!compare_as_type_dies(l, r))
10210 	  result = false;
10211 	else
10212 	  {
10213 	    aggregates_being_compared.insert(ln);
10214 	    aggregates_being_compared.insert(rn);
10215 
10216 	    Dwarf_Die l_member, r_member;
10217 	    bool found_l_member, found_r_member;
10218 	    for (found_l_member = dwarf_child(const_cast<Dwarf_Die*>(l),
10219 					      &l_member) == 0,
10220 		   found_r_member = dwarf_child(const_cast<Dwarf_Die*>(r),
10221 						&r_member) == 0;
10222 		 found_l_member && found_r_member;
10223 		 found_l_member = dwarf_siblingof(&l_member, &l_member) == 0,
10224 		   found_r_member = dwarf_siblingof(&r_member, &r_member) == 0)
10225 	      {
10226 		int l_tag = dwarf_tag(&l_member), r_tag = dwarf_tag(&r_member);
10227 		if (l_tag != r_tag)
10228 		  {
10229 		    result = false;
10230 		    break;
10231 		  }
10232 
10233 		if (l_tag != DW_TAG_member && l_tag != DW_TAG_variable)
10234 		  continue;
10235 
10236 		if (!compare_dies(ctxt, &l_member, &r_member,
10237 				  aggregates_being_compared,
10238 				  update_canonical_dies_on_the_fly))
10239 		  {
10240 		    result = false;
10241 		    break;
10242 		  }
10243 	      }
10244 	    if (found_l_member != found_r_member)
10245 	      result = false;
10246 
10247 	    aggregates_being_compared.erase(ln);
10248 	    aggregates_being_compared.erase(rn);
10249 	  }
10250       }
10251       break;
10252 
10253     case DW_TAG_array_type:
10254       {
10255 	Dwarf_Die l_child, r_child;
10256 	bool found_l_child, found_r_child;
10257 	for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
10258 					 &l_child) == 0,
10259 	       found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
10260 					   &r_child) == 0;
10261 	     found_l_child && found_r_child;
10262 	     found_l_child = dwarf_siblingof(&l_child, &l_child) == 0,
10263 	       found_r_child = dwarf_siblingof(&r_child, &r_child) == 0)
10264 	  {
10265 	    int l_child_tag = dwarf_tag(&l_child),
10266 	      r_child_tag = dwarf_tag(&r_child);
10267 	    if (l_child_tag == DW_TAG_subrange_type
10268 		|| r_child_tag == DW_TAG_subrange_type)
10269 	      if (!compare_dies(ctxt, &l_child, &r_child,
10270 				aggregates_being_compared,
10271 				update_canonical_dies_on_the_fly))
10272 		{
10273 		  result = false;
10274 		  break;
10275 		}
10276 	  }
10277 	if (found_l_child != found_r_child)
10278 	  result = false;
10279       }
10280       break;
10281 
10282     case DW_TAG_subrange_type:
10283       {
10284 	uint64_t l_lower_bound = 0, r_lower_bound = 0,
10285 	  l_upper_bound = 0, r_upper_bound = 0;
10286 	die_unsigned_constant_attribute(l, DW_AT_lower_bound, l_lower_bound);
10287 	die_unsigned_constant_attribute(r, DW_AT_lower_bound, r_lower_bound);
10288 	if (!die_unsigned_constant_attribute(l, DW_AT_upper_bound,
10289 					     l_upper_bound))
10290 	  {
10291 	    uint64_t l_count = 0;
10292 	    if (die_unsigned_constant_attribute(l, DW_AT_count, l_count))
10293 	      {
10294 		l_upper_bound = l_lower_bound + l_count;
10295 		if (l_upper_bound)
10296 		  --l_upper_bound;
10297 	      }
10298 	  }
10299 	if (!die_unsigned_constant_attribute(r, DW_AT_upper_bound,
10300 					     r_upper_bound))
10301 	  {
10302 	    uint64_t r_count = 0;
10303 	    if (die_unsigned_constant_attribute(l, DW_AT_count, r_count))
10304 	      {
10305 		r_upper_bound = r_lower_bound + r_count;
10306 		if (r_upper_bound)
10307 		  --r_upper_bound;
10308 	      }
10309 	  }
10310 
10311 	if ((l_lower_bound != r_lower_bound)
10312 	    || (l_upper_bound != r_upper_bound))
10313 	  result = false;
10314       }
10315       break;
10316 
10317     case DW_TAG_subroutine_type:
10318     case DW_TAG_subprogram:
10319       {
10320 	interned_string ln = ctxt.get_die_pretty_type_representation(l, 0);
10321 	interned_string rn = ctxt.get_die_pretty_type_representation(r, 0);
10322 
10323 	if ((aggregates_being_compared.find(ln)
10324 	     != aggregates_being_compared.end())
10325 	    || (aggregates_being_compared.find(rn)
10326 		!= aggregates_being_compared.end()))
10327 	  {
10328 	    result = true;
10329 	    break;
10330 	  }
10331 	else if (l_tag == DW_TAG_subroutine_type)
10332 	  {
10333 	    // So, we are looking at types that are pointed to by a
10334 	    // function pointer.  These are not real concrete function
10335 	    // types, rather, they denote interfaces of functions.
10336 	    //
10337 	    // If the textual representations are different, then
10338 	    // obviously they are different DIEs.
10339 	    if (ln != rn)
10340 	      {
10341 		result = false;
10342 		break;
10343 	      }
10344 
10345 	    // So if their textual representation are the same and
10346 	    // they come from the same TU, then they represent the
10347 	    // same DIE.
10348 	    bool from_the_same_tu = false;
10349 	    if (compare_dies_cu_decl_file(l, r, from_the_same_tu)
10350 		&& from_the_same_tu)
10351 	      {
10352 		result = true;
10353 		break;
10354 	      }
10355 	  }
10356 
10357 	if (l_tag == DW_TAG_subprogram
10358 	    && !fn_die_equal_by_linkage_name(ctxt, l, r))
10359 	  {
10360 	    result = false;
10361 	    break;
10362 	  }
10363 	else if (l_tag == DW_TAG_subprogram
10364 		 && ctxt.die_is_in_c(l) && ctxt.die_is_in_c(r)
10365 		 /*&& fn_die_equal_by_linkage_name(ctxt, l, r)*/)
10366 	  {
10367 	    result = true;
10368 	    break;
10369 	  }
10370 	else if (!ctxt.die_is_in_c(l) && !ctxt.die_is_in_c(r))
10371 	  {
10372 	    // In C, we cannot have two different functions with the
10373 	    // same linkage name in a given binary.  But here we are
10374 	    // looking at DIEs that don't originate from C.  So we
10375 	    // need to compare return types and parameter types.
10376 	    Dwarf_Die l_return_type, r_return_type;
10377 	    bool l_return_type_is_void = !die_die_attribute(l, DW_AT_type,
10378 							    l_return_type);
10379 	    bool r_return_type_is_void = !die_die_attribute(r, DW_AT_type,
10380 							    r_return_type);
10381 	    if (l_return_type_is_void != r_return_type_is_void
10382 		|| (!l_return_type_is_void
10383 		    && !compare_dies(ctxt,
10384 				     &l_return_type, &r_return_type,
10385 				     aggregates_being_compared,
10386 				     update_canonical_dies_on_the_fly)))
10387 	      result = false;
10388 	    else
10389 	      {
10390 		Dwarf_Die l_child, r_child;
10391 		bool found_l_child, found_r_child;
10392 		for (found_l_child = dwarf_child(const_cast<Dwarf_Die*>(l),
10393 						 &l_child) == 0,
10394 		       found_r_child = dwarf_child(const_cast<Dwarf_Die*>(r),
10395 						   &r_child) == 0;
10396 		     found_l_child && found_r_child;
10397 		     found_l_child = dwarf_siblingof(&l_child,
10398 						     &l_child) == 0,
10399 		       found_r_child = dwarf_siblingof(&r_child,
10400 						       &r_child)==0)
10401 		  {
10402 		    int l_child_tag = dwarf_tag(&l_child);
10403 		    int r_child_tag = dwarf_tag(&r_child);
10404 		    if (l_child_tag != r_child_tag
10405 			|| (l_child_tag == DW_TAG_formal_parameter
10406 			    && !compare_dies(ctxt, &l_child, &r_child,
10407 					     aggregates_being_compared,
10408 					     update_canonical_dies_on_the_fly)))
10409 		      {
10410 			result = false;
10411 			break;
10412 		      }
10413 		  }
10414 		if (found_l_child != found_r_child)
10415 		  result = false;
10416 	      }
10417 	  }
10418 
10419 	aggregates_being_compared.erase(ln);
10420 	aggregates_being_compared.erase(rn);
10421       }
10422       break;
10423 
10424     case DW_TAG_formal_parameter:
10425       {
10426 	Dwarf_Die l_type, r_type;
10427 	bool l_type_is_void = !die_die_attribute(l, DW_AT_type, l_type);
10428 	bool r_type_is_void = !die_die_attribute(r, DW_AT_type, r_type);
10429 	if ((l_type_is_void != r_type_is_void)
10430 	    || !compare_dies(ctxt, &l_type, &r_type,
10431 			     aggregates_being_compared,
10432 			     update_canonical_dies_on_the_fly))
10433 	  result = false;
10434       }
10435       break;
10436 
10437     case DW_TAG_variable:
10438     case DW_TAG_member:
10439       if (compare_as_decl_dies(l, r))
10440 	{
10441 	  // Compare the offsets of the data members
10442 	  if (l_tag == DW_TAG_member)
10443 	    {
10444 	      int64_t l_offset_in_bits = 0, r_offset_in_bits = 0;
10445 	      die_member_offset(ctxt, l, l_offset_in_bits);
10446 	      die_member_offset(ctxt, r, r_offset_in_bits);
10447 	      if (l_offset_in_bits != r_offset_in_bits)
10448 		result = false;
10449 	    }
10450 	  if (result)
10451 	    {
10452 	      // Compare the types of the data members or variables.
10453 	      Dwarf_Die l_type, r_type;
10454 	      ABG_ASSERT(die_die_attribute(l, DW_AT_type, l_type));
10455 	      ABG_ASSERT(die_die_attribute(r, DW_AT_type, r_type));
10456 	      if (aggregates_being_compared.size () < 5)
10457 		{
10458 		  if (!compare_dies(ctxt, &l_type, &r_type,
10459 				    aggregates_being_compared,
10460 				    update_canonical_dies_on_the_fly))
10461 		    result = false;
10462 		}
10463 	      else
10464 		{
10465 		  if (!compare_as_type_dies(&l_type, &r_type)
10466 		      ||!compare_as_decl_dies(&l_type, &r_type))
10467 		    return false;
10468 		}
10469 	    }
10470 	}
10471       else
10472 	result = false;
10473       break;
10474 
10475     case DW_TAG_class_type:
10476     case DW_TAG_enumerator:
10477     case DW_TAG_packed_type:
10478     case DW_TAG_set_type:
10479     case DW_TAG_file_type:
10480     case DW_TAG_ptr_to_member_type:
10481     case DW_TAG_thrown_type:
10482     case DW_TAG_interface_type:
10483     case DW_TAG_unspecified_type:
10484     case DW_TAG_shared_type:
10485     case DW_TAG_compile_unit:
10486     case DW_TAG_namespace:
10487     case DW_TAG_module:
10488     case DW_TAG_constant:
10489     case DW_TAG_partial_unit:
10490     case DW_TAG_imported_unit:
10491     case DW_TAG_dwarf_procedure:
10492     case DW_TAG_imported_declaration:
10493     case DW_TAG_entry_point:
10494     case DW_TAG_label:
10495     case DW_TAG_lexical_block:
10496     case DW_TAG_unspecified_parameters:
10497     case DW_TAG_variant:
10498     case DW_TAG_common_block:
10499     case DW_TAG_common_inclusion:
10500     case DW_TAG_inheritance:
10501     case DW_TAG_inlined_subroutine:
10502     case DW_TAG_with_stmt:
10503     case DW_TAG_access_declaration:
10504     case DW_TAG_catch_block:
10505     case DW_TAG_friend:
10506     case DW_TAG_namelist:
10507     case DW_TAG_namelist_item:
10508     case DW_TAG_template_type_parameter:
10509     case DW_TAG_template_value_parameter:
10510     case DW_TAG_try_block:
10511     case DW_TAG_variant_part:
10512     case DW_TAG_imported_module:
10513     case DW_TAG_condition:
10514     case DW_TAG_type_unit:
10515     case DW_TAG_template_alias:
10516     case DW_TAG_lo_user:
10517     case DW_TAG_MIPS_loop:
10518     case DW_TAG_format_label:
10519     case DW_TAG_function_template:
10520     case DW_TAG_class_template:
10521     case DW_TAG_GNU_BINCL:
10522     case DW_TAG_GNU_EINCL:
10523     case DW_TAG_GNU_template_template_param:
10524     case DW_TAG_GNU_template_parameter_pack:
10525     case DW_TAG_GNU_formal_parameter_pack:
10526     case DW_TAG_GNU_call_site:
10527     case DW_TAG_GNU_call_site_parameter:
10528     case DW_TAG_hi_user:
10529       ABG_ASSERT_NOT_REACHED;
10530     }
10531 
10532   if (result == true
10533       && update_canonical_dies_on_the_fly
10534       && is_canonicalizeable_type_tag(l_tag))
10535     {
      // If 'l' has no canonical DIE and if 'r' has one, then propagate
      // the canonical DIE of 'r' to 'l'.
      //
      // In case 'r' has no canonical DIE, then compute it, and then
      // propagate that canonical DIE to 'l'.
10541       const die_source l_source = ctxt.get_die_source(l);
10542       const die_source r_source = ctxt.get_die_source(r);
10543 
10544       if (!l_has_canonical_die_offset
10545 	  // A DIE can be equivalent only to another DIE of the same
10546 	  // source.
10547 	  && l_source == r_source)
10548 	{
10549 	  if (!r_has_canonical_die_offset)
10550 	    ctxt.compute_canonical_die_offset(r, r_canonical_die_offset,
10551 					      /*die_as_type=*/true);
10552 	  ABG_ASSERT(r_canonical_die_offset);
10553 	  ctxt.set_canonical_die_offset(l, r_canonical_die_offset,
10554 					/*die_as_type=*/true);
10555 	}
10556     }
10557   return result;
10558 }
10559 
10560 /// Compare two DIEs emitted by a C compiler.
10561 ///
10562 /// @param ctxt the read context used to load the DWARF information.
10563 ///
10564 /// @param l the left-hand-side argument of this comparison operator.
10565 ///
10566 /// @param r the righ-hand-side argument of this comparison operator.
10567 ///
10568 /// @param update_canonical_dies_on_the_fly if yes, then this function
10569 /// updates the canonical DIEs of sub-type DIEs of 'l' and 'r', while
10570 /// comparing l and r.  This helps in making so that sub-type DIEs of
10571 /// 'l' and 'r' are compared structurally only once.  This is how we
10572 /// turn this exponential comparison problem into a problem that is a
10573 /// closer to a linear one.
10574 ///
10575 /// @return true iff @p l equals @p r.
10576 static bool
compare_dies(const read_context & ctxt,const Dwarf_Die * l,const Dwarf_Die * r,bool update_canonical_dies_on_the_fly)10577 compare_dies(const read_context& ctxt,
10578 	     const Dwarf_Die *l,
10579 	     const Dwarf_Die *r,
10580 	     bool update_canonical_dies_on_the_fly)
10581 {
10582   istring_set_type aggregates_being_compared;
10583   return compare_dies(ctxt, l, r, aggregates_being_compared,
10584 		      update_canonical_dies_on_the_fly);
10585 }
10586 
10587 // ----------------------------------
10588 // </die comparison engine>
10589 // ---------------------------------
10590 
10591 /// Get the point where a DW_AT_import DIE is used to import a given
10592 /// (unit) DIE, between two DIEs.
10593 ///
10594 /// @param ctxt the dwarf reading context to consider.
10595 ///
10596 /// @param partial_unit_offset the imported unit for which we want to
10597 /// know the insertion point.  This is usually a partial unit (with
10598 /// tag DW_TAG_partial_unit) but it does not necessarily have to be
10599 /// so.
10600 ///
10601 /// @param first_die_offset the offset of the DIE from which this
10602 /// function starts looking for the import point of
10603 /// @partial_unit_offset.  Note that this offset is excluded from the
10604 /// set of potential solutions.
10605 ///
10606 /// @param first_die_cu_offset the offset of the (compilation) unit
10607 /// that @p first_die_cu_offset belongs to.
10608 ///
10609 /// @param source where the DIE of first_die_cu_offset unit comes
10610 /// from.
10611 ///
10612 /// @param last_die_offset the offset of the last DIE of the up to
10613 /// which this function looks for the import point of @p
10614 /// partial_unit_offset.  Note that this offset is excluded from the
10615 /// set of potential solutions.
10616 ///
10617 /// @param imported_point_offset.  The resulting
10618 /// imported_point_offset.  Note that if the imported DIE @p
10619 /// partial_unit_offset is not found between @p first_die_offset and
10620 /// @p last_die_offset, this parameter is left untouched by this
10621 /// function.
10622 ///
10623 /// @return true iff an imported unit is found between @p
10624 /// first_die_offset and @p last_die_offset.
10625 static bool
find_import_unit_point_between_dies(const read_context & ctxt,size_t partial_unit_offset,Dwarf_Off first_die_offset,Dwarf_Off first_die_cu_offset,die_source source,size_t last_die_offset,size_t & imported_point_offset)10626 find_import_unit_point_between_dies(const read_context& ctxt,
10627 				    size_t		partial_unit_offset,
10628 				    Dwarf_Off		first_die_offset,
10629 				    Dwarf_Off		first_die_cu_offset,
10630 				    die_source		source,
10631 				    size_t		last_die_offset,
10632 				    size_t&		imported_point_offset)
10633 {
10634   const tu_die_imported_unit_points_map_type& tu_die_imported_unit_points_map =
10635     ctxt.tu_die_imported_unit_points_map(source);
10636 
10637   tu_die_imported_unit_points_map_type::const_iterator iter =
10638     tu_die_imported_unit_points_map.find(first_die_cu_offset);
10639 
10640   ABG_ASSERT(iter != tu_die_imported_unit_points_map.end());
10641 
10642   const imported_unit_points_type& imported_unit_points = iter->second;
10643   if (imported_unit_points.empty())
10644     return false;
10645 
10646   imported_unit_points_type::const_iterator b = imported_unit_points.begin();
10647   imported_unit_points_type::const_iterator e = imported_unit_points.end();
10648 
10649   find_lower_bound_in_imported_unit_points(imported_unit_points,
10650 					   first_die_offset,
10651 					   b);
10652 
10653   if (last_die_offset != static_cast<size_t>(-1))
10654     find_lower_bound_in_imported_unit_points(imported_unit_points,
10655 					     last_die_offset,
10656 					     e);
10657 
10658   if (e != imported_unit_points.end())
10659     {
10660       for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
10661 	if (i->imported_unit_die_off == partial_unit_offset)
10662 	  {
10663 	    imported_point_offset = i->offset_of_import ;
10664 	    return true;
10665 	  }
10666 
10667       for (imported_unit_points_type::const_iterator i = e; i >= b; --i)
10668 	{
10669 	  if (find_import_unit_point_between_dies(ctxt,
10670 						  partial_unit_offset,
10671 						  i->imported_unit_child_off,
10672 						  i->imported_unit_cu_off,
10673 						  i->imported_unit_die_source,
10674 						  /*(Dwarf_Off)*/-1,
10675 						  imported_point_offset))
10676 	    return true;
10677 	}
10678     }
10679   else
10680     {
10681       for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
10682 	if (i->imported_unit_die_off == partial_unit_offset)
10683 	  {
10684 	    imported_point_offset = i->offset_of_import ;
10685 	    return true;
10686 	  }
10687 
10688       for (imported_unit_points_type::const_iterator i = b; i != e; ++i)
10689 	{
10690 	  if (find_import_unit_point_between_dies(ctxt,
10691 						  partial_unit_offset,
10692 						  i->imported_unit_child_off,
10693 						  i->imported_unit_cu_off,
10694 						  i->imported_unit_die_source,
10695 						  /*(Dwarf_Off)*/-1,
10696 						  imported_point_offset))
10697 	    return true;
10698 	}
10699     }
10700 
10701   return false;
10702 }
10703 
10704 /// In the current translation unit, get the last point where a
10705 /// DW_AT_import DIE is used to import a given (unit) DIE, before a
10706 /// given DIE is found.  That given DIE is called the limit DIE.
10707 ///
10708 /// Said otherwise, this function returns the last import point of a
10709 /// unit, before a limit.
10710 ///
10711 /// @param ctxt the dwarf reading context to consider.
10712 ///
10713 /// @param partial_unit_offset the imported unit for which we want to
10714 /// know the insertion point of.  This is usually a partial unit (with
10715 /// tag DW_TAG_partial_unit) but it does not necessarily have to be
10716 /// so.
10717 ///
10718 /// @param where_offset the offset of the limit DIE.
10719 ///
10720 /// @param imported_point_offset.  The resulting imported_point_offset.
10721 /// Note that if the imported DIE @p partial_unit_offset is not found
10722 /// before @p die_offset, this is set to the last @p
10723 /// partial_unit_offset found under @p parent_die.
10724 ///
10725 /// @return true iff an imported unit is found before @p die_offset.
10726 /// Note that if an imported unit is found after @p die_offset then @p
10727 /// imported_point_offset is set and the function return false.
10728 static bool
find_import_unit_point_before_die(const read_context & ctxt,size_t partial_unit_offset,size_t where_offset,size_t & imported_point_offset)10729 find_import_unit_point_before_die(const read_context&	ctxt,
10730 				  size_t		partial_unit_offset,
10731 				  size_t		where_offset,
10732 				  size_t&		imported_point_offset)
10733 {
10734   size_t import_point_offset = 0;
10735   Dwarf_Die first_die_of_tu;
10736 
10737   if (dwarf_child(const_cast<Dwarf_Die*>(ctxt.cur_tu_die()),
10738 		  &first_die_of_tu) != 0)
10739     return false;
10740 
10741   Dwarf_Die cu_die_memory;
10742   Dwarf_Die *cu_die;
10743 
10744   cu_die = dwarf_diecu(const_cast<Dwarf_Die*>(&first_die_of_tu),
10745 		       &cu_die_memory, 0, 0);
10746 
10747   if (find_import_unit_point_between_dies(ctxt, partial_unit_offset,
10748 					  dwarf_dieoffset(&first_die_of_tu),
10749 					  dwarf_dieoffset(cu_die),
10750 					  /*source=*/PRIMARY_DEBUG_INFO_DIE_SOURCE,
10751 					  where_offset,
10752 					  import_point_offset))
10753     {
10754       imported_point_offset = import_point_offset;
10755       return true;
10756     }
10757 
10758   if (import_point_offset)
10759     {
10760       imported_point_offset = import_point_offset;
10761       return true;
10762     }
10763 
10764   return false;
10765 }
10766 
10767 /// Return the parent DIE for a given DIE.
10768 ///
10769 /// Note that the function build_die_parent_map() must have been
10770 /// called before this one can work.  This function either succeeds or
10771 /// aborts the current process.
10772 ///
10773 /// @param ctxt the read context to consider.
10774 ///
10775 /// @param die the DIE for which we want the parent.
10776 ///
10777 /// @param parent_die the output parameter set to the parent die of
10778 /// @p die.  Its memory must be allocated and handled by the caller.
10779 ///
10780 /// @param where_offset the offset of the DIE where we are "logically"
10781 /// positionned at, in the DIE tree.  This is useful when @p die is
10782 /// e.g, DW_TAG_partial_unit that can be included in several places in
10783 /// the DIE tree.
10784 ///
10785 /// @return true if the function could get a parent DIE, false
10786 /// otherwise.
10787 static bool
get_parent_die(const read_context & ctxt,const Dwarf_Die * die,Dwarf_Die & parent_die,size_t where_offset)10788 get_parent_die(const read_context&	ctxt,
10789 	       const Dwarf_Die*	die,
10790 	       Dwarf_Die&		parent_die,
10791 	       size_t			where_offset)
10792 {
10793   ABG_ASSERT(ctxt.dwarf());
10794 
10795   const die_source source = ctxt.get_die_source(die);
10796 
10797   const offset_offset_map_type& m = ctxt.die_parent_map(source);
10798   offset_offset_map_type::const_iterator i =
10799     m.find(dwarf_dieoffset(const_cast<Dwarf_Die*>(die)));
10800 
10801   if (i == m.end())
10802     return false;
10803 
10804   switch (source)
10805     {
10806     case PRIMARY_DEBUG_INFO_DIE_SOURCE:
10807       ABG_ASSERT(dwarf_offdie(ctxt.dwarf(), i->second, &parent_die));
10808       break;
10809     case ALT_DEBUG_INFO_DIE_SOURCE:
10810       ABG_ASSERT(dwarf_offdie(ctxt.alt_dwarf(), i->second, &parent_die));
10811       break;
10812     case TYPE_UNIT_DIE_SOURCE:
10813       ABG_ASSERT(dwarf_offdie_types(ctxt.dwarf(), i->second, &parent_die));
10814       break;
10815     case NO_DEBUG_INFO_DIE_SOURCE:
10816     case NUMBER_OF_DIE_SOURCES:
10817       ABG_ASSERT_NOT_REACHED;
10818     }
10819 
10820   if (dwarf_tag(&parent_die) == DW_TAG_partial_unit)
10821     {
10822       if (where_offset == 0)
10823 	{
10824 	  parent_die = *ctxt.cur_tu_die();
10825 	  return true;
10826 	}
10827       size_t import_point_offset = 0;
10828       bool found =
10829 	find_import_unit_point_before_die(ctxt,
10830 					  dwarf_dieoffset(&parent_die),
10831 					  where_offset,
10832 					  import_point_offset);
10833       if (!found)
10834 	// It looks like parent_die (which comes from the alternate
10835 	// debug info file) hasn't been imported into this TU.  So,
10836 	// Let's assume its logical parent is the DIE of the current
10837 	// TU.
10838 	parent_die = *ctxt.cur_tu_die();
10839       else
10840 	{
10841 	  ABG_ASSERT(import_point_offset);
10842 	  Dwarf_Die import_point_die;
10843 	  ABG_ASSERT(dwarf_offdie(ctxt.dwarf(),
10844 			      import_point_offset,
10845 			      &import_point_die));
10846 	  return get_parent_die(ctxt, &import_point_die,
10847 				parent_die, where_offset);
10848 	}
10849     }
10850 
10851   return true;
10852 }
10853 
10854 /// Get the DIE representing the scope of a given DIE.
10855 ///
10856 /// Please note that when the DIE we are looking at has a
10857 /// DW_AT_specification or DW_AT_abstract_origin attribute, the scope
10858 /// DIE is the parent DIE of the DIE referred to by that attribute.
10859 /// This is the only case where a scope DIE is different from the
10860 /// parent DIE of a given DIE.
10861 ///
10862 /// Also note that if the current translation unit is from C, then
10863 /// this returns the global scope.
10864 ///
10865 /// @param ctxt the reading context to use.
10866 ///
10867 /// @param die the DIE to consider.
10868 ///
10869 /// @param where_offset where we are logically at in the DIE stream.
10870 ///
10871 /// @param scope_die out parameter.  This is set to the resulting
10872 /// scope DIE iff the function returns true.
10873 static bool
get_scope_die(const read_context & ctxt,const Dwarf_Die * die,size_t where_offset,Dwarf_Die & scope_die)10874 get_scope_die(const read_context&	ctxt,
10875 	      const Dwarf_Die*		die,
10876 	      size_t			where_offset,
10877 	      Dwarf_Die&		scope_die)
10878 {
10879   if (is_c_language(ctxt.cur_transl_unit()->get_language()))
10880     {
10881       ABG_ASSERT(dwarf_tag(const_cast<Dwarf_Die*>(die)) != DW_TAG_member);
10882       return dwarf_diecu(const_cast<Dwarf_Die*>(die), &scope_die, 0, 0);
10883     }
10884 
10885   Dwarf_Die logical_parent_die;
10886   if (die_die_attribute(die, DW_AT_specification,
10887 			logical_parent_die, false)
10888       || die_die_attribute(die, DW_AT_abstract_origin,
10889 			   logical_parent_die, false))
10890     return get_scope_die(ctxt, &logical_parent_die, where_offset, scope_die);
10891 
10892   if (!get_parent_die(ctxt, die, scope_die, where_offset))
10893     return false;
10894 
10895   if (dwarf_tag(&scope_die) == DW_TAG_subprogram
10896       || dwarf_tag(&scope_die) == DW_TAG_subroutine_type
10897       || dwarf_tag(&scope_die) == DW_TAG_array_type)
10898     return get_scope_die(ctxt, &scope_die, where_offset, scope_die);
10899 
10900   return true;
10901 }
10902 
10903 /// Return the abigail IR node representing the scope of a given DIE.
10904 ///
10905 /// Note that it is the logical scope that is returned.  That is, if
10906 /// the DIE has a DW_AT_specification or DW_AT_abstract_origin
10907 /// attribute, it's the scope of the referred-to DIE (via these
10908 /// attributes) that is returned.
10909 ///
10910 /// Also note that if the current translation unit is from C, then
10911 /// this returns the global scope.
10912 ///
10913 /// @param ctxt the dwarf reading context to use.
10914 ///
10915 /// @param die the DIE to get the scope for.
10916 ///
10917 /// @param called_from_public_decl is true if this function has been
10918 /// initially called within the context of a public decl.
10919 ///
10920 /// @param where_offset the offset of the DIE where we are "logically"
10921 /// positionned at, in the DIE tree.  This is useful when @p die is
10922 /// e.g, DW_TAG_partial_unit that can be included in several places in
10923 /// the DIE tree.
10924 static scope_decl_sptr
get_scope_for_die(read_context & ctxt,Dwarf_Die * die,bool called_for_public_decl,size_t where_offset)10925 get_scope_for_die(read_context& ctxt,
10926 		  Dwarf_Die*	die,
10927 		  bool		called_for_public_decl,
10928 		  size_t	where_offset)
10929 {
10930   const die_source source_of_die = ctxt.get_die_source(die);
10931 
10932   translation_unit::language die_lang = translation_unit::LANG_UNKNOWN;
10933   ctxt.get_die_language(die, die_lang);
10934   if (is_c_language(die_lang))
10935     {
10936       ABG_ASSERT(dwarf_tag(die) != DW_TAG_member);
10937       return ctxt.global_scope();
10938     }
10939 
10940   Dwarf_Die cloned_die;
10941   if (die_die_attribute(die, DW_AT_specification, cloned_die, false)
10942       || die_die_attribute(die, DW_AT_abstract_origin, cloned_die, false))
10943     return get_scope_for_die(ctxt, &cloned_die,
10944 			     called_for_public_decl,
10945 			     where_offset);
10946 
10947   Dwarf_Die parent_die;
10948 
10949   if (!get_parent_die(ctxt, die, parent_die, where_offset))
10950     return ctxt.nil_scope();
10951 
10952   if (dwarf_tag(&parent_die) == DW_TAG_compile_unit
10953       || dwarf_tag(&parent_die) == DW_TAG_partial_unit
10954       || dwarf_tag(&parent_die) == DW_TAG_type_unit)
10955     {
10956       if (dwarf_tag(&parent_die) == DW_TAG_partial_unit
10957 	  || dwarf_tag(&parent_die) == DW_TAG_type_unit)
10958 	{
10959 	  ABG_ASSERT(source_of_die == ALT_DEBUG_INFO_DIE_SOURCE
10960 		 || source_of_die == TYPE_UNIT_DIE_SOURCE);
10961 	  return ctxt.cur_transl_unit()->get_global_scope();
10962 	}
10963 
10964       // For top level DIEs like DW_TAG_compile_unit, we just want to
10965       // return the global scope for the corresponding translation
10966       // unit.  This must have been set by
10967       // build_translation_unit_and_add_to_ir if we already started to
10968       // build the translation unit of parent_die.  Otherwise, just
10969       // return the global scope of the current translation unit.
10970       die_tu_map_type::const_iterator i =
10971 	ctxt.die_tu_map().find(dwarf_dieoffset(&parent_die));
10972       if (i != ctxt.die_tu_map().end())
10973 	return i->second->get_global_scope();
10974       return ctxt.cur_transl_unit()->get_global_scope();
10975     }
10976 
10977   scope_decl_sptr s;
10978   type_or_decl_base_sptr d;
10979   if (dwarf_tag(&parent_die) == DW_TAG_subprogram
10980       || dwarf_tag(&parent_die) == DW_TAG_array_type)
10981     // this is an entity defined in a scope that is a function.
10982     // Normally, I would say that this should be dropped.  But I have
10983     // seen a case where a typedef DIE needed by a function parameter
10984     // was defined right before the parameter, under the scope of the
10985     // function.  Yeah, weird.  So if I drop the typedef DIE, I'd drop
10986     // the function parm too.  So for that case, let's say that the
10987     // scope is the scope of the function itself.  Note that this is
10988     // an error of the DWARF emitter.  We should never see this DIE in
10989     // this context.
10990     {
10991       scope_decl_sptr s = get_scope_for_die(ctxt, &parent_die,
10992 					    called_for_public_decl,
10993 					    where_offset);
10994       if (is_anonymous_type_die(die))
10995 	// For anonymous type that have nothing to do in a function or
10996 	// array type context, let's put it in the containing
10997 	// namespace.  That is, do not let it be in a containing class
10998 	// or union where it has nothing to do.
10999 	while (is_class_or_union_type(s))
11000 	  {
11001 	    if (!get_parent_die(ctxt, &parent_die, parent_die, where_offset))
11002 	      return ctxt.nil_scope();
11003 	    s = get_scope_for_die(ctxt, &parent_die,
11004 				  called_for_public_decl,
11005 				  where_offset);
11006 	  }
11007       return s;
11008     }
11009   else
11010     d = build_ir_node_from_die(ctxt, &parent_die,
11011 			       called_for_public_decl,
11012 			       where_offset);
11013   s =  dynamic_pointer_cast<scope_decl>(d);
11014   if (!s)
11015     // this is an entity defined in someting that is not a scope.
11016     // Let's drop it.
11017     return ctxt.nil_scope();
11018 
11019   class_decl_sptr cl = dynamic_pointer_cast<class_decl>(d);
11020   if (cl && cl->get_is_declaration_only())
11021     {
11022       scope_decl_sptr scop  =
11023 	dynamic_pointer_cast<scope_decl>(cl->get_definition_of_declaration());
11024       if (scop)
11025 	s = scop;
11026       else
11027 	s = cl;
11028     }
11029   return s;
11030 }
11031 
11032 /// Convert a DWARF constant representing the value of the
11033 /// DW_AT_language property into the translation_unit::language
11034 /// enumerator.
11035 ///
11036 /// @param l the DWARF constant to convert.
11037 ///
11038 /// @return the resulting translation_unit::language enumerator.
11039 static translation_unit::language
dwarf_language_to_tu_language(size_t l)11040 dwarf_language_to_tu_language(size_t l)
11041 {
11042   switch (l)
11043     {
11044     case DW_LANG_C89:
11045       return translation_unit::LANG_C89;
11046     case DW_LANG_C:
11047       return translation_unit::LANG_C;
11048     case DW_LANG_Ada83:
11049       return translation_unit::LANG_Ada83;
11050     case DW_LANG_C_plus_plus:
11051       return translation_unit::LANG_C_plus_plus;
11052     case DW_LANG_Cobol74:
11053       return translation_unit::LANG_Cobol74;
11054     case DW_LANG_Cobol85:
11055       return translation_unit::LANG_Cobol85;
11056     case DW_LANG_Fortran77:
11057       return translation_unit::LANG_Fortran77;
11058     case DW_LANG_Fortran90:
11059       return translation_unit::LANG_Fortran90;
11060     case DW_LANG_Pascal83:
11061       return translation_unit::LANG_Pascal83;
11062     case DW_LANG_Modula2:
11063       return translation_unit::LANG_Modula2;
11064     case DW_LANG_Java:
11065       return translation_unit::LANG_Java;
11066     case DW_LANG_C99:
11067       return translation_unit::LANG_C99;
11068     case DW_LANG_Ada95:
11069       return translation_unit::LANG_Ada95;
11070     case DW_LANG_Fortran95:
11071       return translation_unit::LANG_Fortran95;
11072     case DW_LANG_PL1:
11073       return translation_unit::LANG_PL1;
11074     case DW_LANG_ObjC:
11075       return translation_unit::LANG_ObjC;
11076     case DW_LANG_ObjC_plus_plus:
11077       return translation_unit::LANG_ObjC_plus_plus;
11078 
11079 #ifdef HAVE_DW_LANG_Rust_enumerator
11080     case DW_LANG_Rust:
11081       return translation_unit::LANG_Rust;
11082 #endif
11083 
11084 #ifdef HAVE_DW_LANG_UPC_enumerator
11085     case DW_LANG_UPC:
11086       return translation_unit::LANG_UPC;
11087 #endif
11088 
11089 #ifdef HAVE_DW_LANG_D_enumerator
11090     case DW_LANG_D:
11091       return translation_unit::LANG_D;
11092 #endif
11093 
11094 #ifdef HAVE_DW_LANG_Python_enumerator
11095     case DW_LANG_Python:
11096       return translation_unit::LANG_Python;
11097 #endif
11098 
11099 #ifdef HAVE_DW_LANG_Go_enumerator
11100     case DW_LANG_Go:
11101       return translation_unit::LANG_Go;
11102 #endif
11103 
11104 #ifdef HAVE_DW_LANG_C11_enumerator
11105     case DW_LANG_C11:
11106       return translation_unit::LANG_C11;
11107 #endif
11108 
11109 #ifdef HAVE_DW_LANG_C_plus_plus_03_enumerator
11110       case DW_LANG_C_plus_plus_03:
11111 	return translation_unit::LANG_C_plus_plus_03;
11112 #endif
11113 
11114 #ifdef HAVE_DW_LANG_C_plus_plus_11_enumerator
11115     case DW_LANG_C_plus_plus_11:
11116       return translation_unit::LANG_C_plus_plus_11;
11117 #endif
11118 
11119 #ifdef HAVE_DW_LANG_C_plus_plus_14_enumerator
11120     case DW_LANG_C_plus_plus_14:
11121       return translation_unit::LANG_C_plus_plus_14;
11122 #endif
11123 
11124 #ifdef HAVE_DW_LANG_Mips_Assembler_enumerator
11125     case DW_LANG_Mips_Assembler:
11126       return translation_unit::LANG_Mips_Assembler;
11127 #endif
11128 
11129     default:
11130       return translation_unit::LANG_UNKNOWN;
11131     }
11132 }
11133 
11134 /// Get the default array lower bound value as defined by the DWARF
11135 /// specification, version 4, depending on the language of the
11136 /// translation unit.
11137 ///
11138 /// @param l the language of the translation unit.
11139 ///
11140 /// @return the default array lower bound value.
11141 static uint64_t
get_default_array_lower_bound(translation_unit::language l)11142 get_default_array_lower_bound(translation_unit::language l)
11143 {
11144   int value = 0;
11145   switch (l)
11146     {
11147     case translation_unit::LANG_UNKNOWN:
11148       value = 0;
11149       break;
11150     case translation_unit::LANG_Cobol74:
11151     case translation_unit::LANG_Cobol85:
11152       value = 1;
11153       break;
11154     case translation_unit::LANG_C89:
11155     case translation_unit::LANG_C99:
11156     case translation_unit::LANG_C11:
11157     case translation_unit::LANG_C:
11158     case translation_unit::LANG_C_plus_plus_03:
11159     case translation_unit::LANG_C_plus_plus_11:
11160     case translation_unit::LANG_C_plus_plus_14:
11161     case translation_unit::LANG_C_plus_plus:
11162     case translation_unit::LANG_ObjC:
11163     case translation_unit::LANG_ObjC_plus_plus:
11164     case translation_unit::LANG_Rust:
11165       value = 0;
11166       break;
11167     case translation_unit::LANG_Fortran77:
11168     case translation_unit::LANG_Fortran90:
11169     case translation_unit::LANG_Fortran95:
11170     case translation_unit::LANG_Ada83:
11171     case translation_unit::LANG_Ada95:
11172     case translation_unit::LANG_Pascal83:
11173     case translation_unit::LANG_Modula2:
11174       value = 1;
11175       break;
11176     case translation_unit::LANG_Java:
11177       value = 0;
11178       break;
11179     case translation_unit::LANG_PL1:
11180       value = 1;
11181       break;
11182     case translation_unit::LANG_UPC:
11183     case translation_unit::LANG_D:
11184     case translation_unit::LANG_Python:
11185     case translation_unit::LANG_Go:
11186     case translation_unit::LANG_Mips_Assembler:
11187       value = 0;
11188       break;
11189     }
11190 
11191   return value;
11192 }
11193 
11194 /// For a given offset, find the lower bound of a sorted vector of
11195 /// imported unit point offset.
11196 ///
11197 /// The lower bound is the smallest point (the point with the smallest
11198 /// offset) which is the greater than a given offset.
11199 ///
11200 /// @param imported_unit_points_type the sorted vector  of imported
11201 /// unit points.
11202 ///
11203 /// @param val the offset to consider when looking for the lower
11204 /// bound.
11205 ///
11206 /// @param r an iterator to the lower bound found.  This parameter is
11207 /// set iff the function returns true.
11208 ///
11209 /// @return true iff the lower bound has been found.
11210 static bool
find_lower_bound_in_imported_unit_points(const imported_unit_points_type & p,Dwarf_Off val,imported_unit_points_type::const_iterator & r)11211 find_lower_bound_in_imported_unit_points(const imported_unit_points_type& p,
11212 					 Dwarf_Off val,
11213 					 imported_unit_points_type::const_iterator& r)
11214 {
11215   imported_unit_point v(val);
11216   imported_unit_points_type::const_iterator result =
11217     std::lower_bound(p.begin(), p.end(), v);
11218 
11219   bool is_ok = result != p.end();
11220 
11221   if (is_ok)
11222     r = result;
11223 
11224   return is_ok;
11225 }
11226 
11227 /// Given a DW_TAG_compile_unit, build and return the corresponding
11228 /// abigail::translation_unit ir node.  Note that this function
11229 /// recursively reads the children dies of the current DIE and
11230 /// populates the resulting translation unit.
11231 ///
11232 /// @param ctxt the read_context to use.
11233 ///
11234 /// @param die the DW_TAG_compile_unit DIE to consider.
11235 ///
11236 /// @param address_size the size of the addresses expressed in this
11237 /// translation unit in general.
11238 ///
11239 /// @return a pointer to the resulting translation_unit.
11240 static translation_unit_sptr
build_translation_unit_and_add_to_ir(read_context & ctxt,Dwarf_Die * die,char address_size)11241 build_translation_unit_and_add_to_ir(read_context&	ctxt,
11242 				     Dwarf_Die*	die,
11243 				     char		address_size)
11244 {
11245   translation_unit_sptr result;
11246 
11247   if (!die)
11248     return result;
11249   ABG_ASSERT(dwarf_tag(die) == DW_TAG_compile_unit);
11250 
11251   // Clear the part of the context that is dependent on the translation
11252   // unit we are reading.
11253   ctxt.clear_per_translation_unit_data();
11254 
11255   ctxt.cur_tu_die(die);
11256 
11257   string path = die_string_attribute(die, DW_AT_name);
11258   if (path == "<artificial>")
11259     {
11260       // This is a file artificially generated by the compiler, so its
11261       // name is '<artificial>'.  As we want all different translation
11262       // units to have unique path names, let's suffix this path name
11263       // with its die offset.
11264       std::ostringstream o;
11265       o << path << "-" << std::hex << dwarf_dieoffset(die);
11266       path = o.str();
11267     }
11268   string compilation_dir = die_string_attribute(die, DW_AT_comp_dir);
11269 
11270   uint64_t lang = 0;
11271   die_unsigned_constant_attribute(die, DW_AT_language, lang);
11272   translation_unit::language language = dwarf_language_to_tu_language(lang);
11273 
11274   corpus_sptr corp = ctxt.current_corpus();
11275 
11276   if (ctxt.merge_translation_units())
11277     {
11278       // See if there is already a translation for the address_size
11279       // and language. If so, just reuse that one.
11280       for (const auto& tu : corp->get_translation_units())
11281 	{
11282 	  if (tu->get_address_size() == address_size
11283 	      && tu->get_language() == language)
11284 	    {
11285 	      result = tu;
11286 	      break;
11287 	    }
11288 	}
11289     }
11290   else
11291     {
11292       // See if the same translation unit exits already in the current
11293       // corpus.  Sometimes, the same translation unit can be present
11294       // several times in the same debug info.  The content of the
11295       // different instances of the translation unit are different.  So to
11296       // represent that, we are going to re-use the same translation
11297       // unit.  That is, it's going to be the union of all the translation
11298       // units of the same path.
11299       const std::string& abs_path =
11300 	  compilation_dir.empty() ? path : compilation_dir + "/" + path;
11301       result = corp->find_translation_unit(abs_path);
11302     }
11303 
11304   if (!result)
11305     {
11306       result.reset(new translation_unit(ctxt.env(),
11307 					(ctxt.merge_translation_units()
11308 					 ? "" : path),
11309 					address_size));
11310       if (!ctxt.merge_translation_units())
11311 	result->set_compilation_dir_path(compilation_dir);
11312       ctxt.current_corpus()->add(result);
11313       result->set_language(language);
11314     }
11315 
11316   ctxt.cur_transl_unit(result);
11317   ctxt.die_tu_map()[dwarf_dieoffset(die)] = result;
11318 
11319   Dwarf_Die child;
11320   if (dwarf_child(die, &child) != 0)
11321     return result;
11322 
11323   result->set_is_constructed(false);
11324 
11325   do
11326     build_ir_node_from_die(ctxt, &child,
11327 			   die_is_public_decl(&child),
11328 			   dwarf_dieoffset(&child));
11329   while (dwarf_siblingof(&child, &child) == 0);
11330 
11331   if (!ctxt.var_decls_to_re_add_to_tree().empty())
11332     for (list<var_decl_sptr>::const_iterator v =
11333 	   ctxt.var_decls_to_re_add_to_tree().begin();
11334 	 v != ctxt.var_decls_to_re_add_to_tree().end();
11335 	 ++v)
11336       {
11337 	if (is_member_decl(*v))
11338 	  continue;
11339 
11340 	ABG_ASSERT((*v)->get_scope());
11341 	string demangled_name =
11342 	  demangle_cplus_mangled_name((*v)->get_linkage_name());
11343 	if (!demangled_name.empty())
11344 	  {
11345 	    std::list<string> fqn_comps;
11346 	    fqn_to_components(demangled_name, fqn_comps);
11347 	    string mem_name = fqn_comps.back();
11348 	    fqn_comps.pop_back();
11349 	    class_decl_sptr class_type;
11350 	    string ty_name;
11351 	    if (!fqn_comps.empty())
11352 	      {
11353 		ty_name = components_to_type_name(fqn_comps);
11354 		class_type =
11355 		  lookup_class_type(ty_name, *ctxt.cur_transl_unit());
11356 	      }
11357 	    if (class_type)
11358 	      {
11359 		// So we are seeing a member variable for which there
11360 		// is a global variable definition DIE not having a
11361 		// reference attribute pointing back to the member
11362 		// variable declaration DIE.  Thus remove the global
11363 		// variable definition from its current non-class
11364 		// scope ...
11365 		decl_base_sptr d;
11366 		if ((d = lookup_var_decl_in_scope(mem_name, class_type)))
11367 		  // This is the data member with the same name in cl.
11368 		  // We just need to flag it as static.
11369 		  ;
11370 		else
11371 		  {
11372 		    // In this case there is no data member with the
11373 		    // same name in cl already.  Let's add it there then
11374 		    // ...
11375 		    remove_decl_from_scope(*v);
11376 		    d = add_decl_to_scope(*v, class_type);
11377 		  }
11378 
11379 		ABG_ASSERT(dynamic_pointer_cast<var_decl>(d));
11380 		// Let's flag the data member as static.
11381 		set_member_is_static(d, true);
11382 	      }
11383 	  }
11384       }
11385   ctxt.var_decls_to_re_add_to_tree().clear();
11386 
11387   result->set_is_constructed(true);
11388 
11389   return result;
11390 }
11391 
11392 /// Build a abigail::namespace_decl out of a DW_TAG_namespace or
11393 /// DW_TAG_module (for fortran) DIE.
11394 ///
11395 /// Note that this function connects the DW_TAG_namespace to the IR
11396 /// being currently created, reads the children of the DIE and
11397 /// connects them to the IR as well.
11398 ///
11399 /// @param ctxt the read context to use.
11400 ///
11401 /// @param die the DIE to read from.  Must be either DW_TAG_namespace
11402 /// or DW_TAG_module.
11403 ///
11404 /// @param where_offset the offset of the DIE where we are "logically"
11405 /// positionned at, in the DIE tree.  This is useful when @p die is
11406 /// e.g, DW_TAG_partial_unit that can be included in several places in
11407 /// the DIE tree.
11408 ///
11409 /// @return the resulting @ref abigail::namespace_decl or NULL if it
11410 /// couldn't be created.
11411 static namespace_decl_sptr
build_namespace_decl_and_add_to_ir(read_context & ctxt,Dwarf_Die * die,size_t where_offset)11412 build_namespace_decl_and_add_to_ir(read_context&	ctxt,
11413 				   Dwarf_Die*		die,
11414 				   size_t		where_offset)
11415 {
11416   namespace_decl_sptr result;
11417 
11418   if (!die)
11419     return result;
11420 
11421   unsigned tag = dwarf_tag(die);
11422   if (tag != DW_TAG_namespace && tag != DW_TAG_module)
11423     return result;
11424 
11425   scope_decl_sptr scope = get_scope_for_die(ctxt, die,
11426 					    /*called_for_public_decl=*/false,
11427 					    where_offset);
11428 
11429   string name, linkage_name;
11430   location loc;
11431   die_loc_and_name(ctxt, die, loc, name, linkage_name);
11432 
11433   result.reset(new namespace_decl(ctxt.env(), name, loc));
11434   add_decl_to_scope(result, scope.get());
11435   ctxt.associate_die_to_decl(die, result, where_offset);
11436 
11437   Dwarf_Die child;
11438   if (dwarf_child(die, &child) != 0)
11439     return result;
11440 
11441   ctxt.scope_stack().push(result.get());
11442   do
11443     build_ir_node_from_die(ctxt, &child,
11444 			   /*called_from_public_decl=*/false,
11445 			   where_offset);
11446   while (dwarf_siblingof(&child, &child) == 0);
11447   ctxt.scope_stack().pop();
11448 
11449   return result;
11450 }
11451 
11452 /// Build a @ref type_decl out of a DW_TAG_base_type DIE.
11453 ///
11454 /// @param ctxt the read context to use.
11455 ///
11456 /// @param die the DW_TAG_base_type to consider.
11457 ///
11458 /// @param where_offset where we are logically at in the DIE stream.
11459 ///
11460 /// @return the resulting decl_base_sptr.
11461 static type_decl_sptr
build_type_decl(read_context & ctxt,Dwarf_Die * die,size_t where_offset)11462 build_type_decl(read_context& ctxt, Dwarf_Die* die, size_t where_offset)
11463 {
11464   type_decl_sptr result;
11465 
11466   if (!die)
11467     return result;
11468   ABG_ASSERT(dwarf_tag(die) == DW_TAG_base_type);
11469 
11470   uint64_t byte_size = 0, bit_size = 0;
11471   if (!die_unsigned_constant_attribute(die, DW_AT_byte_size, byte_size))
11472     if (!die_unsigned_constant_attribute(die, DW_AT_bit_size, bit_size))
11473       return result;
11474 
11475   if (bit_size == 0 && byte_size != 0)
11476     // Update the bit size.
11477     bit_size = byte_size * 8;
11478 
11479   string type_name, linkage_name;
11480   location loc;
11481   die_loc_and_name(ctxt, die, loc, type_name, linkage_name);
11482 
11483   if (byte_size == 0)
11484     {
11485       // The size of the type is zero, that must mean that we are
11486       // looking at the definition of the void type.
11487       if (type_name == "void")
11488 	result = is_type_decl(build_ir_node_for_void_type(ctxt));
11489       else
11490 	// A type of size zero that is not void? Hmmh, I am not sure
11491 	// what that means.  Return nil for now.
11492 	return result;
11493     }
11494 
11495   if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
11496     {
11497       string normalized_type_name = type_name;
11498       integral_type int_type;
11499       if (parse_integral_type(type_name, int_type))
11500 	normalized_type_name = int_type.to_string();
11501       result = lookup_basic_type(normalized_type_name, *corp);
11502     }
11503 
11504   if (!result)
11505     if (corpus_sptr corp = ctxt.current_corpus())
11506       result = lookup_basic_type(type_name, *corp);
11507   if (!result)
11508     result.reset(new type_decl(ctxt.env(), type_name, bit_size,
11509 			       /*alignment=*/0, loc, linkage_name));
11510   ctxt.associate_die_to_type(die, result, where_offset);
11511   return result;
11512 }
11513 
11514 /// Construct the type that is to be used as the underlying type of an
11515 /// enum.
11516 ///
11517 /// @param ctxt the read context to use.
11518 ///
11519 /// @param enum_name the name of the enum that this type is going to
11520 /// be the underlying type of.
11521 ///
11522 /// @param enum_size the size of the enum.
11523 ///
11524 /// @param is_anonymous whether the underlying type is anonymous or
11525 /// not. By default, this should be set to true as before c++11 (and
11526 /// in C), it's almost the case.
11527 static type_decl_sptr
build_enum_underlying_type(read_context & ctxt,string enum_name,uint64_t enum_size,bool is_anonymous=true)11528 build_enum_underlying_type(read_context& ctxt,
11529 			   string enum_name,
11530 			   uint64_t enum_size,
11531 			   bool is_anonymous = true)
11532 {
11533   string underlying_type_name =
11534     build_internal_underlying_enum_type_name(enum_name, is_anonymous,
11535 					     enum_size);
11536 
11537   type_decl_sptr result(new type_decl(ctxt.env(), underlying_type_name,
11538 				      enum_size, enum_size, location()));
11539   result->set_is_anonymous(is_anonymous);
11540   result->set_is_artificial(true);
11541   translation_unit_sptr tu = ctxt.cur_transl_unit();
11542   decl_base_sptr d = add_decl_to_scope(result, tu->get_global_scope().get());
11543   result = dynamic_pointer_cast<type_decl>(d);
11544   ABG_ASSERT(result);
11545   canonicalize(result);
11546   return result;
11547 }
11548 
11549 /// Build an enum_type_decl from a DW_TAG_enumeration_type DIE.
11550 ///
11551 /// @param ctxt the read context to use.
11552 ///
11553 /// @param die the DIE to read from.
11554 ///
11555 /// @param scope the scope of the final enum.  Note that this function
11556 /// does *NOT* add the built type to this scope.  The scope is just so
11557 /// that the function knows how to name anonymous enums.
11558 ///
11559 /// @param is_declaration_only is true if the DIE denoted by @p die is
11560 /// a declaration-only DIE.
11561 ///
11562 /// @return the built enum_type_decl or NULL if it could not be built.
11563 static enum_type_decl_sptr
build_enum_type(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,size_t where_offset,bool is_declaration_only)11564 build_enum_type(read_context&	ctxt,
11565 		Dwarf_Die*	die,
11566 		scope_decl*	scope,
11567 		size_t		where_offset,
11568 		bool		is_declaration_only)
11569 {
11570   enum_type_decl_sptr result;
11571   if (!die)
11572     return result;
11573 
11574   unsigned tag = dwarf_tag(die);
11575   if (tag != DW_TAG_enumeration_type)
11576     return result;
11577 
11578   string name, linkage_name;
11579   location loc;
11580   die_loc_and_name(ctxt, die, loc, name, linkage_name);
11581 
11582   bool is_anonymous = false;
11583   // If the enum is anonymous, let's give it a name.
11584   if (name.empty())
11585     {
11586       name = get_internal_anonymous_die_prefix_name(die);
11587       ABG_ASSERT(!name.empty());
11588       // But we remember that the type is anonymous.
11589       is_anonymous = true;
11590 
11591       if (size_t s = scope->get_num_anonymous_member_enums())
11592 	name = build_internal_anonymous_die_name(name, s);
11593     }
11594 
11595   bool use_odr = ctxt.odr_is_relevant(die);
11596   // If the type has location, then associate it to its
11597   // representation.  This way, all occurences of types with the same
11598   // representation (name) and location can be later detected as being
11599   // for the same type.
11600 
11601   if (!is_anonymous)
11602     {
11603       if (use_odr)
11604 	{
11605 	  if (enum_type_decl_sptr pre_existing_enum =
11606 	      is_enum_type(ctxt.lookup_artifact_from_die(die)))
11607 	    result = pre_existing_enum;
11608 	}
11609       else if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
11610 	{
11611 	  if (loc)
11612 	    result = lookup_enum_type_per_location(loc.expand(), *corp);
11613 	}
11614       else if (loc)
11615 	{
11616 	  if (enum_type_decl_sptr pre_existing_enum =
11617 	      is_enum_type(ctxt.lookup_artifact_from_die(die)))
11618 	    if (pre_existing_enum->get_location() == loc)
11619 	      result = pre_existing_enum;
11620 	}
11621 
11622       if (result)
11623 	{
11624 	  ctxt.associate_die_to_type(die, result, where_offset);
11625 	  return result;
11626 	}
11627     }
11628   // TODO: for anonymous enums, maybe have a map of loc -> enums so that
11629   // we can look them up?
11630 
11631   uint64_t size = 0;
11632   if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
11633     size *= 8;
11634   bool is_artificial = die_is_artificial(die);
11635 
11636   // for now we consider that underlying types of enums are all anonymous
11637   bool enum_underlying_type_is_anonymous= true;
11638 
11639   enum_type_decl::enumerators enms;
11640   Dwarf_Die child;
11641   if (dwarf_child(die, &child) == 0)
11642     {
11643       do
11644 	{
11645 	  if (dwarf_tag(&child) != DW_TAG_enumerator)
11646 	    continue;
11647 
11648 	  string n, m;
11649 	  location l;
11650 	  die_loc_and_name(ctxt, &child, l, n, m);
11651 	  uint64_t val = 0;
11652 	  die_unsigned_constant_attribute(&child, DW_AT_const_value, val);
11653 	  enms.push_back(enum_type_decl::enumerator(ctxt.env(), n, val));
11654 	}
11655       while (dwarf_siblingof(&child, &child) == 0);
11656     }
11657 
11658   // DWARF up to version 4 (at least) doesn't seem to carry the
11659   // underlying type, so let's create an artificial one here, which
11660   // sole purpose is to be passed to the constructor of the
11661   // enum_type_decl type.
11662   type_decl_sptr t =
11663     build_enum_underlying_type(ctxt, name, size,
11664 			       enum_underlying_type_is_anonymous);
11665   t->set_is_declaration_only(is_declaration_only);
11666 
11667   result.reset(new enum_type_decl(name, loc, t, enms, linkage_name));
11668   result->set_is_anonymous(is_anonymous);
11669   result->set_is_declaration_only(is_declaration_only);
11670   result->set_is_artificial(is_artificial);
11671   ctxt.associate_die_to_type(die, result, where_offset);
11672 
11673   ctxt.maybe_schedule_declaration_only_enum_for_resolution(result);
11674 
11675   return result;
11676 }
11677 
11678 /// Once a function_decl has been built and added to a class as a
11679 /// member function, this function updates the information of the
11680 /// function_decl concerning the properties of its relationship with
11681 /// the member class.  That is, it updates properties like
11682 /// virtualness, access, constness, cdtorness, etc ...
11683 ///
11684 /// @param die the DIE of the function_decl that has been just built.
11685 ///
11686 /// @param f the function_decl that has just been built from @p die.
11687 ///
11688 /// @param klass the @ref class_or_union that @p f belongs to.
11689 ///
11690 /// @param ctxt the context used to read the ELF/DWARF information.
11691 static void
finish_member_function_reading(Dwarf_Die * die,const function_decl_sptr & f,const class_or_union_sptr & klass,read_context & ctxt)11692 finish_member_function_reading(Dwarf_Die*		  die,
11693 			       const function_decl_sptr&  f,
11694 			       const class_or_union_sptr& klass,
11695 			       read_context&		  ctxt)
11696 {
11697   ABG_ASSERT(klass);
11698 
11699   method_decl_sptr m = is_method_decl(f);
11700   ABG_ASSERT(m);
11701 
11702   method_type_sptr method_t = is_method_type(m->get_type());
11703   ABG_ASSERT(method_t);
11704 
11705   bool is_ctor = (f->get_name() == klass->get_name());
11706   bool is_dtor = (!f->get_name().empty()
11707 		  && static_cast<string>(f->get_name())[0] == '~');
11708   bool is_virtual = die_is_virtual(die);
11709   int64_t vindex = -1;
11710   if (is_virtual)
11711     die_virtual_function_index(die, vindex);
11712   access_specifier access = public_access;
11713   if (class_decl_sptr c = is_class_type(klass))
11714     if (!c->is_struct())
11715       access = private_access;
11716   die_access_specifier(die, access);
11717 
11718   bool is_static = false;
11719   {
11720     // Let's see if the first parameter is a pointer to an instance of
11721     // the same class type as the current class and has a
11722     // DW_AT_artificial attribute flag set.  We are not looking at
11723     // DW_AT_object_pointer (for DWARF 3) because it wasn't being
11724     // emitted in GCC 4_4, which was already DWARF 3.
11725     function_decl::parameter_sptr first_parm;
11726     if (!f->get_parameters().empty())
11727       first_parm = f->get_parameters()[0];
11728 
11729     bool is_artificial = first_parm && first_parm->get_is_artificial();
11730     type_base_sptr this_ptr_type, other_klass;
11731 
11732     if (is_artificial)
11733       this_ptr_type = first_parm->get_type();
11734 
11735     // Sometimes, the type of the "this" pointer is "const class_type* const".
11736     //
11737     // Meaning that the "this pointer" itself is const qualified.  So
11738     // let's get the underlying underlying non-qualified pointer.
11739     if (qualified_type_def_sptr q = is_qualified_type(this_ptr_type))
11740       this_ptr_type = q->get_underlying_type();
11741 
11742     // Now, get the pointed-to type.
11743     if (pointer_type_def_sptr p = is_pointer_type(this_ptr_type))
11744       other_klass = p->get_pointed_to_type();
11745 
11746     // Sometimes, other_klass can be qualified; e.g, volatile.  In
11747     // that case, let's get the unqualified version of other_klass.
11748     if (qualified_type_def_sptr q = is_qualified_type(other_klass))
11749       other_klass = q->get_underlying_type();
11750 
11751     if (other_klass
11752 	&& get_type_name(other_klass) == klass->get_qualified_name())
11753       ;
11754     else
11755       is_static = true;
11756 
11757     if (is_static)
11758       {
11759 	// If we are looking at a DWARF version that is high enough
11760 	// for the DW_AT_object_pointer attribute to be present, let's
11761 	// see if it's present.  If it is, then the current member
11762 	// function is not static.
11763 	Dwarf_Die object_pointer_die;
11764 	if (die_has_object_pointer(die, object_pointer_die))
11765 	  is_static = false;
11766       }
11767   }
11768   set_member_access_specifier(m, access);
11769   if (vindex != -1)
11770     set_member_function_vtable_offset(m, vindex);
11771   set_member_function_is_virtual(m, is_virtual);
11772   set_member_is_static(m, is_static);
11773   set_member_function_is_ctor(m, is_ctor);
11774   set_member_function_is_dtor(m, is_dtor);
11775   set_member_function_is_const(m, method_t->get_is_const());
11776 
11777   ABG_ASSERT(is_member_function(m));
11778 
11779   if (is_virtual && !f->get_linkage_name().empty() && !f->get_symbol())
11780     {
11781       // This is a virtual member function which has a linkage name
11782       // but has no underlying symbol set.
11783       //
11784       // The underlying elf symbol to set to this function can show up
11785       // later in the DWARF input or it can be that, because of some
11786       // compiler optimization, the relation between this function and
11787       // its underlying elf symbol is simply not emitted in the DWARF.
11788       //
11789       // Let's thus schedule this function for a later fixup pass
11790       // (performed by
11791       // read_context::fixup_functions_with_no_symbols()) that will
11792       // set its underlying symbol.
11793       //
11794       // Note that if the underying symbol is encountered later in the
11795       // DWARF input, then the part of build_function_decl() that
11796       // updates the function to set its underlying symbol will
11797       // de-schedule this function wrt fixup pass.
11798       Dwarf_Off die_offset = dwarf_dieoffset(die);
11799       die_function_decl_map_type &fns_with_no_symbol =
11800 	ctxt.die_function_decl_with_no_symbol_map();
11801       die_function_decl_map_type::const_iterator i =
11802 	fns_with_no_symbol.find(die_offset);
11803       if (i == fns_with_no_symbol.end())
11804 	fns_with_no_symbol[die_offset] = f;
11805     }
11806 
11807 }
11808 
11809 /// If a function DIE has attributes which have not yet been read and
11810 /// added to the internal representation that represents that function
11811 /// then read those extra attributes and update the internal
11812 /// representation.
11813 ///
11814 /// @param ctxt the read context to use.
11815 ///
11816 /// @param die the function DIE to consider.
11817 ///
11818 /// @param where_offset where we logical are, currently, in the stream
11819 /// of DIEs.  If you don't know what this is, you can just set it to zero.
11820 ///
11821 /// @param existing_fn the representation of the function to update.
11822 ///
11823 /// @return the updated function  representation.
11824 static function_decl_sptr
maybe_finish_function_decl_reading(read_context & ctxt,Dwarf_Die * die,size_t where_offset,const function_decl_sptr & existing_fn)11825 maybe_finish_function_decl_reading(read_context&		ctxt,
11826 				   Dwarf_Die*			die,
11827 				   size_t			where_offset,
11828 				   const function_decl_sptr&	existing_fn)
11829 {
11830   function_decl_sptr result = build_function_decl(ctxt, die,
11831 						  where_offset,
11832 						  existing_fn);
11833 
11834   return result;
11835 }
11836 
11837 /// Lookup a class or a typedef with a given qualified name in the
11838 /// corpus that a given scope belongs to.
11839 ///
11840 /// @param scope the scope to consider.
11841 ///
11842 /// @param type_name the qualified name of the type to look for.
11843 ///
11844 /// @return the typedef or class type found.
11845 static type_base_sptr
lookup_class_or_typedef_from_corpus(scope_decl * scope,const string & type_name)11846 lookup_class_or_typedef_from_corpus(scope_decl* scope, const string& type_name)
11847 {
11848   string qname = build_qualified_name(scope, type_name);
11849   corpus* corp = scope->get_corpus();
11850   type_base_sptr result = lookup_class_or_typedef_type(qname, *corp);
11851   return result;
11852 }
11853 
11854 /// Lookup a class of typedef type from the current corpus being
11855 /// constructed.
11856 ///
11857 /// The type being looked for has the same name as a given DIE.
11858 ///
11859 /// @param ctxt the reading context to use.
11860 ///
11861 /// @param die the DIE which has the same name as the type we are
11862 /// looking for.
11863 ///
11864 /// @param called_for_public_decl whether this function is being
11865 /// called from a a publicly defined declaration.
11866 ///
11867 /// @param where_offset where we are logically at in the DIE stream.
11868 ///
11869 /// @return the type found.
11870 static type_base_sptr
lookup_class_or_typedef_from_corpus(read_context & ctxt,Dwarf_Die * die,bool called_for_public_decl,size_t where_offset)11871 lookup_class_or_typedef_from_corpus(read_context& ctxt,
11872 				    Dwarf_Die* die,
11873 				    bool called_for_public_decl,
11874 				    size_t where_offset)
11875 {
11876   if (!die)
11877     return class_decl_sptr();
11878 
11879   string class_name = die_string_attribute(die, DW_AT_name);
11880   if (class_name.empty())
11881     return class_decl_sptr();
11882 
11883   scope_decl_sptr scope = get_scope_for_die(ctxt, die,
11884 					    called_for_public_decl,
11885 					    where_offset);
11886   if (scope)
11887     return lookup_class_or_typedef_from_corpus(scope.get(), class_name);
11888 
11889   return type_base_sptr();
11890 }
11891 
11892 /// Lookup a class, typedef or enum type with a given qualified name
11893 /// in the corpus that a given scope belongs to.
11894 ///
11895 /// @param scope the scope to consider.
11896 ///
11897 /// @param type_name the qualified name of the type to look for.
11898 ///
11899 /// @return the typedef, enum or class type found.
11900 static type_base_sptr
lookup_class_typedef_or_enum_type_from_corpus(scope_decl * scope,const string & type_name)11901 lookup_class_typedef_or_enum_type_from_corpus(scope_decl* scope,
11902 					      const string& type_name)
11903 {
11904   string qname = build_qualified_name(scope, type_name);
11905   corpus* corp = scope->get_corpus();
11906   type_base_sptr result = lookup_class_typedef_or_enum_type(qname, *corp);
11907   return result;
11908 }
11909 
11910 /// Lookup a class, typedef or enum type in a given scope, in the
11911 /// corpus that scope belongs to.
11912 ///
11913 /// @param die the DIE of the class, typedef or enum to lookup.
11914 ///
11915 /// @param anonymous_member_type_idx if @p DIE represents an anonymous
11916 /// type, this is the index of that anonymous type in its scope, in
11917 /// case there are several anonymous types of the same kind in that
11918 /// scope.
11919 ///
11920 /// @param scope the scope in which to look the type for.
11921 ///
11922 /// @return the typedef, enum or class type found.
11923 static type_base_sptr
lookup_class_typedef_or_enum_type_from_corpus(Dwarf_Die * die,size_t anonymous_member_type_idx,scope_decl * scope)11924 lookup_class_typedef_or_enum_type_from_corpus(Dwarf_Die* die,
11925 					      size_t anonymous_member_type_idx,
11926 					      scope_decl* scope)
11927 {
11928   if (!die)
11929     return class_decl_sptr();
11930 
11931   string type_name = die_string_attribute(die, DW_AT_name);
11932   if (is_anonymous_type_die(die))
11933     type_name =
11934       get_internal_anonymous_die_name(die, anonymous_member_type_idx);
11935 
11936   if (type_name.empty())
11937     return class_decl_sptr();
11938 
11939   return lookup_class_typedef_or_enum_type_from_corpus(scope, type_name);
11940 }
11941 
11942 /// Test if a DIE represents a function that is a member of a given
11943 /// class type.
11944 ///
11945 /// @param ctxt the reading context.
11946 ///
11947 /// @param function_die the DIE of the function to consider.
11948 ///
11949 /// @param class_type the class type to consider.
11950 ///
11951 /// @param where_offset where we are logically at in the DIE stream.
11952 ///
11953 /// @return the method declaration corresponding to the member
11954 /// function of @p class_type, iff @p function_die is for a member
11955 /// function of @p class_type.
11956 static method_decl_sptr
is_function_for_die_a_member_of_class(read_context & ctxt,Dwarf_Die * function_die,const class_or_union_sptr & class_type)11957 is_function_for_die_a_member_of_class(read_context& ctxt,
11958 				      Dwarf_Die* function_die,
11959 				      const class_or_union_sptr& class_type)
11960 {
11961   type_or_decl_base_sptr artifact = ctxt.lookup_artifact_from_die(function_die);
11962 
11963   if (!artifact)
11964     return method_decl_sptr();
11965 
11966   method_decl_sptr method = is_method_decl(artifact);
11967   method_type_sptr method_type;
11968 
11969   if (method)
11970     method_type = method->get_type();
11971   else
11972     method_type = is_method_type(artifact);
11973   ABG_ASSERT(method_type);
11974 
11975   class_or_union_sptr method_class = method_type->get_class_type();
11976   ABG_ASSERT(method_class);
11977 
11978   string method_class_name = method_class->get_qualified_name(),
11979     class_type_name = class_type->get_qualified_name();
11980 
11981   if (method_class_name == class_type_name)
11982     {
11983       //ABG_ASSERT(class_type.get() == method_class.get());
11984       return method;
11985     }
11986 
11987   return method_decl_sptr();
11988 }
11989 
11990 /// If a given function DIE represents an existing member function of
11991 /// a given class, then update that member function with new
11992 /// properties present in the DIE.  Otherwise, if the DIE represents a
11993 /// new member function that is not already present in the class then
11994 /// add that new member function to the class.
11995 ///
11996 /// @param ctxt the reading context.
11997 ///
11998 /// @param function_die the DIE of the potential member function to
11999 /// consider.
12000 ///
12001 /// @param class_type the class type to consider.
12002 ///
12003 /// @param called_from_public_decl is true iff this function was
12004 /// called from a publicly defined and exported declaration.
12005 ///
12006 /// @param where_offset where we are logically at in the DIE stream.
12007 ///
12008 /// @return the method decl representing the member function.
12009 static method_decl_sptr
add_or_update_member_function(read_context & ctxt,Dwarf_Die * function_die,const class_or_union_sptr & class_type,bool called_from_public_decl,size_t where_offset)12010 add_or_update_member_function(read_context& ctxt,
12011 			      Dwarf_Die* function_die,
12012 			      const class_or_union_sptr& class_type,
12013 			      bool called_from_public_decl,
12014 			      size_t where_offset)
12015 {
12016   method_decl_sptr method =
12017     is_function_for_die_a_member_of_class(ctxt, function_die, class_type);
12018 
12019   if (!method)
12020     method = is_method_decl(build_ir_node_from_die(ctxt, function_die,
12021 						   class_type.get(),
12022 						   called_from_public_decl,
12023 						   where_offset));
12024   if (!method)
12025     return method_decl_sptr();
12026 
12027   finish_member_function_reading(function_die,
12028 				 is_function_decl(method),
12029 				 class_type, ctxt);
12030   return method;
12031 }
12032 
/// Build an IR node for a class type from a DW_TAG_structure_type or
/// DW_TAG_class_type DIE and add that node to the ABI corpus being
/// currently built.
///
/// If the DIE represents a class type that already exists, then update
/// the existing class type with the new properties found in the DIE.
///
/// It means that this function can also update an existing
/// class_decl node with data members, member functions and other
/// properties coming from the DIE.
12043 ///
12044 /// @param ctxt the read context to consider.
12045 ///
12046 /// @param die the DIE to read information from.  Must be either a
12047 /// DW_TAG_structure_type or a DW_TAG_class_type.
12048 ///
12049 /// @param scope a pointer to the scope_decl* under which this class
12050 /// is to be added to.
12051 ///
12052 /// @param is_struct whether the class was declared as a struct.
12053 ///
12054 /// @param klass if non-null, this is a klass to append the members
12055 /// to.  Otherwise, this function just builds the class from scratch.
12056 ///
12057 /// @param called_from_public_decl set to true if this class is being
12058 /// called from a "Public declaration like vars or public symbols".
12059 ///
12060 /// @param where_offset the offset of the DIE where we are "logically"
12061 /// positionned at, in the DIE tree.  This is useful when @p die is
12062 /// e.g, DW_TAG_partial_unit that can be included in several places in
12063 /// the DIE tree.
12064 ///
12065 /// @param is_declaration_only is true if the DIE denoted by @p die is
12066 /// a declaration-only DIE.
12067 ///
12068 /// @return the resulting class_type.
12069 static class_decl_sptr
add_or_update_class_type(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,bool is_struct,class_decl_sptr klass,bool called_from_public_decl,size_t where_offset,bool is_declaration_only)12070 add_or_update_class_type(read_context&	 ctxt,
12071 			 Dwarf_Die*	 die,
12072 			 scope_decl*	 scope,
12073 			 bool		 is_struct,
12074 			 class_decl_sptr klass,
12075 			 bool		 called_from_public_decl,
12076 			 size_t		 where_offset,
12077 			 bool		 is_declaration_only)
12078 {
12079   class_decl_sptr result;
12080   if (!die)
12081     return result;
12082 
12083   const die_source source = ctxt.get_die_source(die);
12084 
12085   unsigned tag = dwarf_tag(die);
12086 
12087   if (tag != DW_TAG_class_type && tag != DW_TAG_structure_type)
12088     return result;
12089 
12090   {
12091     die_class_or_union_map_type::const_iterator i =
12092       ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12093     if (i != ctxt.die_wip_classes_map(source).end())
12094       {
12095 	class_decl_sptr class_type = is_class_type(i->second);
12096 	ABG_ASSERT(class_type);
12097 	return class_type;
12098       }
12099   }
12100 
12101   if (!ctxt.die_is_in_cplus_plus(die))
12102     // In c++, a given class might be put together "piecewise".  That
12103     // is, in a translation unit, some data members of that class
12104     // might be defined; then in another later, some member types
12105     // might be defined.  So we can't just re-use a class "verbatim"
12106     // just because we've seen previously.  So in c++, re-using the
12107     // class is a much clever process.  In the other languages however
12108     // (like in C) we can re-use a class definition verbatim.
12109     if (class_decl_sptr class_type =
12110 	is_class_type(ctxt.lookup_type_from_die(die)))
12111       if (!class_type->get_is_declaration_only())
12112 	return class_type;
12113 
12114   string name, linkage_name;
12115   location loc;
12116   die_loc_and_name(ctxt, die, loc, name, linkage_name);
12117 
12118   bool is_anonymous = false;
12119   if (name.empty())
12120     {
12121       // So we are looking at an anonymous struct.  Let's
12122       // give it a name.
12123       name = get_internal_anonymous_die_prefix_name(die);
12124       ABG_ASSERT(!name.empty());
12125       // But we remember that the type is anonymous.
12126       is_anonymous = true;
12127 
12128       if (size_t s = scope->get_num_anonymous_member_classes())
12129 	name = build_internal_anonymous_die_name(name, s);
12130     }
12131 
12132   if (!is_anonymous)
12133     {
12134       if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
12135 	{
12136 	  if (loc)
12137 	    // TODO: if there is only one class defined in the corpus
12138 	    // for this location, then re-use it.  But if there are
12139 	    // more than one, then do not re-use it, for now.
12140 	    result = lookup_class_type_per_location(loc.expand(), *corp);
12141 	  else
12142 	    // TODO: if there is just one class for that name defined,
12143 	    // then re-use it.  Otherwise, don't.
12144 	    result = lookup_class_type(name, *corp);
12145 	  if (result
12146 	      // If we are seeing a declaration of a definition we
12147 	      // already had, or if we are seing a type with the same
12148 	      // declaration-only-ness that we had before, then keep
12149 	      // the one we already had.
12150 	      && (result->get_is_declaration_only() == is_declaration_only
12151 		  || (!result->get_is_declaration_only()
12152 		      && is_declaration_only)))
12153 	    {
12154 	      ctxt.associate_die_to_type(die, result, where_offset);
12155 	      return result;
12156 	    }
12157 	  else
12158 	    // We might be seeing the definition of a declaration we
12159 	    // already had.  In that case, keep the definition and
12160 	    // drop the declaration.
12161 	    result.reset();
12162 	}
12163     }
12164 
12165   // If we've already seen the same class as 'die', then let's re-use
12166   // that one, unless it's an anonymous class.  We can't really safely
12167   // re-use anonymous classes as they have no name, by construction.
12168   // What we can do, rather, is to reuse the typedef that name them,
12169   // when they do have a naming typedef.
12170   if (!is_anonymous)
12171     if (class_decl_sptr pre_existing_class =
12172 	is_class_type(ctxt.lookup_type_artifact_from_die(die)))
12173       klass = pre_existing_class;
12174 
12175   uint64_t size = 0;
12176   die_size_in_bits(die, size);
12177   bool is_artificial = die_is_artificial(die);
12178 
12179   Dwarf_Die child;
12180   bool has_child = (dwarf_child(die, &child) == 0);
12181 
12182   decl_base_sptr res;
12183   if (klass)
12184     {
12185       res = result = klass;
12186       if (loc)
12187 	result->set_location(loc);
12188     }
12189   else
12190     {
12191       result.reset(new class_decl(ctxt.env(), name, size,
12192 				  /*alignment=*/0, is_struct, loc,
12193 				  decl_base::VISIBILITY_DEFAULT,
12194 				  is_anonymous));
12195 
12196       result->set_is_declaration_only(is_declaration_only);
12197 
12198       res = add_decl_to_scope(result, scope);
12199       result = dynamic_pointer_cast<class_decl>(res);
12200       ABG_ASSERT(result);
12201     }
12202 
12203   if (size)
12204     result->set_size_in_bits(size);
12205 
12206   if (klass)
12207     // We are amending a class that was built before.  So let's check
12208     // if we need to amend its "declaration-only-ness" status.
12209     if (!!result->get_size_in_bits() == result->get_is_declaration_only())
12210       // The size of the class doesn't match its
12211       // 'declaration-only-ness".  We might have a non-zero sized
12212       // class which is declaration-only, or a zero sized class that
12213       // is not declaration-only.  Let's set the declaration-only-ness
12214       // according to what we are instructed to.
12215       //
12216       // Note however that there are binaries out there emitted by
12217       // compilers (Clang, in C++) emit declarations-only classes that
12218       // have non-zero size.  So we must honor these too. That is why
12219       // we are not forcing the declaration-only-ness to false when a
12220       // class has non-zero size.  An example of such binary is
12221       // tests/data/test-diff-filter/test41-PR21486-abg-writer.llvm.o.
12222       result->set_is_declaration_only(is_declaration_only);
12223 
12224   result->set_is_artificial(is_artificial);
12225 
12226   ctxt.associate_die_to_type(die, result, where_offset);
12227 
12228   ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
12229 
12230   if (!has_child)
12231     // TODO: set the access specifier for the declaration-only class
12232     // here.
12233     return result;
12234 
12235   ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
12236 
12237   scope_decl_sptr scop =
12238     dynamic_pointer_cast<scope_decl>(res);
12239   ABG_ASSERT(scop);
12240   ctxt.scope_stack().push(scop.get());
12241 
12242   if (has_child)
12243     {
12244       int anonymous_member_class_index = -1;
12245       int anonymous_member_union_index = -1;
12246       int anonymous_member_enum_index = -1;
12247 
12248       do
12249 	{
12250 	  tag = dwarf_tag(&child);
12251 
12252 	  // Handle base classes.
12253 	  if (tag == DW_TAG_inheritance)
12254 	    {
12255 	      result->set_is_declaration_only(false);
12256 
12257 	      Dwarf_Die type_die;
12258 	      if (!die_die_attribute(&child, DW_AT_type, type_die))
12259 		continue;
12260 
12261 	      type_base_sptr base_type;
12262 	      if (!(base_type =
12263 		    lookup_class_or_typedef_from_corpus(ctxt, &type_die,
12264 							called_from_public_decl,
12265 							where_offset)))
12266 		{
12267 		  base_type =
12268 		    is_type(build_ir_node_from_die(ctxt, &type_die,
12269 						   called_from_public_decl,
12270 						   where_offset));
12271 		}
12272 	      // Sometimes base_type can be a typedef.  Let's make
12273 	      // sure that typedef is compatible with a class type.
12274 	      class_decl_sptr b = is_compatible_with_class_type(base_type);
12275 	      if (!b)
12276 		continue;
12277 
12278 	      access_specifier access =
12279 		is_struct
12280 		? public_access
12281 		: private_access;
12282 
12283 	      die_access_specifier(&child, access);
12284 
12285 	      bool is_virt= die_is_virtual(&child);
12286 	      int64_t offset = 0;
12287 	      bool is_offset_present =
12288 		die_member_offset(ctxt, &child, offset);
12289 
12290 	      class_decl::base_spec_sptr base(new class_decl::base_spec
12291 					      (b, access,
12292 					       is_offset_present ? offset : -1,
12293 					       is_virt));
12294 	      if (b->get_is_declaration_only())
12295 		ABG_ASSERT(ctxt.is_decl_only_class_scheduled_for_resolution(b));
12296 	      if (result->find_base_class(b->get_qualified_name()))
12297 		continue;
12298 	      result->add_base_specifier(base);
12299 	    }
12300 	  // Handle data members.
12301 	  else if (tag == DW_TAG_member
12302 		   || tag == DW_TAG_variable)
12303 	    {
12304 	      Dwarf_Die type_die;
12305 	      if (!die_die_attribute(&child, DW_AT_type, type_die))
12306 		continue;
12307 
12308 	      string n, m;
12309 	      location loc;
12310 	      die_loc_and_name(ctxt, &child, loc, n, m);
12311 	      /// For now, we skip the hidden vtable pointer.
12312 	      /// Currently, we're looking for a member starting with
12313 	      /// "_vptr[^0-9a-zA-Z_]", which is what Clang and GCC
12314 	      /// use as a name for the hidden vtable pointer.
12315 	      if (n.substr(0, 5) == "_vptr"
12316 		  && !std::isalnum(n.at(5))
12317 		  && n.at(5) != '_')
12318 		continue;
12319 
12320 	      // If the variable is already a member of this class,
12321 	      // move on.  If it's an anonymous data member, we need
12322 	      // to handle it differently.  We'll do that later below.
12323 	      if (!n.empty() && lookup_var_decl_in_scope(n, result))
12324 		continue;
12325 
12326 	      int64_t offset_in_bits = 0;
12327 	      bool is_laid_out = die_member_offset(ctxt, &child,
12328 						   offset_in_bits);
12329 	      // For now, is_static == !is_laid_out.  When we have
12330 	      // templates, we'll try to be more specific.  For now,
12331 	      // this approximation should do OK.
12332 	      bool is_static = !is_laid_out;
12333 
12334 	      if (is_static && variable_is_suppressed(ctxt,
12335 						      result.get(),
12336 						      &child))
12337 		continue;
12338 
12339 	      decl_base_sptr ty = is_decl(build_ir_node_from_die(ctxt, &type_die,
12340 								 called_from_public_decl,
12341 								 where_offset));
12342 	      type_base_sptr t = is_type(ty);
12343 	      if (!t)
12344 		continue;
12345 
12346 	      // The call to build_ir_node_from_die above could have
12347 	      // triggered the adding of a data member named 'n' into
12348 	      // result.  So let's check again if the variable is
12349 	      // already a member of this class.  Here again, if it's
12350 	      // an anonymous data member, we need to handle it
12351 	      // differently.  We'll do that later below.
12352 	      if (!n.empty() && lookup_var_decl_in_scope(n, result))
12353 		continue;
12354 
12355 	      if (!is_static)
12356 		// We have a non-static data member.  So this class
12357 		// cannot be a declaration-only class anymore, even if
12358 		// some DWARF emitters might consider it otherwise.
12359 		result->set_is_declaration_only(false);
12360 	      access_specifier access =
12361 		is_struct
12362 		? public_access
12363 		: private_access;
12364 
12365 	      die_access_specifier(&child, access);
12366 
12367 	      var_decl_sptr dm(new var_decl(n, t, loc, m));
12368 	      if (n.empty() && result->find_data_member(dm))
12369 		// dm is an anonymous data member that was already
12370 		// present in the current class so let's not add it.
12371 		continue;
12372 	      result->add_data_member(dm, access, is_laid_out,
12373 				      is_static, offset_in_bits);
12374 	      ABG_ASSERT(has_scope(dm));
12375 	      ctxt.associate_die_to_decl(&child, dm, where_offset,
12376 					 /*associate_by_repr=*/false);
12377 	    }
12378 	  // Handle member functions;
12379 	  else if (tag == DW_TAG_subprogram)
12380 	    {
12381 	      decl_base_sptr r =
12382 		add_or_update_member_function(ctxt, &child, result,
12383 					      called_from_public_decl,
12384 					      where_offset);
12385 	      if (function_decl_sptr f = is_function_decl(r))
12386 		ctxt.associate_die_to_decl(&child, f, where_offset,
12387 					   /*associate_by_repr=*/true);
12388 	    }
12389 	  // Handle member types
12390 	  else if (die_is_type(&child))
12391 	    {
12392 	      // Track the anonymous type index in the current
12393 	      // scope. Look for what this means by reading the
12394 	      // comment of the function
12395 	      // build_internal_anonymous_die_name.
12396 	      int anonymous_member_type_index = 0;
12397 	      if (is_anonymous_type_die(&child))
12398 		{
12399 		  // Update the anonymous type index.
12400 		  if (die_is_class_type(&child))
12401 		    anonymous_member_type_index =
12402 		      ++anonymous_member_class_index;
12403 		  else if (dwarf_tag(&child) == DW_TAG_union_type)
12404 		    anonymous_member_type_index =
12405 		      ++anonymous_member_union_index;
12406 		  else if (dwarf_tag(&child) == DW_TAG_enumeration_type)
12407 		    anonymous_member_type_index =
12408 		      ++anonymous_member_enum_index;
12409 		}
12410 	      // if the type is not already a member of this class,
12411 	      // then add it to the class.
12412 	      if (!lookup_class_typedef_or_enum_type_from_corpus
12413 		  (&child, anonymous_member_type_index, result.get()))
12414 		build_ir_node_from_die(ctxt, &child, result.get(),
12415 				       called_from_public_decl,
12416 				       where_offset);
12417 	    }
12418 	} while (dwarf_siblingof(&child, &child) == 0);
12419     }
12420 
12421   ctxt.scope_stack().pop();
12422 
12423   {
12424     die_class_or_union_map_type::const_iterator i =
12425       ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12426     if (i != ctxt.die_wip_classes_map(source).end())
12427       {
12428 	if (is_member_type(i->second))
12429 	  set_member_access_specifier(res,
12430 				      get_member_access_specifier(i->second));
12431 	ctxt.die_wip_classes_map(source).erase(i);
12432       }
12433   }
12434 
12435   ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
12436   return result;
12437 }
12438 
12439 /// Build an @ref union_decl from a DW_TAG_union_type DIE.
12440 ///
12441 /// @param ctxt the read context to use.
12442 ///
12443 /// @param die the DIE to read from.
12444 ///
12445 /// @param scope the scope the resulting @ref union_decl belongs to.
12446 ///
12447 /// @param union_type if this parameter is non-nil, then this function
12448 /// updates the @ref union_decl that it points to, rather than
12449 /// creating a new @ref union_decl.
12450 ///
12451 /// @param called_from_public_decl is true if this function has been
12452 /// initially called within the context of a public decl.
12453 ///
12454 /// @param where_offset the offset of the DIE where we are "logically"
12455 /// positionned at, in the DIE tree.  This is useful when @p die is
12456 /// e.g, DW_TAG_partial_unit that can be included in several places in
12457 /// the DIE tree.
12458 ///
12459 /// @param is_declaration_only is true if the DIE denoted by @p die is
12460 /// a declaration-only DIE.
12461 ///
12462 /// @return the resulting @ref union_decl type.
12463 static union_decl_sptr
add_or_update_union_type(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,union_decl_sptr union_type,bool called_from_public_decl,size_t where_offset,bool is_declaration_only)12464 add_or_update_union_type(read_context&	 ctxt,
12465 			 Dwarf_Die*	 die,
12466 			 scope_decl*	 scope,
12467 			 union_decl_sptr union_type,
12468 			 bool		 called_from_public_decl,
12469 			 size_t	 where_offset,
12470 			 bool		 is_declaration_only)
12471 {
12472   union_decl_sptr result;
12473   if (!die)
12474     return result;
12475 
12476   unsigned tag = dwarf_tag(die);
12477 
12478   if (tag != DW_TAG_union_type)
12479     return result;
12480 
12481   const die_source source = ctxt.get_die_source(die);
12482   {
12483     die_class_or_union_map_type::const_iterator i =
12484       ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12485     if (i != ctxt.die_wip_classes_map(source).end())
12486       {
12487 	union_decl_sptr u = is_union_type(i->second);
12488 	ABG_ASSERT(u);
12489 	return u;
12490       }
12491   }
12492 
12493   string name, linkage_name;
12494   location loc;
12495   die_loc_and_name(ctxt, die, loc, name, linkage_name);
12496 
12497   bool is_anonymous = false;
12498   if (name.empty())
12499     {
12500       // So we are looking at an anonymous union.  Let's give it a
12501       // name.
12502       name = get_internal_anonymous_die_prefix_name(die);
12503       ABG_ASSERT(!name.empty());
12504       // But we remember that the type is anonymous.
12505       is_anonymous = true;
12506 
12507       if (size_t s = scope->get_num_anonymous_member_unions())
12508 	name = build_internal_anonymous_die_name(name, s);
12509     }
12510 
12511   // If the type has location, then associate it to its
12512   // representation.  This way, all occurences of types with the same
12513   // representation (name) and location can be later detected as being
12514   // for the same type.
12515 
12516   if (!is_anonymous)
12517     {
12518       if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
12519 	{
12520 	  if (loc)
12521 	    result = lookup_union_type_per_location(loc.expand(), *corp);
12522 	  else
12523 	    result = lookup_union_type(name, *corp);
12524 
12525 	  if (result)
12526 	    {
12527 	      ctxt.associate_die_to_type(die, result, where_offset);
12528 	      return result;
12529 	    }
12530 	}
12531     }
12532 
12533   // if we've already seen a union with the same union as 'die' then
12534   // let's re-use that one. We can't really safely re-use anonymous
12535   // unions as they have no name, by construction.  What we can do,
12536   // rather, is to reuse the typedef that name them, when they do have
12537   // a naming typedef.
12538   if (!is_anonymous)
12539     if (union_decl_sptr pre_existing_union =
12540 	is_union_type(ctxt.lookup_artifact_from_die(die)))
12541       union_type = pre_existing_union;
12542 
12543   uint64_t size = 0;
12544   die_size_in_bits(die, size);
12545   bool is_artificial = die_is_artificial(die);
12546 
12547   if (union_type)
12548     {
12549       result = union_type;
12550       result->set_location(loc);
12551     }
12552   else
12553     {
12554       result.reset(new union_decl(ctxt.env(), name, size, loc,
12555 				  decl_base::VISIBILITY_DEFAULT,
12556 				  is_anonymous));
12557       if (is_declaration_only)
12558 	result->set_is_declaration_only(true);
12559       result = is_union_type(add_decl_to_scope(result, scope));
12560       ABG_ASSERT(result);
12561     }
12562 
12563   if (size)
12564     {
12565       result->set_size_in_bits(size);
12566       result->set_is_declaration_only(false);
12567     }
12568 
12569   result->set_is_artificial(is_artificial);
12570 
12571   ctxt.associate_die_to_type(die, result, where_offset);
12572 
12573   // TODO: maybe schedule declaration-only union for result like we do
12574   // for classes:
12575   // ctxt.maybe_schedule_declaration_only_class_for_resolution(result);
12576 
12577   Dwarf_Die child;
12578   bool has_child = (dwarf_child(die, &child) == 0);
12579   if (!has_child)
12580     return result;
12581 
12582   ctxt.die_wip_classes_map(source)[dwarf_dieoffset(die)] = result;
12583 
12584   scope_decl_sptr scop =
12585     dynamic_pointer_cast<scope_decl>(result);
12586   ABG_ASSERT(scop);
12587   ctxt.scope_stack().push(scop.get());
12588 
12589   if (has_child)
12590     {
12591       do
12592 	{
12593 	  tag = dwarf_tag(&child);
12594 	  // Handle data members.
12595 	  if (tag == DW_TAG_member || tag == DW_TAG_variable)
12596 	    {
12597 	      Dwarf_Die type_die;
12598 	      if (!die_die_attribute(&child, DW_AT_type, type_die))
12599 		continue;
12600 
12601 	      string n, m;
12602 	      location loc;
12603 	      die_loc_and_name(ctxt, &child, loc, n, m);
12604 
12605 	      // Because we can be updating an existing union, let's
12606 	      // make sure we don't already have a member of the same
12607 	      // name.  Anonymous member are handled a bit later below
12608 	      // so let's not consider them here.
12609 	      if (!n.empty() && lookup_var_decl_in_scope(n, result))
12610 		continue;
12611 
12612 	      ssize_t offset_in_bits = 0;
12613 	      decl_base_sptr ty =
12614 		is_decl(build_ir_node_from_die(ctxt, &type_die,
12615 					       called_from_public_decl,
12616 					       where_offset));
12617 	      type_base_sptr t = is_type(ty);
12618 	      if (!t)
12619 		continue;
12620 
12621 	      // We have a non-static data member.  So this union
12622 	      // cannot be a declaration-only union anymore, even if
12623 	      // some DWARF emitters might consider it otherwise.
12624 	      result->set_is_declaration_only(false);
12625 	      access_specifier access = public_access;
12626 
12627 	      die_access_specifier(&child, access);
12628 
12629 	      var_decl_sptr dm(new var_decl(n, t, loc, m));
12630 	      // If dm is an anonymous data member, let's make sure
12631 	      // the current union doesn't already have it as a data
12632 	      // member.
12633 	      if (n.empty() && result->find_data_member(dm))
12634 		continue;
12635 
12636 	      result->add_data_member(dm, access, /*is_laid_out=*/true,
12637 				      /*is_static=*/false,
12638 				      offset_in_bits);
12639 	      ABG_ASSERT(has_scope(dm));
12640 	      ctxt.associate_die_to_decl(&child, dm, where_offset,
12641 					 /*associate_by_repr=*/false);
12642 	    }
12643 	  // Handle member functions;
12644 	  else if (tag == DW_TAG_subprogram)
12645 	    {
12646 	      decl_base_sptr r =
12647 		is_decl(build_ir_node_from_die(ctxt, &child,
12648 					       result.get(),
12649 					       called_from_public_decl,
12650 					       where_offset));
12651 	      if (!r)
12652 		continue;
12653 
12654 	      function_decl_sptr f = dynamic_pointer_cast<function_decl>(r);
12655 	      ABG_ASSERT(f);
12656 
12657 	      finish_member_function_reading(&child, f, result, ctxt);
12658 
12659 	      ctxt.associate_die_to_decl(&child, f, where_offset,
12660 					 /*associate_by_repr=*/false);
12661 	    }
12662 	  // Handle member types
12663 	  else if (die_is_type(&child))
12664 	    decl_base_sptr td =
12665 	      is_decl(build_ir_node_from_die(ctxt, &child, result.get(),
12666 					     called_from_public_decl,
12667 					     where_offset));
12668 	} while (dwarf_siblingof(&child, &child) == 0);
12669     }
12670 
12671   ctxt.scope_stack().pop();
12672 
12673   {
12674     die_class_or_union_map_type::const_iterator i =
12675       ctxt.die_wip_classes_map(source).find(dwarf_dieoffset(die));
12676     if (i != ctxt.die_wip_classes_map(source).end())
12677       {
12678 	if (is_member_type(i->second))
12679 	  set_member_access_specifier(result,
12680 				      get_member_access_specifier(i->second));
12681 	ctxt.die_wip_classes_map(source).erase(i);
12682       }
12683   }
12684 
12685   return result;
12686 }
12687 
12688 /// build a qualified type from a DW_TAG_const_type,
12689 /// DW_TAG_volatile_type or DW_TAG_restrict_type DIE.
12690 ///
12691 /// @param ctxt the read context to consider.
12692 ///
12693 /// @param die the input DIE to read from.
12694 ///
12695 /// @param called_from_public_decl true if this function was called
12696 /// from a context where either a public function or a public variable
12697 /// is being built.
12698 ///
12699 /// @param where_offset the offset of the DIE where we are "logically"
12700 /// positionned at, in the DIE tree.  This is useful when @p die is
12701 /// e.g, DW_TAG_partial_unit that can be included in several places in
12702 /// the DIE tree.
12703 ///
12704 /// @return the resulting qualified_type_def.
12705 static type_base_sptr
build_qualified_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)12706 build_qualified_type(read_context&	ctxt,
12707 		     Dwarf_Die*	die,
12708 		     bool		called_from_public_decl,
12709 		     size_t		where_offset)
12710 {
12711   type_base_sptr result;
12712   if (!die)
12713     return result;
12714 
12715   unsigned tag = dwarf_tag(die);
12716 
12717   if (tag != DW_TAG_const_type
12718       && tag != DW_TAG_volatile_type
12719       && tag != DW_TAG_restrict_type)
12720     return result;
12721 
12722   Dwarf_Die underlying_type_die;
12723   decl_base_sptr utype_decl;
12724   if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
12725     // So, if no DW_AT_type is present, then this means (if we are
12726     // looking at a debug info emitted by GCC) that we are looking
12727     // at a qualified void type.
12728     utype_decl = build_ir_node_for_void_type(ctxt);
12729 
12730   if (!utype_decl)
12731     utype_decl = is_decl(build_ir_node_from_die(ctxt, &underlying_type_die,
12732 						called_from_public_decl,
12733 						where_offset));
12734   if (!utype_decl)
12735     return result;
12736 
12737   // The call to build_ir_node_from_die() could have triggered the
12738   // creation of the type for this DIE.  In that case, just return it.
12739   if (type_base_sptr t = ctxt.lookup_type_from_die(die))
12740     {
12741       result = t;
12742       ctxt.associate_die_to_type(die, result, where_offset);
12743       return result;
12744     }
12745 
12746   type_base_sptr utype = is_type(utype_decl);
12747   ABG_ASSERT(utype);
12748 
12749   qualified_type_def::CV qual = qualified_type_def::CV_NONE;
12750   if (tag == DW_TAG_const_type)
12751     qual |= qualified_type_def::CV_CONST;
12752   else if (tag == DW_TAG_volatile_type)
12753     qual |= qualified_type_def::CV_VOLATILE;
12754   else if (tag == DW_TAG_restrict_type)
12755     qual |= qualified_type_def::CV_RESTRICT;
12756   else
12757     ABG_ASSERT_NOT_REACHED;
12758 
12759   if (!result)
12760     result.reset(new qualified_type_def(utype, qual, location()));
12761 
12762   ctxt.associate_die_to_type(die, result, where_offset);
12763 
12764   return result;
12765 }
12766 
12767 /// Walk a tree of typedef of qualified arrays and schedule all type
12768 /// nodes for canonicalization.
12769 ///
12770 /// This is to be used after an array tree has been cloned.  In that
12771 /// case, the newly cloned type nodes have to be scheduled for
12772 /// canonicalization.
12773 ///
12774 /// This is a subroutine of maybe_strip_qualification.
12775 ///
12776 /// @param t the type node to be scheduled for canonicalization.
12777 ///
12778 /// @param ctxt the contexter of the reader to use.
12779 static void
schedule_array_tree_for_late_canonicalization(const type_base_sptr & t,read_context & ctxt)12780 schedule_array_tree_for_late_canonicalization(const type_base_sptr& t,
12781 					      read_context &ctxt)
12782 {
12783   if (typedef_decl_sptr type = is_typedef(t))
12784     {
12785       schedule_array_tree_for_late_canonicalization(type->get_underlying_type(),
12786 						    ctxt);
12787       ctxt.schedule_type_for_late_canonicalization(t);
12788     }
12789   else if (qualified_type_def_sptr type = is_qualified_type(t))
12790     {
12791       schedule_array_tree_for_late_canonicalization(type->get_underlying_type(),
12792 						    ctxt);
12793       ctxt.schedule_type_for_late_canonicalization(t);
12794     }
12795   else if (array_type_def_sptr type = is_array_type(t))
12796     {
12797       for (vector<array_type_def::subrange_sptr>::const_iterator i =
12798 	     type->get_subranges().begin();
12799 	   i != type->get_subranges().end();
12800 	   ++i)
12801 	{
12802 	  if (!(*i)->get_scope())
12803 	    add_decl_to_scope(*i, ctxt.cur_transl_unit()->get_global_scope());
12804 	  ctxt.schedule_type_for_late_canonicalization(*i);
12805 
12806 	}
12807       schedule_array_tree_for_late_canonicalization(type->get_element_type(),
12808 						    ctxt);
12809       ctxt.schedule_type_for_late_canonicalization(type);
12810     }
12811 }
12812 
12813 /// Strip qualification from a qualified type, when it makes sense.
12814 ///
12815 /// DWARF constructs "const reference".  This is redundant because a
12816 /// reference is always const.  The issue is these redundant types then
12817 /// leak into the IR and make for bad diagnostics.
12818 ///
12819 /// This function thus strips the const qualifier from the type in
12820 /// that case.  It might contain code to strip other cases like this
12821 /// in the future.
12822 ///
12823 /// @param t the type to strip const qualification from.
12824 ///
12825 /// @param ctxt the @ref read_context to use.
12826 ///
12827 /// @return the stripped type or just return @p t.
12828 static decl_base_sptr
maybe_strip_qualification(const qualified_type_def_sptr t,read_context & ctxt)12829 maybe_strip_qualification(const qualified_type_def_sptr t,
12830 			  read_context &ctxt)
12831 {
12832   if (!t)
12833     return t;
12834 
12835   decl_base_sptr result = t;
12836   type_base_sptr u = t->get_underlying_type();
12837   environment* env = t->get_environment();
12838 
12839   if (t->get_cv_quals() & qualified_type_def::CV_CONST
12840       && (is_reference_type(u)))
12841     {
12842       // Let's strip only the const qualifier.  To do that, the "const"
12843       // qualified is turned into a no-op "none" qualified.
12844       result.reset(new qualified_type_def
12845 		   (u, t->get_cv_quals() & ~qualified_type_def::CV_CONST,
12846 		    t->get_location()));
12847       ctxt.schedule_type_for_late_canonicalization(is_type(result));
12848     }
12849   else if (t->get_cv_quals() & qualified_type_def::CV_CONST
12850 	   && env->is_void_type(u))
12851     {
12852       // So this type is a "const void".  Let's strip the "const"
12853       // qualifier out and make this just be "void", so that a "const
12854       // void" type and a "void" type compare equal after going through
12855       // this function.
12856       result = is_decl(u);
12857     }
12858   else if (is_array_type(u) || is_typedef_of_array(u))
12859     {
12860       array_type_def_sptr array;
12861       scope_decl * scope = 0;
12862       if ((array = is_array_type(u)))
12863 	{
12864 	  scope = array->get_scope();
12865 	  ABG_ASSERT(scope);
12866 	  array = is_array_type(clone_array_tree(array));
12867 	  schedule_array_tree_for_late_canonicalization(array, ctxt);
12868 	  add_decl_to_scope(array, scope);
12869 	  t->set_underlying_type(array);
12870 	  u = t->get_underlying_type();
12871 	}
12872       else if (is_typedef_of_array(u))
12873 	{
12874 	  scope = is_decl(u)->get_scope();
12875 	  ABG_ASSERT(scope);
12876 	  typedef_decl_sptr typdef =
12877 	    is_typedef(clone_array_tree(is_typedef(u)));
12878 	  schedule_array_tree_for_late_canonicalization(typdef, ctxt);
12879 	  ABG_ASSERT(typdef);
12880 	  add_decl_to_scope(typdef, scope);
12881 	  t->set_underlying_type(typdef);
12882 	  u = t->get_underlying_type();
12883 	  array = is_typedef_of_array(u);
12884 	}
12885       else
12886 	ABG_ASSERT_NOT_REACHED;
12887 
12888       ABG_ASSERT(array);
12889       // We should not be editing types that are already canonicalized.
12890       ABG_ASSERT(!array->get_canonical_type());
12891       type_base_sptr element_type = array->get_element_type();
12892 
12893       if (qualified_type_def_sptr qualified = is_qualified_type(element_type))
12894 	{
12895 	  // We should not be editing types that are already canonicalized.
12896 	  ABG_ASSERT(!qualified->get_canonical_type());
12897 	  qualified_type_def::CV quals = qualified->get_cv_quals();
12898 	  quals |= t->get_cv_quals();
12899 	  qualified->set_cv_quals(quals);
12900 	  result = is_decl(u);
12901 	}
12902       else
12903 	{
12904 	  qualified_type_def_sptr qual_type
12905 	    (new qualified_type_def(element_type,
12906 				    t->get_cv_quals(),
12907 				    t->get_location()));
12908 	  add_decl_to_scope(qual_type, is_decl(element_type)->get_scope());
12909 	  array->set_element_type(qual_type);
12910 	  ctxt.schedule_type_for_late_canonicalization(is_type(qual_type));
12911 	  result = is_decl(u);
12912 	}
12913     }
12914 
12915   return result;
12916 }
12917 
12918 /// Build a pointer type from a DW_TAG_pointer_type DIE.
12919 ///
12920 /// @param ctxt the read context to consider.
12921 ///
12922 /// @param die the DIE to read information from.
12923 ///
12924 /// @param called_from_public_decl true if this function was called
12925 /// from a context where either a public function or a public variable
12926 /// is being built.
12927 ///
12928 /// @param where_offset the offset of the DIE where we are "logically"
12929 /// positionned at, in the DIE tree.  This is useful when @p die is
12930 /// e.g, DW_TAG_partial_unit that can be included in several places in
12931 /// the DIE tree.
12932 ///
12933 /// @return the resulting pointer to pointer_type_def.
12934 static pointer_type_def_sptr
build_pointer_type_def(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)12935 build_pointer_type_def(read_context&	ctxt,
12936 		       Dwarf_Die*	die,
12937 		       bool		called_from_public_decl,
12938 		       size_t		where_offset)
12939 {
12940   pointer_type_def_sptr result;
12941 
12942   if (!die)
12943     return result;
12944 
12945   unsigned tag = dwarf_tag(die);
12946   if (tag != DW_TAG_pointer_type)
12947     return result;
12948 
12949   type_or_decl_base_sptr utype_decl;
12950   Dwarf_Die underlying_type_die;
12951   bool has_underlying_type_die = false;
12952   if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
12953     // If the DW_AT_type attribute is missing, that means we are
12954     // looking at a pointer to "void".
12955     utype_decl = build_ir_node_for_void_type(ctxt);
12956   else
12957     has_underlying_type_die = true;
12958 
12959   if (!utype_decl && has_underlying_type_die)
12960     utype_decl = build_ir_node_from_die(ctxt, &underlying_type_die,
12961 					called_from_public_decl,
12962 					where_offset);
12963   if (!utype_decl)
12964     return result;
12965 
12966   // The call to build_ir_node_from_die() could have triggered the
12967   // creation of the type for this DIE.  In that case, just return it.
12968   if (type_base_sptr t = ctxt.lookup_type_from_die(die))
12969     {
12970       result = is_pointer_type(t);
12971       ABG_ASSERT(result);
12972       return result;
12973     }
12974 
12975   type_base_sptr utype = is_type(utype_decl);
12976   ABG_ASSERT(utype);
12977 
12978   // if the DIE for the pointer type doesn't have a byte_size
12979   // attribute then we assume the size of the pointer is the address
12980   // size of the current translation unit.
12981   uint64_t size = ctxt.cur_transl_unit()->get_address_size();
12982   if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
12983     // The size as expressed by DW_AT_byte_size is in byte, so let's
12984     // convert it to bits.
12985     size *= 8;
12986 
12987   // And the size of the pointer must be the same as the address size
12988   // of the current translation unit.
12989   ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
12990 
12991   result.reset(new pointer_type_def(utype, size, /*alignment=*/0, location()));
12992   ABG_ASSERT(result->get_pointed_to_type());
12993 
12994   ctxt.associate_die_to_type(die, result, where_offset);
12995   return result;
12996 }
12997 
12998 /// Build a reference type from either a DW_TAG_reference_type or
12999 /// DW_TAG_rvalue_reference_type DIE.
13000 ///
13001 /// @param ctxt the read context to consider.
13002 ///
13003 /// @param die the DIE to read from.
13004 ///
13005 /// @param called_from_public_decl true if this function was called
13006 /// from a context where either a public function or a public variable
13007 /// is being built.
13008 ///
13009 /// @param where_offset the offset of the DIE where we are "logically"
13010 /// positionned at, in the DIE tree.  This is useful when @p die is
13011 /// e.g, DW_TAG_partial_unit that can be included in several places in
13012 /// the DIE tree.
13013 ///
13014 /// @return a pointer to the resulting reference_type_def.
13015 static reference_type_def_sptr
build_reference_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)13016 build_reference_type(read_context&	ctxt,
13017 		     Dwarf_Die*	die,
13018 		     bool		called_from_public_decl,
13019 		     size_t		where_offset)
13020 {
13021   reference_type_def_sptr result;
13022 
13023   if (!die)
13024     return result;
13025 
13026   unsigned tag = dwarf_tag(die);
13027   if (tag != DW_TAG_reference_type
13028       && tag != DW_TAG_rvalue_reference_type)
13029     return result;
13030 
13031   Dwarf_Die underlying_type_die;
13032   if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
13033     return result;
13034 
13035   type_or_decl_base_sptr utype_decl =
13036     build_ir_node_from_die(ctxt, &underlying_type_die,
13037 			   called_from_public_decl,
13038 			   where_offset);
13039   if (!utype_decl)
13040     return result;
13041 
13042   // The call to build_ir_node_from_die() could have triggered the
13043   // creation of the type for this DIE.  In that case, just return it.
13044   if (type_base_sptr t = ctxt.lookup_type_from_die(die))
13045     {
13046       result = is_reference_type(t);
13047       ABG_ASSERT(result);
13048       return result;
13049     }
13050 
13051   type_base_sptr utype = is_type(utype_decl);
13052   ABG_ASSERT(utype);
13053 
13054   // if the DIE for the reference type doesn't have a byte_size
13055   // attribute then we assume the size of the reference is the address
13056   // size of the current translation unit.
13057   uint64_t size = ctxt.cur_transl_unit()->get_address_size();
13058   if (die_unsigned_constant_attribute(die, DW_AT_byte_size, size))
13059     size *= 8;
13060 
13061   // And the size of the pointer must be the same as the address size
13062   // of the current translation unit.
13063   ABG_ASSERT((size_t) ctxt.cur_transl_unit()->get_address_size() == size);
13064 
13065   bool is_lvalue = tag == DW_TAG_reference_type;
13066 
13067   result.reset(new reference_type_def(utype, is_lvalue, size,
13068 				      /*alignment=*/0,
13069 				      location()));
13070   if (corpus_sptr corp = ctxt.current_corpus())
13071     if (reference_type_def_sptr t = lookup_reference_type(*result, *corp))
13072       result = t;
13073   ctxt.associate_die_to_type(die, result, where_offset);
13074   return result;
13075 }
13076 
13077 /// Build a subroutine type from a DW_TAG_subroutine_type DIE.
13078 ///
13079 /// @param ctxt the read context to consider.
13080 ///
13081 /// @param die the DIE to read from.
13082 ///
13083 /// @param is_method points to a class or union declaration iff we're
13084 /// building the type for a method.  This is the enclosing class or
13085 /// union of the method.
13086 ///
13087 /// @param where_offset the offset of the DIE where we are "logically"
13088 /// positioned at, in the DIE tree.  This is useful when @p die is
13089 /// e.g, DW_TAG_partial_unit that can be included in several places in
13090 /// the DIE tree.
13091 ///
13092 /// @return a pointer to the resulting function_type_sptr.
13093 static function_type_sptr
build_function_type(read_context & ctxt,Dwarf_Die * die,class_or_union_sptr is_method,size_t where_offset)13094 build_function_type(read_context&	ctxt,
13095 		    Dwarf_Die*		die,
13096 		    class_or_union_sptr is_method,
13097 		    size_t		where_offset)
13098 {
13099   function_type_sptr result;
13100 
13101   if (!die)
13102     return result;
13103 
13104   ABG_ASSERT(dwarf_tag(die) == DW_TAG_subroutine_type
13105 	     || dwarf_tag(die) == DW_TAG_subprogram);
13106 
13107   const die_source source = ctxt.get_die_source(die);
13108 
13109   decl_base_sptr type_decl;
13110 
13111   translation_unit_sptr tu = ctxt.cur_transl_unit();
13112   ABG_ASSERT(tu);
13113 
13114   /// If, inside the current translation unit, we've already seen a
13115   /// function type with the same text representation, then reuse that
13116   /// one instead.
13117   if (type_base_sptr t = ctxt.lookup_fn_type_from_die_repr_per_tu(die))
13118     {
13119       result = is_function_type(t);
13120       ABG_ASSERT(result);
13121       ctxt.associate_die_to_type(die, result, where_offset);
13122       return result;
13123     }
13124 
13125   bool odr_is_relevant = ctxt.odr_is_relevant(die);
13126   if (odr_is_relevant)
13127     {
13128       // So we can rely on the One Definition Rule to say that if
13129       // several different function types have the same name (or
13130       // rather, representation) across the entire binary, then they
13131       // ought to designate the same function type.  So let's ensure
13132       // that if we've already seen a function type with the same
13133       // representation as the function type 'die', then it's the same
13134       // type as the one denoted by 'die'.
13135       if (function_type_sptr fn_type =
13136 	  is_function_type(ctxt.lookup_type_artifact_from_die(die)))
13137 	{
13138 	  ctxt.associate_die_to_type(die, fn_type, where_offset);
13139 	  return fn_type;
13140 	}
13141     }
13142 
13143   // Let's look at the DIE to detect if it's the DIE for a method
13144   // (type).  If it is, we can deduce the name of its enclosing class
13145   // and if it's a static or const.
13146   bool is_const = false;
13147   bool is_static = false;
13148   Dwarf_Die object_pointer_die;
13149   Dwarf_Die class_type_die;
13150   bool has_this_parm_die =
13151     die_function_type_is_method_type(ctxt, die, where_offset,
13152 				     object_pointer_die,
13153 				     class_type_die,
13154 				     is_static);
13155   if (has_this_parm_die)
13156     {
13157       // The function (type) has a "this" parameter DIE. It means it's
13158       // a member function DIE.
13159       if (!is_static)
13160 	if (die_object_pointer_is_for_const_method(&object_pointer_die))
13161 	  is_const = true;
13162 
13163       if (!is_method)
13164 	{
13165 	  // We were initially called as if the function represented
13166 	  // by DIE was *NOT* a member function.  But now we know it's
13167 	  // a member function.  Let's take that into account.
13168 	  class_or_union_sptr klass_type =
13169 	    is_class_or_union_type(build_ir_node_from_die(ctxt, &class_type_die,
13170 							  /*called_from_pub_decl=*/true,
13171 							  where_offset));
13172 	  ABG_ASSERT(klass_type);
13173 	  is_method = klass_type;
13174 	}
13175     }
13176 
13177   // Let's create the type early and record it as being for the DIE
13178   // 'die'.  This way, when building the sub-type triggers the
13179   // creation of a type matching the same 'die', then we'll reuse this
13180   // one.
13181 
13182   result.reset(is_method
13183 	       ? new method_type(is_method, is_const,
13184 				 tu->get_address_size(),
13185 				 /*alignment=*/0)
13186 	       : new function_type(ctxt.env(), tu->get_address_size(),
13187 				   /*alignment=*/0));
13188   ctxt.associate_die_to_type(die, result, where_offset);
13189   ctxt.die_wip_function_types_map(source)[dwarf_dieoffset(die)] = result;
13190   ctxt.associate_die_repr_to_fn_type_per_tu(die, result);
13191 
13192   type_base_sptr return_type;
13193   Dwarf_Die ret_type_die;
13194   if (die_die_attribute(die, DW_AT_type, ret_type_die))
13195     return_type =
13196       is_type(build_ir_node_from_die(ctxt, &ret_type_die,
13197 				     /*called_from_public_decl=*/true,
13198 				     where_offset));
13199   if (!return_type)
13200     return_type = is_type(build_ir_node_for_void_type(ctxt));
13201   result->set_return_type(return_type);
13202 
13203   Dwarf_Die child;
13204   function_decl::parameters function_parms;
13205 
13206   if (dwarf_child(die, &child) == 0)
13207     do
13208       {
13209 	int child_tag = dwarf_tag(&child);
13210 	if (child_tag == DW_TAG_formal_parameter)
13211 	  {
13212 	    // This is a "normal" function parameter.
13213 	    string name, linkage_name;
13214 	    location loc;
13215 	    die_loc_and_name(ctxt, &child, loc, name, linkage_name);
13216 	    if (!tools_utils::string_is_ascii_identifier(name))
13217 	      // Sometimes, bogus compiler emit names that are
13218 	      // non-ascii garbage.  Let's just ditch that for now.
13219 	      name.clear();
13220 	    bool is_artificial = die_is_artificial(&child);
13221 	    type_base_sptr parm_type;
13222 	    Dwarf_Die parm_type_die;
13223 	    if (die_die_attribute(&child, DW_AT_type, parm_type_die))
13224 	      parm_type =
13225 		is_type(build_ir_node_from_die(ctxt, &parm_type_die,
13226 					       /*called_from_public_decl=*/true,
13227 					       where_offset));
13228 	    if (!parm_type)
13229 	      continue;
13230 	    function_decl::parameter_sptr p
13231 	      (new function_decl::parameter(parm_type, name, loc,
13232 					    /*variadic_marker=*/false,
13233 					    is_artificial));
13234 	    function_parms.push_back(p);
13235 	  }
13236 	else if (child_tag == DW_TAG_unspecified_parameters)
13237 	  {
13238 	    // This is a variadic function parameter.
13239 	    bool is_artificial = die_is_artificial(&child);
13240 	    ir::environment* env = ctxt.env();
13241 	    ABG_ASSERT(env);
13242 	    type_base_sptr parm_type =
13243 	      is_type(build_ir_node_for_variadic_parameter_type(ctxt));
13244 	    function_decl::parameter_sptr p
13245 	      (new function_decl::parameter(parm_type,
13246 					    /*name=*/"",
13247 					    location(),
13248 					    /*variadic_marker=*/true,
13249 					    is_artificial));
13250 	    function_parms.push_back(p);
13251 	    // After a DW_TAG_unspecified_parameters tag, we shouldn't
13252 	    // keep reading for parameters.  The
13253 	    // unspecified_parameters TAG should be the last parameter
13254 	    // that we record. For instance, if there are multiple
13255 	    // DW_TAG_unspecified_parameters DIEs then we should care
13256 	    // only for the first one.
13257 	    break;
13258 	  }
13259       }
13260     while (dwarf_siblingof(&child, &child) == 0);
13261 
13262   result->set_parameters(function_parms);
13263 
13264   tu->bind_function_type_life_time(result);
13265 
13266   {
13267     die_function_type_map_type::const_iterator i =
13268       ctxt.die_wip_function_types_map(source).
13269       find(dwarf_dieoffset(die));
13270     if (i != ctxt.die_wip_function_types_map(source).end())
13271       ctxt.die_wip_function_types_map(source).erase(i);
13272   }
13273 
13274   maybe_canonicalize_type(result, ctxt);
13275   return result;
13276 }
13277 
13278 /// Build a subrange type from a DW_TAG_subrange_type.
13279 ///
13280 /// @param ctxt the read context to consider.
13281 ///
13282 /// @param die the DIE to read from.
13283 ///
13284 /// @param where_offset the offset of the DIE where we are "logically"
13285 /// positionned at in the DIE tree.  This is useful when @p die is
13286 /// e,g, DW_TAG_partial_unit that can be included in several places in
13287 /// the DIE tree.
13288 ///
13289 /// @param associate_die_to_type if this is true then the resulting
13290 /// type is associated to the @p die, so that next time when the
13291 /// system looks up the type associated to it, the current resulting
13292 /// type is returned.  If false, then no association is done and the
13293 /// resulting type can be destroyed right after.  This can be useful
13294 /// when the sole purpose of building the @ref
13295 /// array_type_def::subrange_type is to use some of its method like,
13296 /// e.g, its name pretty printing methods.
13297 ///
13298 /// @return the newly built instance of @ref
13299 /// array_type_def::subrange_type, or nil if no type could be built.
13300 static array_type_def::subrange_sptr
build_subrange_type(read_context & ctxt,const Dwarf_Die * die,size_t where_offset,bool associate_type_to_die)13301 build_subrange_type(read_context&	ctxt,
13302 		    const Dwarf_Die*		die,
13303 		    size_t		where_offset,
13304 		    bool		associate_type_to_die)
13305 {
13306   array_type_def::subrange_sptr result;
13307 
13308   if (!die)
13309     return result;
13310 
13311   unsigned tag = dwarf_tag(const_cast<Dwarf_Die*>(die));
13312   if (tag != DW_TAG_subrange_type)
13313     return result;
13314 
13315   string name = die_name(die);
13316 
13317   // load the underlying type.
13318   Dwarf_Die underlying_type_die;
13319   type_base_sptr underlying_type;
13320   /* Unless there is an underlying type which says differently.  */
13321   bool is_signed = false;
13322   if (die_die_attribute(die, DW_AT_type, underlying_type_die))
13323     underlying_type =
13324       is_type(build_ir_node_from_die(ctxt,
13325 				     &underlying_type_die,
13326 				     /*called_from_public_decl=*/true,
13327 				     where_offset));
13328 
13329   if (underlying_type)
13330     {
13331       uint64_t ate;
13332       if (die_unsigned_constant_attribute (&underlying_type_die,
13333 					   DW_AT_encoding,
13334 					   ate))
13335 	  is_signed = (ate == DW_ATE_signed || ate == DW_ATE_signed_char);
13336     }
13337 
13338   translation_unit::language language = ctxt.cur_transl_unit()->get_language();
13339   array_type_def::subrange_type::bound_value lower_bound =
13340     get_default_array_lower_bound(language);
13341   array_type_def::subrange_type::bound_value upper_bound;
13342   uint64_t count = 0;
13343   bool is_infinite = false;
13344 
13345   // The DWARF 4 specifications says, in [5.11 Subrange
13346   // Type Entries]:
13347   //
13348   //     The subrange entry may have the attributes
13349   //     DW_AT_lower_bound and DW_AT_upper_bound to
13350   //     specify, respectively, the lower and upper bound
13351   //     values of the subrange.
13352   //
13353   // So let's look for DW_AT_lower_bound first.
13354   die_constant_attribute(die, DW_AT_lower_bound, is_signed, lower_bound);
13355 
13356   // Then, DW_AT_upper_bound.
13357   if (!die_constant_attribute(die, DW_AT_upper_bound, is_signed, upper_bound))
13358     {
13359       // The DWARF 4 spec says, in [5.11 Subrange Type
13360       // Entries]:
13361       //
13362       //   The DW_AT_upper_bound attribute may be replaced
13363       //   by a DW_AT_count attribute, whose value
13364       //   describes the number of elements in the
13365       //   subrange rather than the value of the last
13366       //   element."
13367       //
13368       // So, as DW_AT_upper_bound is not present in this
13369       // case, let's see if there is a DW_AT_count.
13370       die_unsigned_constant_attribute(die, DW_AT_count, count);
13371 
13372       // We can deduce the upper_bound from the
13373       // lower_bound and the number of elements of the
13374       // array:
13375       if (int64_t u = lower_bound.get_signed_value() + count)
13376 	upper_bound = u - 1;
13377 
13378       if (upper_bound.get_unsigned_value() == 0 && count == 0)
13379 	// No upper_bound nor count was present on the DIE, this means
13380 	// the array is considered to have an infinite (or rather not
13381 	// known) size.
13382 	is_infinite = true;
13383     }
13384 
13385   if (UINT64_MAX == upper_bound.get_unsigned_value())
13386     {
13387       // If the upper_bound size is the max of the integer value, then
13388       // it most certainly means infinite size.
13389       is_infinite = true;
13390       upper_bound.set_unsigned(0);
13391     }
13392 
13393   result.reset
13394     (new array_type_def::subrange_type(ctxt.env(),
13395 				       name,
13396 				       lower_bound,
13397 				       upper_bound,
13398 				       location()));
13399   result->is_infinite(is_infinite);
13400 
13401   if (underlying_type)
13402     result->set_underlying_type(underlying_type);
13403 
13404   ABG_ASSERT(result->is_infinite()
13405 	     || (result->get_length() ==
13406 		 (uint64_t) (result->get_upper_bound()
13407 			     - result->get_lower_bound() + 1)));
13408 
13409   if (associate_type_to_die)
13410     ctxt.associate_die_to_type(die, result, where_offset);
13411 
13412   return result;
13413 }
13414 
13415 /// Build the sub-ranges of an array type.
13416 ///
13417 /// This is a sub-routine of build_array_type().
13418 ///
13419 /// @param ctxt the context to read from.
13420 ///
13421 /// @param die the DIE of tag DW_TAG_array_type which contains
13422 /// children DIEs that represent the sub-ranges.
13423 ///
13424 /// @param subranges out parameter.  This is set to the sub-ranges
13425 /// that are built from @p die.
13426 ///
13427 /// @param where_offset the offset of the DIE where we are "logically"
13428 /// positioned at, in the DIE tree.  This is useful when @p die is
13429 /// e.g, DW_TAG_partial_unit that can be included in several places in
13430 /// the DIE tree.
13431 static void
build_subranges_from_array_type_die(read_context & ctxt,const Dwarf_Die * die,array_type_def::subranges_type & subranges,size_t where_offset,bool associate_type_to_die)13432 build_subranges_from_array_type_die(read_context&			ctxt,
13433 				    const Dwarf_Die*			die,
13434 				    array_type_def::subranges_type&	subranges,
13435 				    size_t				where_offset,
13436 				    bool				associate_type_to_die)
13437 {
13438   Dwarf_Die child;
13439 
13440   if (dwarf_child(const_cast<Dwarf_Die*>(die), &child) == 0)
13441     {
13442       do
13443 	{
13444 	  int child_tag = dwarf_tag(&child);
13445 	  if (child_tag == DW_TAG_subrange_type)
13446 	    {
13447 	      array_type_def::subrange_sptr s;
13448 	      if (associate_type_to_die)
13449 		{
13450 		  // We are being called to create the type, add it to
13451 		  // the current type graph and associate it to the
13452 		  // DIE it's been created from.
13453 		  type_or_decl_base_sptr t =
13454 		    build_ir_node_from_die(ctxt, &child,
13455 					   /*called_from_public_decl=*/true,
13456 					   where_offset);
13457 		  s = is_subrange_type(t);
13458 		}
13459 	      else
13460 		// We are being called to create the type but *NOT*
13461 		// add it to the current tyupe tree, *NOR* associate
13462 		// it to the DIE it's been created from.
13463 		s = build_subrange_type(ctxt, &child,
13464 					where_offset,
13465 					/*associate_type_to_die=*/false);
13466 	      if (s)
13467 		subranges.push_back(s);
13468 	    }
13469 	}
13470       while (dwarf_siblingof(&child, &child) == 0);
13471     }
13472 }
13473 
13474 /// Build an array type from a DW_TAG_array_type DIE.
13475 ///
13476 /// @param ctxt the read context to consider.
13477 ///
13478 /// @param die the DIE to read from.
13479 ///
13480 /// @param called_from_public_decl true if this function was called
13481 /// from a context where either a public function or a public variable
13482 /// is being built.
13483 ///
13484 /// @param where_offset the offset of the DIE where we are "logically"
13485 /// positioned at, in the DIE tree.  This is useful when @p die is
13486 /// e.g, DW_TAG_partial_unit that can be included in several places in
13487 /// the DIE tree.
13488 ///
13489 /// @return a pointer to the resulting array_type_def.
13490 static array_type_def_sptr
build_array_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)13491 build_array_type(read_context&	ctxt,
13492 		 Dwarf_Die*	die,
13493 		 bool		called_from_public_decl,
13494 		 size_t	where_offset)
13495 {
13496   array_type_def_sptr result;
13497 
13498   if (!die)
13499     return result;
13500 
13501   unsigned tag = dwarf_tag(die);
13502   if (tag != DW_TAG_array_type)
13503     return result;
13504 
13505   decl_base_sptr type_decl;
13506   Dwarf_Die type_die;
13507 
13508   if (die_die_attribute(die, DW_AT_type, type_die))
13509     type_decl = is_decl(build_ir_node_from_die(ctxt, &type_die,
13510 					       called_from_public_decl,
13511 					       where_offset));
13512   if (!type_decl)
13513     return result;
13514 
13515   // The call to build_ir_node_from_die() could have triggered the
13516   // creation of the type for this DIE.  In that case, just return it.
13517   if (type_base_sptr t = ctxt.lookup_type_from_die(die))
13518     {
13519       result = is_array_type(t);
13520       ABG_ASSERT(result);
13521       return result;
13522     }
13523 
13524   type_base_sptr type = is_type(type_decl);
13525   ABG_ASSERT(type);
13526 
13527   array_type_def::subranges_type subranges;
13528 
13529   build_subranges_from_array_type_die(ctxt, die, subranges, where_offset);
13530 
13531   result.reset(new array_type_def(type, subranges, location()));
13532 
13533   return result;
13534 }
13535 
13536 /// Create a typedef_decl from a DW_TAG_typedef DIE.
13537 ///
13538 /// @param ctxt the read context to consider.
13539 ///
13540 /// @param die the DIE to read from.
13541 ///
13542 /// @param called_from_public_decl true if this function was called
13543 /// from a context where either a public function or a public variable
13544 /// is being built.
13545 ///
13546 /// @param where_offset the offset of the DIE where we are "logically"
13547 /// positionned at, in the DIE tree.  This is useful when @p die is
13548 /// e.g, DW_TAG_partial_unit that can be included in several places in
13549 /// the DIE tree.
13550 ///
13551 /// @return the newly created typedef_decl.
13552 static typedef_decl_sptr
build_typedef_type(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)13553 build_typedef_type(read_context&	ctxt,
13554 		   Dwarf_Die*		die,
13555 		   bool		called_from_public_decl,
13556 		   size_t		where_offset)
13557 {
13558   typedef_decl_sptr result;
13559 
13560   if (!die)
13561     return result;
13562 
13563   unsigned tag = dwarf_tag(die);
13564   if (tag != DW_TAG_typedef)
13565     return result;
13566 
13567   string name, linkage_name;
13568   location loc;
13569   die_loc_and_name(ctxt, die, loc, name, linkage_name);
13570 
13571   if (corpus_sptr corp = ctxt.should_reuse_type_from_corpus_group())
13572     if (loc)
13573       result = lookup_typedef_type_per_location(loc.expand(), *corp);
13574 
13575   if (!ctxt.odr_is_relevant(die))
13576     if (typedef_decl_sptr t = is_typedef(ctxt.lookup_artifact_from_die(die)))
13577       result = t;
13578 
13579   if (!result)
13580     {
13581       type_base_sptr utype;
13582       Dwarf_Die underlying_type_die;
13583       if (!die_die_attribute(die, DW_AT_type, underlying_type_die))
13584 	// A typedef DIE with no underlying type means a typedef to
13585 	// void type.
13586 	utype = ctxt.env()->get_void_type();
13587 
13588       if (!utype)
13589 	utype =
13590 	  is_type(build_ir_node_from_die(ctxt,
13591 					 &underlying_type_die,
13592 					 called_from_public_decl,
13593 					 where_offset));
13594       if (!utype)
13595 	return result;
13596 
13597       // The call to build_ir_node_from_die() could have triggered the
13598       // creation of the type for this DIE.  In that case, just return
13599       // it.
13600       if (type_base_sptr t = ctxt.lookup_type_from_die(die))
13601 	{
13602 	  result = is_typedef(t);
13603 	  ABG_ASSERT(result);
13604 	  return result;
13605 	}
13606 
13607       ABG_ASSERT(utype);
13608       result.reset(new typedef_decl(name, utype, loc, linkage_name));
13609 
13610       if (class_decl_sptr klass = is_class_type(utype))
13611 	if (is_anonymous_type(klass))
13612 	  klass->set_naming_typedef(result);
13613     }
13614 
13615   ctxt.associate_die_to_type(die, result, where_offset);
13616 
13617   return result;
13618 }
13619 
13620 /// Build a @ref var_decl out of a DW_TAG_variable DIE if the variable
13621 /// denoted by the DIE is not suppressed by a suppression
13622 /// specification associated to the current read context.
13623 ///
13624 /// Note that if a member variable declaration with the same name as
13625 /// the name of the DIE we are looking at exists, this function returns
13626 /// that existing variable declaration.
13627 ///
13628 /// @param ctxt the read context to use.
13629 ///
13630 /// @param die the DIE representing the variable we are looking at.
13631 ///
13632 /// @param where_offset the offset of the DIE where we are "logically"
13633 /// positionned at, in the DIE tree.  This is useful when @p die is
13634 /// e.g, DW_TAG_partial_unit that can be included in several places in
13635 /// the DIE tree.
13636 ///
13637 /// @param result if this is set to an existing var_decl, this means
13638 /// that the function will append the new properties it sees on @p die
13639 /// to that exising var_decl.  Otherwise, if this parameter is NULL, a
13640 /// new var_decl is going to be allocated and returned.
13641 ///
13642 /// @param is_required_decl_spec this is true iff the variable to
13643 /// build is referred to as being the specification of another
13644 /// variable.
13645 ///
13646 /// @return a pointer to the newly created var_decl.  If the var_decl
13647 /// could not be built, this function returns NULL.
13648 static var_decl_sptr
build_or_get_var_decl_if_not_suppressed(read_context & ctxt,scope_decl * scope,Dwarf_Die * die,size_t where_offset,var_decl_sptr result,bool is_required_decl_spec)13649 build_or_get_var_decl_if_not_suppressed(read_context&	ctxt,
13650 					scope_decl	*scope,
13651 					Dwarf_Die	*die,
13652 					size_t	where_offset,
13653 					var_decl_sptr	result,
13654 					bool is_required_decl_spec)
13655 {
13656   var_decl_sptr var;
13657   if (variable_is_suppressed(ctxt, scope, die, is_required_decl_spec))
13658     return var;
13659 
13660   if (class_decl* class_type = is_class_type(scope))
13661     {
13662       string var_name = die_name(die);
13663       if (!var_name.empty())
13664 	if ((var = class_type->find_data_member(var_name)))
13665 	  return var;
13666     }
13667   var = build_var_decl(ctxt, die, where_offset, result);
13668   return var;
13669 }
13670 
13671 /// Build a @ref var_decl out of a DW_TAG_variable DIE.
13672 ///
13673 /// @param ctxt the read context to use.
13674 ///
13675 /// @param die the DIE representing the variable we are looking at.
13676 ///
13677 /// @param where_offset the offset of the DIE where we are "logically"
13678 /// positionned at, in the DIE tree.  This is useful when @p die is
13679 /// e.g, DW_TAG_partial_unit that can be included in several places in
13680 /// the DIE tree.
13681 ///
13682 /// @param result if this is set to an existing var_decl, this means
13683 /// that the function will append the new properties it sees on @p die
13684 /// to that exising var_decl.  Otherwise, if this parameter is NULL, a
13685 /// new var_decl is going to be allocated and returned.
13686 ///
13687 /// @return a pointer to the newly created var_decl.  If the var_decl
13688 /// could not be built, this function returns NULL.
13689 static var_decl_sptr
build_var_decl(read_context & ctxt,Dwarf_Die * die,size_t where_offset,var_decl_sptr result)13690 build_var_decl(read_context&	ctxt,
13691 	       Dwarf_Die	*die,
13692 	       size_t		where_offset,
13693 	       var_decl_sptr	result)
13694 {
13695   if (!die)
13696     return result;
13697 
13698   int tag = dwarf_tag(die);
13699   ABG_ASSERT(tag == DW_TAG_variable || tag == DW_TAG_member);
13700 
13701   if (!die_is_public_decl(die))
13702     return result;
13703 
13704   type_base_sptr type;
13705   Dwarf_Die type_die;
13706   if (die_die_attribute(die, DW_AT_type, type_die))
13707     {
13708       decl_base_sptr ty =
13709 	is_decl(build_ir_node_from_die(ctxt, &type_die,
13710 				       /*called_from_public_decl=*/true,
13711 				       where_offset));
13712       if (!ty)
13713 	return result;
13714       type = is_type(ty);
13715       ABG_ASSERT(type);
13716     }
13717 
13718   if (!type)
13719     return result;
13720 
13721   string name, linkage_name;
13722   location loc;
13723   die_loc_and_name(ctxt, die, loc, name, linkage_name);
13724 
13725   if (!result)
13726     result.reset(new var_decl(name, type, loc, linkage_name));
13727   else
13728     {
13729       // We were called to append properties that might have been
13730       // missing from the first version of the variable.  And usually
13731       // that missing property is the mangled name.
13732       if (!linkage_name.empty())
13733 	result->set_linkage_name(linkage_name);
13734     }
13735 
13736   // Check if a variable symbol with this name is exported by the elf
13737   // binary.  If it is, then set the symbol of the variable, if it's
13738   // not set already.
13739   if (!result->get_symbol())
13740     {
13741       elf_symbol_sptr var_sym;
13742       Dwarf_Addr      var_addr;
13743       if (ctxt.get_variable_address(die, var_addr))
13744 	{
13745 	  ctxt.symtab()->update_main_symbol(var_addr,
13746 					    result->get_linkage_name().empty()
13747 					      ? result->get_name()
13748 					      : result->get_linkage_name());
13749 	  var_sym = ctxt.variable_symbol_is_exported(var_addr);
13750 	}
13751 
13752       if (var_sym)
13753 	{
13754 	  result->set_symbol(var_sym);
13755 	  // If the linkage name is not set or is wrong, set it to
13756 	  // the name of the underlying symbol.
13757 	  string linkage_name = result->get_linkage_name();
13758 	  if (linkage_name.empty()
13759 	      || !var_sym->get_alias_from_name(linkage_name))
13760 	    result->set_linkage_name(var_sym->get_name());
13761 	  result->set_is_in_public_symbol_table(true);
13762 	}
13763     }
13764 
13765   return result;
13766 }
13767 
13768 /// Test if a given function denoted by its DIE and its scope is
13769 /// suppressed by any of the suppression specifications associated to
13770 /// a given context of ELF/DWARF reading.
13771 ///
/// Note that a non-member function whose symbol is not exported is
/// also suppressed.
///
/// @param ctxt the ELF/DWARF reading context of interest.
///
/// @param scope the scope of the function.
///
/// @param function_die the DIE representing the function.
///
/// @param is_declaration_only is true if the DIE denoted by @p
/// function_die is a declaration-only DIE.
13783 ///
13784 /// @return true iff @p function_die is suppressed by at least one
13785 /// suppression specification attached to the @p ctxt.
13786 static bool
function_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * function_die,bool is_declaration_only)13787 function_is_suppressed(const read_context& ctxt,
13788 		       const scope_decl* scope,
13789 		       Dwarf_Die *function_die,
13790 		       bool is_declaration_only)
13791 {
13792   if (function_die == 0
13793       || dwarf_tag(function_die) != DW_TAG_subprogram)
13794     return false;
13795 
13796   string fname = die_string_attribute(function_die, DW_AT_name);
13797   string flinkage_name = die_linkage_name(function_die);
13798   if (flinkage_name.empty() && ctxt.die_is_in_c(function_die))
13799     flinkage_name = fname;
13800   string qualified_name = build_qualified_name(scope, fname);
13801 
13802   // A non-member non-static function which symbol is not exported is
13803   // suppressed.
13804   //
13805   // Note that if the non-member non-static function has an undefined
13806   // symbol, by default, it's not suppressed.  Unless we are asked to
13807   // drop undefined symbols too.
13808   if (!is_class_type(scope)
13809       && (!is_declaration_only || ctxt.drop_undefined_syms()))
13810     {
13811       Dwarf_Addr fn_addr;
13812       if (!ctxt.get_function_address(function_die, fn_addr))
13813 	return true;
13814 
13815       elf_symbol_sptr symbol = ctxt.function_symbol_is_exported(fn_addr);
13816       if (!symbol)
13817 	return true;
13818       if (!symbol->is_suppressed())
13819 	return false;
13820 
13821       // Since there is only one symbol in DWARF associated with an elf_symbol,
13822       // we can assume this is the main symbol then. Otherwise the main hinting
13823       // did not work as expected.
13824       ABG_ASSERT(symbol->is_main_symbol());
13825       if (symbol->has_aliases())
13826 	for (elf_symbol_sptr a = symbol->get_next_alias();
13827 	     !a->is_main_symbol(); a = a->get_next_alias())
13828 	  if (!a->is_suppressed())
13829 	    return false;
13830     }
13831 
13832   return suppr::function_is_suppressed(ctxt, qualified_name,
13833 				       flinkage_name,
13834 				       /*require_drop_property=*/true);
13835 }
13836 
13837 /// Build a @ref function_decl out of a DW_TAG_subprogram DIE if the
13838 /// function denoted by the DIE is not suppressed by a suppression
13839 /// specification associated to the current read context.
13840 ///
13841 /// Note that if a member function declaration with the same signature
13842 /// (pretty representation) as one of the DIE we are looking at
13843 /// exists, this function returns that existing function declaration.
13844 ///
13845 /// @param ctxt the read context to use.
13846 ///
13847 /// @param scope the scope of the function we are looking at.
13848 ///
13849 /// @param fn_die the DIE representing the function we are looking at.
13850 ///
13851 /// @param where_offset the offset of the DIE where we are "logically"
13852 /// positionned at, in the DIE tree.  This is useful when @p die is
13853 /// e.g, DW_TAG_partial_unit that can be included in several places in
13854 /// the DIE tree.
13855 ///
13856 /// @param is_declaration_only is true if the DIE denoted by @p fn_die
13857 /// is a declaration-only DIE.
13858 ///
13859 /// @param result if this is set to an existing function_decl, this
13860 /// means that the function will append the new properties it sees on
13861 /// @p fn_die to that exising function_decl.  Otherwise, if this
13862 /// parameter is NULL, a new function_decl is going to be allocated
13863 /// and returned.
13864 ///
13865 /// @return a pointer to the newly created var_decl.  If the var_decl
13866 /// could not be built, this function returns NULL.
13867 static function_decl_sptr
build_or_get_fn_decl_if_not_suppressed(read_context & ctxt,scope_decl * scope,Dwarf_Die * fn_die,size_t where_offset,bool is_declaration_only,function_decl_sptr result)13868 build_or_get_fn_decl_if_not_suppressed(read_context&	  ctxt,
13869 				       scope_decl	  *scope,
13870 				       Dwarf_Die	  *fn_die,
13871 				       size_t		  where_offset,
13872 				       bool		  is_declaration_only,
13873 				       function_decl_sptr result)
13874 {
13875   function_decl_sptr fn;
13876   if (function_is_suppressed(ctxt, scope, fn_die, is_declaration_only))
13877     return fn;
13878 
13879   if (!result)
13880     if ((fn = is_function_decl(ctxt.lookup_artifact_from_die(fn_die))))
13881       {
13882 	fn = maybe_finish_function_decl_reading(ctxt, fn_die, where_offset, fn);
13883 	ctxt.associate_die_to_decl(fn_die, fn, /*do_associate_by_repr=*/true);
13884 	ctxt.associate_die_to_type(fn_die, fn->get_type(), where_offset);
13885 	return fn;
13886       }
13887 
13888   fn = build_function_decl(ctxt, fn_die, where_offset, result);
13889 
13890   return fn;
13891 }
13892 
13893 /// Test if a given variable denoted by its DIE and its scope is
13894 /// suppressed by any of the suppression specifications associated to
13895 /// a given context of ELF/DWARF reading.
13896 ///
13897 /// @param ctxt the ELF/DWARF reading content of interest.
13898 ///
13899 /// @param scope of the scope of the variable.
13900 ///
13901 /// @param variable_die the DIE representing the variable.
13902 ///
13903 /// @param is_required_decl_spec if true, means that the @p
13904 /// variable_die being considered is for a variable decl that is a
13905 /// specification for a concrete variable being built.
13906 ///
13907 /// @return true iff @p variable_die is suppressed by at least one
13908 /// suppression specification attached to the @p ctxt.
13909 static bool
variable_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * variable_die,bool is_required_decl_spec)13910 variable_is_suppressed(const read_context& ctxt,
13911 		       const scope_decl* scope,
13912 		       Dwarf_Die *variable_die,
13913 		       bool is_required_decl_spec)
13914 {
13915   if (variable_die == 0
13916       || (dwarf_tag(variable_die) != DW_TAG_variable
13917 	  && dwarf_tag(variable_die) != DW_TAG_member))
13918     return false;
13919 
13920   string name = die_string_attribute(variable_die, DW_AT_name);
13921   string linkage_name = die_linkage_name(variable_die);
13922   if (linkage_name.empty() && ctxt.die_is_in_c(variable_die))
13923     linkage_name = name;
13924   string qualified_name = build_qualified_name(scope, name);
13925 
13926   // If a non member variable that is a declaration (has no defined
13927   // and exported symbol) and is not the specification of another
13928   // concrete variable, then it's suppressed.  This is a size
13929   // optimization; it removes useless declaration-only variables from
13930   // the IR.
13931   if (!is_class_type(scope) && !is_required_decl_spec)
13932     {
13933       Dwarf_Addr var_addr = 0;
13934       if (!ctxt.get_variable_address(variable_die, var_addr))
13935 	return true;
13936 
13937       elf_symbol_sptr symbol = ctxt.variable_symbol_is_exported(var_addr);
13938       if (!symbol)
13939 	return true;
13940       if (!symbol->is_suppressed())
13941 	return false;
13942 
13943       // Since there is only one symbol in DWARF associated with an elf_symbol,
13944       // we can assume this is the main symbol then. Otherwise the main hinting
13945       // did not work as expected.
13946       ABG_ASSERT(symbol->is_main_symbol());
13947       if (symbol->has_aliases())
13948 	for (elf_symbol_sptr a = symbol->get_next_alias();
13949 	     !a->is_main_symbol(); a = a->get_next_alias())
13950 	  if (!a->is_suppressed())
13951 	    return false;
13952     }
13953 
13954   return suppr::variable_is_suppressed(ctxt, qualified_name,
13955 				       linkage_name,
13956 				       /*require_drop_property=*/true);
13957 }
13958 
13959 /// Test if a type (designated by a given DIE) in a given scope is
13960 /// suppressed by the suppression specifications that are associated
13961 /// to a given read context.
13962 ///
13963 /// @param ctxt the read context to consider.
13964 ///
13965 /// @param scope of the scope of the type DIE to consider.
13966 ///
13967 /// @param type_die the DIE that designates the type to consider.
13968 ///
13969 /// @param type_is_private out parameter.  If this function returns
13970 /// true (the type @p type_die is suppressed) and if the type was
13971 /// suppressed because it's private then this parameter is set to
13972 /// true.
13973 ///
13974 /// @return true iff the type designated by the DIE @p type_die, in
13975 /// the scope @p scope is suppressed by at the suppression
13976 /// specifications associated to the current read context.
13977 static bool
type_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * type_die,bool & type_is_private)13978 type_is_suppressed(const read_context& ctxt,
13979 		   const scope_decl* scope,
13980 		   Dwarf_Die *type_die,
13981 		   bool &type_is_private)
13982 {
13983   if (type_die == 0
13984       || (dwarf_tag(type_die) != DW_TAG_enumeration_type
13985 	  && dwarf_tag(type_die) != DW_TAG_class_type
13986 	  && dwarf_tag(type_die) != DW_TAG_structure_type
13987 	  && dwarf_tag(type_die) != DW_TAG_union_type))
13988     return false;
13989 
13990   string type_name, linkage_name;
13991   location type_location;
13992   die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
13993   string qualified_name = build_qualified_name(scope, type_name);
13994 
13995   return suppr::type_is_suppressed(ctxt, qualified_name,
13996 				   type_location,
13997 				   type_is_private,
13998 				   /*require_drop_property=*/true);
13999 }
14000 
14001 /// Test if a type (designated by a given DIE) in a given scope is
14002 /// suppressed by the suppression specifications that are associated
14003 /// to a given read context.
14004 ///
14005 /// @param ctxt the read context to consider.
14006 ///
14007 /// @param scope of the scope of the type DIE to consider.
14008 ///
14009 /// @param type_die the DIE that designates the type to consider.
14010 ///
14011 /// @return true iff the type designated by the DIE @p type_die, in
14012 /// the scope @p scope is suppressed by at the suppression
14013 /// specifications associated to the current read context.
14014 static bool
type_is_suppressed(const read_context & ctxt,const scope_decl * scope,Dwarf_Die * type_die)14015 type_is_suppressed(const read_context& ctxt,
14016 		   const scope_decl* scope,
14017 		   Dwarf_Die *type_die)
14018 {
14019   bool type_is_private = false;
14020   return type_is_suppressed(ctxt, scope, type_die, type_is_private);
14021 }
14022 
14023 /// Get the opaque version of a type that was suppressed because it's
14024 /// a private type.
14025 ///
14026 /// The opaque version version of the type is just a declared-only
14027 /// version of the type (class, union or enum type) denoted by @p
14028 /// type_die.
14029 ///
14030 /// @param ctxt the read context in use.
14031 ///
14032 /// @param scope the scope of the type die we are looking at.
14033 ///
14034 /// @param type_die the type DIE we are looking at.
14035 ///
14036 /// @param where_offset the offset of the DIE where we are "logically"
14037 /// positionned at, in the DIE tree.  This is useful when @p die is
14038 /// e.g, DW_TAG_partial_unit that can be included in several places in
14039 /// the DIE tree.
14040 ///
14041 /// @return the opaque version of the type denoted by @p type_die or
14042 /// nil if no opaque version was found.
14043 static type_or_decl_base_sptr
get_opaque_version_of_type(read_context & ctxt,scope_decl * scope,Dwarf_Die * type_die,size_t where_offset)14044 get_opaque_version_of_type(read_context	&ctxt,
14045 			   scope_decl		*scope,
14046 			   Dwarf_Die		*type_die,
14047 			   size_t		where_offset)
14048 {
14049   type_or_decl_base_sptr result;
14050 
14051   if (type_die == 0)
14052     return result;
14053 
14054   unsigned tag = dwarf_tag(type_die);
14055   if (tag != DW_TAG_class_type
14056       && tag != DW_TAG_structure_type
14057       && tag != DW_TAG_union_type
14058       && tag != DW_TAG_enumeration_type)
14059     return result;
14060 
14061   string type_name, linkage_name;
14062   location type_location;
14063   die_loc_and_name(ctxt, type_die, type_location, type_name, linkage_name);
14064   if (!type_location)
14065     return result;
14066 
14067   string qualified_name = build_qualified_name(scope, type_name);
14068 
14069   //
14070   // TODO: also handle declaration-only unions.  To do that, we mostly
14071   // need to adapt add_or_update_union_type to make it schedule
14072   // declaration-only unions for resolution too.
14073   //
14074   if (tag == DW_TAG_structure_type || tag == DW_TAG_class_type)
14075     {
14076       string_classes_map::const_iterator i =
14077 	ctxt.declaration_only_classes().find(qualified_name);
14078       if (i != ctxt.declaration_only_classes().end())
14079 	result = i->second.back();
14080 
14081       if (!result)
14082 	{
14083 	  // So we didn't find any pre-existing forward-declared-only
14084 	  // class for the class definition that we could return as an
14085 	  // opaque type.  So let's build one.
14086 	  //
14087 	  // TODO: we need to be able to do this for unions too!
14088 	  class_decl_sptr klass(new class_decl(ctxt.env(), type_name,
14089 					       /*alignment=*/0, /*size=*/0,
14090 					       tag == DW_TAG_structure_type,
14091 					       type_location,
14092 					       decl_base::VISIBILITY_DEFAULT));
14093 	  klass->set_is_declaration_only(true);
14094 	  add_decl_to_scope(klass, scope);
14095 	  ctxt.associate_die_to_type(type_die, klass, where_offset);
14096 	  ctxt.maybe_schedule_declaration_only_class_for_resolution(klass);
14097 	  result = klass;
14098 	}
14099     }
14100 
14101   if (tag == DW_TAG_enumeration_type)
14102     {
14103       string_enums_map::const_iterator i =
14104 	ctxt.declaration_only_enums().find(qualified_name);
14105       if (i != ctxt.declaration_only_enums().end())
14106 	result = i->second.back();
14107 
14108       if (!result)
14109 	{
14110 	  uint64_t size = 0;
14111 	  if (die_unsigned_constant_attribute(type_die, DW_AT_byte_size, size))
14112 	    size *= 8;
14113 	  type_decl_sptr underlying_type =
14114 	    build_enum_underlying_type(ctxt, type_name, size,
14115 				       /*anonymous=*/true);
14116 	  enum_type_decl::enumerators enumeratorz;
14117 	  enum_type_decl_sptr enum_type (new enum_type_decl(type_name,
14118 							    type_location,
14119 							    underlying_type,
14120 							    enumeratorz,
14121 							    linkage_name));
14122 	  add_decl_to_scope(enum_type, scope);
14123 	  result = enum_type;
14124 	}
14125     }
14126 
14127   return result;
14128 }
14129 
14130 /// Create a function symbol with a given name.
14131 ///
14132 /// @param sym_name the name of the symbol to create.
14133 ///
14134 /// @param env the environment to create the symbol in.
14135 ///
14136 /// @return the newly created symbol.
14137 elf_symbol_sptr
create_default_fn_sym(const string & sym_name,const environment * env)14138 create_default_fn_sym(const string& sym_name, const environment *env)
14139 {
14140   elf_symbol::version ver;
14141   elf_symbol_sptr result =
14142     elf_symbol::create(env,
14143 		       /*symbol index=*/ 0,
14144 		       /*symbol size=*/ 0,
14145 		       sym_name,
14146 		       /*symbol type=*/ elf_symbol::FUNC_TYPE,
14147 		       /*symbol binding=*/ elf_symbol::GLOBAL_BINDING,
14148 		       /*symbol is defined=*/ true,
14149 		       /*symbol is common=*/ false,
14150 		       /*symbol version=*/ ver,
14151 		       /*symbol visibility=*/elf_symbol::DEFAULT_VISIBILITY,
14152 		       /*symbol is linux string cst=*/false);
14153   return result;
14154 }
14155 
14156 /// Build a @ref function_decl our of a DW_TAG_subprogram DIE.
14157 ///
14158 /// @param ctxt the read context to use
14159 ///
14160 /// @param die the DW_TAG_subprogram DIE to read from.
14161 ///
14162 /// @param where_offset the offset of the DIE where we are "logically"
14163 /// positionned at, in the DIE tree.  This is useful when @p die is
14164 /// e.g, DW_TAG_partial_unit that can be included in several places in
14165 /// the DIE tree.
14166 ///
14167 /// @param called_for_public_decl this is set to true if the function
14168 /// was called for a public (function) decl.
14169 static function_decl_sptr
build_function_decl(read_context & ctxt,Dwarf_Die * die,size_t where_offset,function_decl_sptr fn)14170 build_function_decl(read_context&	ctxt,
14171 		    Dwarf_Die*		die,
14172 		    size_t		where_offset,
14173 		    function_decl_sptr	fn)
14174 {
14175   function_decl_sptr result = fn;
14176   if (!die)
14177     return result;
14178   ABG_ASSERT(dwarf_tag(die) == DW_TAG_subprogram);
14179 
14180   if (!die_is_public_decl(die))
14181     return result;
14182 
14183   translation_unit_sptr tu = ctxt.cur_transl_unit();
14184   ABG_ASSERT(tu);
14185 
14186   string fname, flinkage_name;
14187   location floc;
14188   die_loc_and_name(ctxt, die, floc, fname, flinkage_name);
14189 
14190   size_t is_inline = die_is_declared_inline(die);
14191   class_or_union_sptr is_method =
14192     is_class_or_union_type(get_scope_for_die(ctxt, die, true, where_offset));
14193 
14194   if (result)
14195     {
14196       // Add the properties that might have been missing from the
14197       // first declaration of the function.  For now, it usually is
14198       // the mangled name that goes missing in the first declarations.
14199       //
14200       // Also note that if 'fn' has just been cloned, the current
14201       // linkage name (of the current DIE) might be different from the
14202       // linkage name of 'fn'.  In that case, update the linkage name
14203       // of 'fn' too.
14204       if (!flinkage_name.empty()
14205 	  && result->get_linkage_name() != flinkage_name)
14206 	result->set_linkage_name(flinkage_name);
14207       if (floc)
14208 	if (!result->get_location())
14209 	  result->set_location(floc);
14210     }
14211   else
14212     {
14213       function_type_sptr fn_type(build_function_type(ctxt, die, is_method,
14214 						     where_offset));
14215       if (!fn_type)
14216 	return result;
14217 
14218       maybe_canonicalize_type(fn_type, ctxt);
14219 
14220       result.reset(is_method
14221 		   ? new method_decl(fname, fn_type,
14222 				     is_inline, floc,
14223 				     flinkage_name)
14224 		   : new function_decl(fname, fn_type,
14225 				       is_inline, floc,
14226 				       flinkage_name));
14227     }
14228 
14229   // Set the symbol of the function.  If the linkage name is not set
14230   // or is wrong, set it to the name of the underlying symbol.
14231   if (!result->get_symbol())
14232     {
14233       elf_symbol_sptr fn_sym;
14234       Dwarf_Addr      fn_addr;
14235       if (ctxt.get_function_address(die, fn_addr))
14236 	{
14237 	  ctxt.symtab()->update_main_symbol(fn_addr,
14238 					    result->get_linkage_name().empty()
14239 					      ? result->get_name()
14240 					      : result->get_linkage_name());
14241 	  fn_sym = ctxt.function_symbol_is_exported(fn_addr);
14242 	}
14243 
14244       if (fn_sym && !ctxt.symbol_already_belongs_to_a_function(fn_sym))
14245 	{
14246 	  result->set_symbol(fn_sym);
14247 	  string linkage_name = result->get_linkage_name();
14248 	  if (linkage_name.empty()
14249 	      || !fn_sym->get_alias_from_name(linkage_name))
14250 	    result->set_linkage_name(fn_sym->get_name());
14251 	  result->set_is_in_public_symbol_table(true);
14252 	}
14253     }
14254 
14255   ctxt.associate_die_to_type(die, result->get_type(), where_offset);
14256 
14257   size_t die_offset = dwarf_dieoffset(die);
14258 
14259   if (fn
14260       && is_member_function(fn)
14261       && get_member_function_is_virtual(fn)
14262       && !result->get_linkage_name().empty())
14263     // This function is a virtual member function which has its
14264     // linkage name *and* and has its underlying symbol correctly set.
14265     // It thus doesn't need any fixup related to elf symbol.  So
14266     // remove it from the set of virtual member functions with linkage
14267     // names and no elf symbol that need to be fixed up.
14268     ctxt.die_function_decl_with_no_symbol_map().erase(die_offset);
14269   return result;
14270 }
14271 
14272 /// Read all @ref abigail::translation_unit possible from the debug info
14273 /// accessible through a DWARF Front End Library handle, and stuff
14274 /// them into a libabigail ABI Corpus.
14275 ///
14276 /// @param ctxt the read context.
14277 ///
14278 /// @return a pointer to the resulting corpus, or NULL if the corpus
14279 /// could not be constructed.
14280 static corpus_sptr
read_debug_info_into_corpus(read_context & ctxt)14281 read_debug_info_into_corpus(read_context& ctxt)
14282 {
14283   ctxt.clear_per_corpus_data();
14284 
14285   if (!ctxt.current_corpus())
14286     {
14287       corpus_sptr corp (new corpus(ctxt.env(), ctxt.elf_path()));
14288       ctxt.current_corpus(corp);
14289       if (!ctxt.env())
14290 	ctxt.env(corp->get_environment());
14291     }
14292 
14293   // First set some mundane properties of the corpus gathered from
14294   // ELF.
14295   ctxt.current_corpus()->set_path(ctxt.elf_path());
14296   if (is_linux_kernel(ctxt.elf_handle()))
14297     ctxt.current_corpus()->set_origin(corpus::LINUX_KERNEL_BINARY_ORIGIN);
14298   else
14299     ctxt.current_corpus()->set_origin(corpus::DWARF_ORIGIN);
14300   ctxt.current_corpus()->set_soname(ctxt.dt_soname());
14301   ctxt.current_corpus()->set_needed(ctxt.dt_needed());
14302   ctxt.current_corpus()->set_architecture_name(ctxt.elf_architecture());
14303   if (corpus_group_sptr group = ctxt.current_corpus_group())
14304     group->add_corpus(ctxt.current_corpus());
14305 
14306   // Set symbols information to the corpus.
14307   ctxt.current_corpus()->set_symtab(ctxt.symtab());
14308 
14309   // Get out now if no debug info is found.
14310   if (!ctxt.dwarf())
14311     return ctxt.current_corpus();
14312 
14313   uint8_t address_size = 0;
14314   size_t header_size = 0;
14315 
14316   // Set the set of exported declaration that are defined.
14317   ctxt.exported_decls_builder
14318     (ctxt.current_corpus()->get_exported_decls_builder().get());
14319 
14320   // Walk all the DIEs of the debug info to build a DIE -> parent map
14321   // useful for get_die_parent() to work.
14322   {
14323     tools_utils::timer t;
14324     if (ctxt.do_log())
14325       {
14326 	cerr << "building die -> parent maps ...";
14327 	t.start();
14328       }
14329 
14330     ctxt.build_die_parent_maps();
14331 
14332     if (ctxt.do_log())
14333       {
14334 	t.stop();
14335 	cerr << " DONE@" << ctxt.current_corpus()->get_path()
14336 	     << ":"
14337 	     << t
14338 	     << "\n";
14339       }
14340   }
14341 
14342   ctxt.env()->canonicalization_is_done(false);
14343 
14344   {
14345     tools_utils::timer t;
14346     if (ctxt.do_log())
14347       {
14348 	cerr << "building the libabigail internal representation ...";
14349 	t.start();
14350       }
14351     // And now walk all the DIEs again to build the libabigail IR.
14352     Dwarf_Half dwarf_version = 0;
14353     for (Dwarf_Off offset = 0, next_offset = 0;
14354 	 (dwarf_next_unit(ctxt.dwarf(), offset, &next_offset, &header_size,
14355 			  &dwarf_version, NULL, &address_size, NULL,
14356 			  NULL, NULL) == 0);
14357 	 offset = next_offset)
14358       {
14359 	Dwarf_Off die_offset = offset + header_size;
14360 	Dwarf_Die unit;
14361 	if (!dwarf_offdie(ctxt.dwarf(), die_offset, &unit)
14362 	    || dwarf_tag(&unit) != DW_TAG_compile_unit)
14363 	  continue;
14364 
14365 	ctxt.dwarf_version(dwarf_version);
14366 
14367 	address_size *= 8;
14368 
14369 	// Build a translation_unit IR node from cu; note that cu must
14370 	// be a DW_TAG_compile_unit die.
14371 	translation_unit_sptr ir_node =
14372 	  build_translation_unit_and_add_to_ir(ctxt, &unit, address_size);
14373 	ABG_ASSERT(ir_node);
14374       }
14375     if (ctxt.do_log())
14376       {
14377 	t.stop();
14378 	cerr << " DONE@" << ctxt.current_corpus()->get_path()
14379 	     << ":"
14380 	     << t
14381 	     << "\n";
14382       }
14383   }
14384 
14385   {
14386     tools_utils::timer t;
14387     if (ctxt.do_log())
14388       {
14389 	cerr << "resolving declaration only classes ...";
14390 	t.start();
14391       }
14392     ctxt.resolve_declaration_only_classes();
14393     if (ctxt.do_log())
14394       {
14395 	t.stop();
14396 	cerr << " DONE@" << ctxt.current_corpus()->get_path()
14397 	     << ":"
14398 	     << t
14399 	     <<"\n";
14400       }
14401   }
14402 
14403   {
14404     tools_utils::timer t;
14405     if (ctxt.do_log())
14406       {
14407 	cerr << "resolving declaration only enums ...";
14408 	t.start();
14409       }
14410     ctxt.resolve_declaration_only_enums();
14411     if (ctxt.do_log())
14412       {
14413 	t.stop();
14414 	cerr << " DONE@" << ctxt.current_corpus()->get_path()
14415 	     << ":"
14416 	     << t
14417 	     <<"\n";
14418       }
14419   }
14420 
14421   {
14422     tools_utils::timer t;
14423     if (ctxt.do_log())
14424       {
14425 	cerr << "fixing up functions with linkage name but "
14426 	     << "no advertised underlying symbols ....";
14427 	t.start();
14428       }
14429     ctxt.fixup_functions_with_no_symbols();
14430     if (ctxt.do_log())
14431       {
14432 	t.stop();
14433 	cerr << " DONE@" << ctxt.current_corpus()->get_path()
14434 	     <<":"
14435 	     << t
14436 	     <<"\n";
14437       }
14438   }
14439 
14440   /// Now, look at the types that needs to be canonicalized after the
14441   /// translation has been constructed (which is just now) and
14442   /// canonicalize them.
14443   ///
14444   /// These types need to be constructed at the end of the translation
14445   /// unit reading phase because some types are modified by some DIEs
14446   /// even after the principal DIE describing the type has been read;
14447   /// this happens for clones of virtual destructors (for instance) or
14448   /// even for some static data members.  We need to do that for types
14449   /// are in the alternate debug info section and for types that in
14450   /// the main debug info section.
14451   {
14452     tools_utils::timer t;
14453     if (ctxt.do_log())
14454       {
14455 	cerr << "perform late type canonicalizing ...\n";
14456 	t.start();
14457       }
14458 
14459     ctxt.perform_late_type_canonicalizing();
14460     if (ctxt.do_log())
14461       {
14462 	t.stop();
14463 	cerr << "late type canonicalizing DONE@"
14464 	     << ctxt.current_corpus()->get_path()
14465 	     << ":"
14466 	     << t
14467 	     << "\n";
14468       }
14469   }
14470 
14471   ctxt.env()->canonicalization_is_done(true);
14472 
14473   {
14474     tools_utils::timer t;
14475     if (ctxt.do_log())
14476       {
14477 	cerr << "sort functions and variables ...";
14478 	t.start();
14479       }
14480     ctxt.current_corpus()->sort_functions();
14481     ctxt.current_corpus()->sort_variables();
14482     if (ctxt.do_log())
14483       {
14484 	t.stop();
14485 	cerr << " DONE@" << ctxt.current_corpus()->get_path()
14486 	     << ":"
14487 	     << t
14488 	     <<" \n";
14489       }
14490   }
14491 
14492   return ctxt.current_corpus();
14493 }
14494 
14495 /// Canonicalize a type if it's suitable for early canonicalizing, or,
14496 /// if it's not, schedule it for late canonicalization, after the
14497 /// debug info of the current translation unit has been fully read.
14498 ///
14499 /// A (composite) type is deemed suitable for early canonicalizing iff
14500 /// all of its sub-types are canonicalized themselve.  Non composite
14501 /// types are always deemed suitable for early canonicalization.
14502 ///
14503 /// Note that this function doesn't work on *ANONYMOUS* classes,
14504 /// structs, unions or enums because it first does some
14505 /// canonicalization of the DWARF DIE @p die.  That canonicalization
14506 /// is done by looking up @p die by name; and because these are
14507 /// anonymous types, they don't have names! and so that
14508 /// canonicalization fails.  So the type artifact associated to @p
14509 /// die often ends being *NOT* canonicalized.  This later leads to
14510 /// extreme slowness of operation, especially when comparisons are
14511 /// later performed on these anonymous types.
14512 ///
14513 /// So when you have classes, structs, unions, or enums that can be
14514 /// anonymous, please use this overload instead:
14515 ///
14516 ///     void
14517 ///     maybe_canonicalize_type(const Dwarf_Die*	die,
14518 ///				const type_base_sptr&	t,
14519 ///				read_context&		ctxt);
14520 ///
14521 /// It knows how to deal with anonymous types.
14522 ///
14523 /// @p looks up the type artifact
14524 /// associated to @p die.  During that lookup, ; but then those types don't have
14525 /// names because they are anonymous.
14526 ///
14527 /// @param die the type DIE to consider for canonicalization.  Note
14528 /// that this DIE must have been associated with its type using the
14529 /// function read_context::associate_die_to_type() prior to calling
14530 /// this function.
14531 ///
14532 /// @param ctxt the @ref read_context to use.
14533 static void
maybe_canonicalize_type(const Dwarf_Die * die,read_context & ctxt)14534 maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
14535 {
14536   const die_source source = ctxt.get_die_source(die);
14537 
14538   size_t die_offset = dwarf_dieoffset(const_cast<Dwarf_Die*>(die));
14539   type_base_sptr t = ctxt.lookup_type_from_die(die);
14540 
14541   if (!t)
14542     return;
14543 
14544   type_base_sptr peeled_type = peel_typedef_pointer_or_reference_type(t);
14545   if (is_class_type(peeled_type)
14546       || is_union_type(peeled_type)
14547       || is_function_type(peeled_type)
14548       || is_array_type(peeled_type)
14549       || is_qualified_type(peeled_type)
14550       || is_typedef(t))
14551     // We delay canonicalization of classes/unions or typedef,
14552     // pointers, references and array to classes/unions.  This is
14553     // because the (underlying) class might not be finished yet and we
14554     // might not be able to able detect it here (thinking about
14555     // classes that are work-in-progress, or classes that might be
14556     // later amended by some DWARF construct).  So we err on the safe
14557     // side.  We also delay canonicalization for array and qualified
14558     // types because they can be edited (in particular by
14559     // maybe_strip_qualification) after they are initially built.
14560     ctxt.schedule_type_for_late_canonicalization(die);
14561   else if ((is_function_type(t)
14562 	    && ctxt.is_wip_function_type_die_offset(die_offset, source))
14563 	   || type_has_non_canonicalized_subtype(t))
14564     ctxt.schedule_type_for_late_canonicalization(die);
14565   else
14566     canonicalize(t);
14567 }
14568 
14569 /// Canonicalize a type if it's suitable for early canonicalizing, or,
14570 /// if it's not, schedule it for late canonicalization, after the
14571 /// debug info of the current translation unit has been fully read.
14572 ///
14573 /// A (composite) type is deemed suitable for early canonicalizing iff
14574 /// all of its sub-types are canonicalized themselve.  Non composite
14575 /// types are always deemed suitable for early canonicalization.
14576 ///
/// Note that this function knows how to deal with anonymous classes,
/// structs and enums, unlike the overload above:
14579 ///
14580 ///     void maybe_canonicalize_type(const Dwarf_Die *die, read_context& ctxt)
14581 ///
14582 /// The problem, though is that this function is much slower that that
14583 /// overload above because of how the types that are meant for later
14584 /// canonicalization are stored.  So the idea is that this function
14585 /// should be used only for the smallest possible subset of types that
14586 /// are anonymous and thus cannot be handled by the overload above.
14587 ///
14588 /// @param t the type DIE to consider for canonicalization.
14589 ///
14590 /// @param ctxt the @ref read_context to use.
14591 static void
maybe_canonicalize_type(const type_base_sptr & t,read_context & ctxt)14592 maybe_canonicalize_type(const type_base_sptr& t,
14593 			read_context&	ctxt)
14594 {
14595   if (!t)
14596     return;
14597 
14598   type_base_sptr peeled_type = peel_typedef_pointer_or_reference_type(t);
14599   if (is_class_type(peeled_type)
14600       || is_union_type(peeled_type)
14601       || is_function_type(peeled_type)
14602       || is_array_type(peeled_type)
14603       || is_qualified_type(peeled_type))
14604     // We delay canonicalization of classes/unions or typedef,
14605     // pointers, references and array to classes/unions.  This is
14606     // because the (underlying) class might not be finished yet and we
14607     // might not be able to able detect it here (thinking about
14608     // classes that are work-in-progress, or classes that might be
14609     // later amended by some DWARF construct).  So we err on the safe
14610     // side.  We also delay canonicalization for array and qualified
14611     // types because they can be edited (in particular by
14612     // maybe_strip_qualification) after they are initially built.
14613     ctxt.schedule_type_for_late_canonicalization(t);
14614   else if (type_has_non_canonicalized_subtype(t))
14615     ctxt.schedule_type_for_late_canonicalization(t);
14616   else
14617     canonicalize(t);
14618 }
14619 
14620 /// If a given decl is a member type declaration, set its access
14621 /// specifier from the DIE that represents it.
14622 ///
14623 /// @param member_type_declaration the member type declaration to
14624 /// consider.
14625 static void
maybe_set_member_type_access_specifier(decl_base_sptr member_type_declaration,Dwarf_Die * die)14626 maybe_set_member_type_access_specifier(decl_base_sptr member_type_declaration,
14627 				       Dwarf_Die* die)
14628 {
14629   if (is_type(member_type_declaration)
14630       && is_member_decl(member_type_declaration))
14631     {
14632       class_or_union* scope =
14633 	is_class_or_union_type(member_type_declaration->get_scope());
14634       ABG_ASSERT(scope);
14635 
14636       access_specifier access = public_access;
14637       if (class_decl* cl = is_class_type(scope))
14638 	if (!cl->is_struct())
14639 	  access = private_access;
14640 
14641       die_access_specifier(die, access);
14642       set_member_access_specifier(member_type_declaration, access);
14643     }
14644 }
14645 
14646 /// This function tests if a given function which might be intented to
14647 /// be added to a class scope (to become a member function) should be
14648 /// dropped on the floor instead and not be added to the class.
14649 ///
14650 /// This is a subroutine of build_ir_node_from_die.
14651 ///
14652 /// @param fn the function to consider.
14653 ///
14654 /// @param scope the scope the function is intended to be added
14655 /// to. This might be of class type or not.
14656 ///
14657 /// @param fn_die the DWARF die of @p fn.
14658 ///
14659 /// @return true iff @p fn should be dropped on the floor.
14660 static bool
potential_member_fn_should_be_dropped(const function_decl_sptr & fn,Dwarf_Die * fn_die)14661 potential_member_fn_should_be_dropped(const function_decl_sptr& fn,
14662 				      Dwarf_Die *fn_die)
14663 {
14664   if (!fn || fn->get_scope())
14665     return false;
14666 
14667   if (// A function that is not virtual ...
14668       !die_is_virtual(fn_die)
14669       // ... has a linkage name ...
14670       && !fn->get_linkage_name().empty()
14671       // .. and yet has no ELF symbol associated ...
14672       && !fn->get_symbol())
14673     // Should not be added to its class scope.
14674     //
14675     // Why would it? It's not part of the ABI anyway, as it doesn't
14676     // have any ELF symbol associated and is not a virtual member
14677     // function.  It just constitutes bloat in the IR and might even
14678     // induce spurious change reports down the road.
14679     return true;
14680 
14681   return false;
14682 }
14683 
14684 /// Build an IR node from a given DIE and add the node to the current
14685 /// IR being build and held in the read_context.  Doing that is called
14686 /// "emitting an IR node for the DIE".
14687 ///
14688 /// @param ctxt the read context.
14689 ///
14690 /// @param die the DIE to consider.
14691 ///
14692 /// @param scope the scope under which the resulting IR node has to be
14693 /// added.
14694 ///
14695 /// @param called_from_public_decl set to yes if this function is
14696 /// called from the functions used to build a public decl (functions
14697 /// and variables).  In that case, this function accepts building IR
14698 /// nodes representing types.  Otherwise, this function only creates
14699 /// IR nodes representing public decls (functions and variables).
14700 /// This is done to avoid emitting IR nodes for types that are not
14701 /// referenced by public functions or variables.
14702 ///
14703 /// @param where_offset the offset of the DIE where we are "logically"
14704 /// positionned at, in the DIE tree.  This is useful when @p die is
14705 /// e.g, DW_TAG_partial_unit that can be included in several places in
14706 /// the DIE tree.
14707 ///
14708 /// @param is_required_decl_spec if true, it means the ir node to
14709 /// build is for a decl that is a specification for another decl that
14710 /// is concrete.  If you don't know what this is, set it to false.
14711 ///
14712 /// @param is_declaration_only is true if the DIE denoted by @p die is
14713 /// a declaration-only DIE.
14714 ///
14715 /// @return the resulting IR node.
14716 static type_or_decl_base_sptr
build_ir_node_from_die(read_context & ctxt,Dwarf_Die * die,scope_decl * scope,bool called_from_public_decl,size_t where_offset,bool is_declaration_only,bool is_required_decl_spec)14717 build_ir_node_from_die(read_context&	ctxt,
14718 		       Dwarf_Die*	die,
14719 		       scope_decl*	scope,
14720 		       bool		called_from_public_decl,
14721 		       size_t		where_offset,
14722 		       bool		is_declaration_only,
14723 		       bool		is_required_decl_spec)
14724 {
14725   type_or_decl_base_sptr result;
14726 
14727   if (!die || !scope)
14728     return result;
14729 
14730   int tag = dwarf_tag(die);
14731 
14732   if (!called_from_public_decl)
14733     {
14734       if (ctxt.load_all_types() && die_is_type(die))
14735 	/* We were instructed to load debug info for all types,
14736 	   included those that are not reachable from a public
14737 	   declaration.  So load the debug info for this type.  */;
14738       else if (tag != DW_TAG_subprogram
14739 	       && tag != DW_TAG_variable
14740 	       && tag != DW_TAG_member
14741 	       && tag != DW_TAG_namespace)
14742 	return result;
14743     }
14744 
14745   const die_source source_of_die = ctxt.get_die_source(die);
14746 
14747   if ((result = ctxt.lookup_decl_from_die_offset(dwarf_dieoffset(die),
14748 						 source_of_die)))
14749     {
14750       if (ctxt.load_all_types())
14751 	if (called_from_public_decl)
14752 	  if (type_base_sptr t = is_type(result))
14753 	    if (corpus *abi_corpus = scope->get_corpus())
14754 	      abi_corpus->record_type_as_reachable_from_public_interfaces(*t);
14755 
14756       return result;
14757     }
14758 
14759   // This is *the* bit of code that ensures we have the right notion
14760   // of "declared" at any point in a DIE chain formed from
14761   // DW_AT_abstract_origin and DW_AT_specification links. There should
14762   // be no other callers of die_is_declaration_only.
14763   is_declaration_only = is_declaration_only && die_is_declaration_only(die);
14764 
14765   switch (tag)
14766     {
14767       // Type DIEs we support.
14768     case DW_TAG_base_type:
14769       if (type_decl_sptr t = build_type_decl(ctxt, die, where_offset))
14770 	{
14771 	  result =
14772 	    add_decl_to_scope(t, ctxt.cur_transl_unit()->get_global_scope());
14773 	  canonicalize(t);
14774 	}
14775       break;
14776 
14777     case DW_TAG_typedef:
14778       {
14779 	typedef_decl_sptr t = build_typedef_type(ctxt, die,
14780 						 called_from_public_decl,
14781 						 where_offset);
14782 	result = add_decl_to_scope(t, scope);
14783 	if (result)
14784 	  {
14785 	    maybe_set_member_type_access_specifier(is_decl(result), die);
14786 	    maybe_canonicalize_type(die, ctxt);
14787 	  }
14788       }
14789       break;
14790 
14791     case DW_TAG_pointer_type:
14792       {
14793 	pointer_type_def_sptr p =
14794 	  build_pointer_type_def(ctxt, die,
14795 				 called_from_public_decl,
14796 				 where_offset);
14797 	if (p)
14798 	  {
14799 	    result =
14800 	      add_decl_to_scope(p, ctxt.cur_transl_unit()->get_global_scope());
14801 	    ABG_ASSERT(result->get_translation_unit());
14802 	    maybe_canonicalize_type(die, ctxt);
14803 	  }
14804       }
14805       break;
14806 
14807     case DW_TAG_reference_type:
14808     case DW_TAG_rvalue_reference_type:
14809       {
14810 	reference_type_def_sptr r =
14811 	  build_reference_type(ctxt, die,
14812 			       called_from_public_decl,
14813 			       where_offset);
14814 	if (r)
14815 	  {
14816 	    result =
14817 	      add_decl_to_scope(r, ctxt.cur_transl_unit()->get_global_scope());
14818 
14819 	    ctxt.associate_die_to_type(die, r, where_offset);
14820 	    maybe_canonicalize_type(die, ctxt);
14821 	  }
14822       }
14823       break;
14824 
14825     case DW_TAG_const_type:
14826     case DW_TAG_volatile_type:
14827     case DW_TAG_restrict_type:
14828       {
14829 	type_base_sptr q =
14830 	  build_qualified_type(ctxt, die,
14831 			       called_from_public_decl,
14832 			       where_offset);
14833 	if (q)
14834 	  {
14835 	    // Strip some potentially redundant type qualifiers from
14836 	    // the qualified type we just built.
14837 	    decl_base_sptr d = maybe_strip_qualification(is_qualified_type(q),
14838 							 ctxt);
14839 	    if (!d)
14840 	      d = get_type_declaration(q);
14841 	    ABG_ASSERT(d);
14842 	    type_base_sptr ty = is_type(d);
14843 	    // Associate the die to type ty again because 'ty'might be
14844 	    // different from 'q', because 'ty' is 'q' possibly
14845 	    // stripped from some redundant type qualifier.
14846 	    ctxt.associate_die_to_type(die, ty, where_offset);
14847 	    result =
14848 	      add_decl_to_scope(d, ctxt.cur_transl_unit()->get_global_scope());
14849 	    maybe_canonicalize_type(die, ctxt);
14850 	  }
14851       }
14852       break;
14853 
14854     case DW_TAG_enumeration_type:
14855       {
14856 	bool type_is_private = false;
14857 	bool type_suppressed =
14858 	  type_is_suppressed(ctxt, scope, die, type_is_private);
14859 	if (type_suppressed && type_is_private)
14860 	  // The type is suppressed because it's private.  If other
14861 	  // non-suppressed and declaration-only instances of this
14862 	  // type exist in the current corpus, then it means those
14863 	  // non-suppressed instances are opaque versions of the
14864 	  // suppressed private type.  Lets return one of these opaque
14865 	  // types then.
14866 	  result = get_opaque_version_of_type(ctxt, scope, die, where_offset);
14867 	else if (!type_suppressed)
14868 	  {
14869 	    enum_type_decl_sptr e = build_enum_type(ctxt, die, scope,
14870 						    where_offset,
14871 						    is_declaration_only);
14872 	    result = add_decl_to_scope(e, scope);
14873 	    if (result)
14874 	      {
14875 		maybe_set_member_type_access_specifier(is_decl(result), die);
14876 		maybe_canonicalize_type(die, ctxt);
14877 	      }
14878 	  }
14879       }
14880       break;
14881 
14882     case DW_TAG_class_type:
14883     case DW_TAG_structure_type:
14884       {
14885 	bool type_is_private = false;
14886 	bool type_suppressed=
14887 	  type_is_suppressed(ctxt, scope, die, type_is_private);
14888 
14889 	if (type_suppressed && type_is_private)
14890 	  // The type is suppressed because it's private.  If other
14891 	  // non-suppressed and declaration-only instances of this
14892 	  // type exist in the current corpus, then it means those
14893 	  // non-suppressed instances are opaque versions of the
14894 	  // suppressed private type.  Lets return one of these opaque
14895 	  // types then.
14896 	  result = get_opaque_version_of_type(ctxt, scope, die, where_offset);
14897 	else if (!type_suppressed)
14898 	  {
14899 	    Dwarf_Die spec_die;
14900 	    scope_decl_sptr scop;
14901 	    class_decl_sptr klass;
14902 	    if (die_die_attribute(die, DW_AT_specification, spec_die))
14903 	      {
14904 		scope_decl_sptr skope =
14905 		  get_scope_for_die(ctxt, &spec_die,
14906 				    called_from_public_decl,
14907 				    where_offset);
14908 		ABG_ASSERT(skope);
14909 		decl_base_sptr cl =
14910 		  is_decl(build_ir_node_from_die(ctxt, &spec_die,
14911 						 skope.get(),
14912 						 called_from_public_decl,
14913 						 where_offset,
14914 						 is_declaration_only,
14915 						 /*is_required_decl_spec=*/false));
14916 		ABG_ASSERT(cl);
14917 		klass = dynamic_pointer_cast<class_decl>(cl);
14918 		ABG_ASSERT(klass);
14919 
14920 		klass =
14921 		  add_or_update_class_type(ctxt, die,
14922 					   skope.get(),
14923 					   tag == DW_TAG_structure_type,
14924 					   klass,
14925 					   called_from_public_decl,
14926 					   where_offset,
14927 					   is_declaration_only);
14928 	      }
14929 	    else
14930 	      klass =
14931 		add_or_update_class_type(ctxt, die, scope,
14932 					 tag == DW_TAG_structure_type,
14933 					 class_decl_sptr(),
14934 					 called_from_public_decl,
14935 					 where_offset,
14936 					 is_declaration_only);
14937 	    result = klass;
14938 	    if (klass)
14939 	      {
14940 		maybe_set_member_type_access_specifier(klass, die);
14941 		maybe_canonicalize_type(klass, ctxt);
14942 	      }
14943 	  }
14944       }
14945       break;
14946     case DW_TAG_union_type:
14947       if (!type_is_suppressed(ctxt, scope, die))
14948 	{
14949 	  union_decl_sptr union_type =
14950 	    add_or_update_union_type(ctxt, die, scope,
14951 				     union_decl_sptr(),
14952 				     called_from_public_decl,
14953 				     where_offset,
14954 				     is_declaration_only);
14955 	  if (union_type)
14956 	    {
14957 	      maybe_set_member_type_access_specifier(union_type, die);
14958 	      maybe_canonicalize_type(union_type, ctxt);
14959 	    }
14960 	  result = union_type;
14961 	}
14962       break;
14963     case DW_TAG_string_type:
14964       break;
14965     case DW_TAG_subroutine_type:
14966       {
14967 	function_type_sptr f = build_function_type(ctxt, die,
14968 						   class_decl_sptr(),
14969 						   where_offset);
14970 	if (f)
14971 	  {
14972 	    result = f;
14973 	    maybe_canonicalize_type(die, ctxt);
14974 	  }
14975       }
14976       break;
14977     case DW_TAG_array_type:
14978       {
14979 	array_type_def_sptr a = build_array_type(ctxt,
14980 						 die,
14981 						 called_from_public_decl,
14982 						 where_offset);
14983 	if (a)
14984 	  {
14985 	    result =
14986 	      add_decl_to_scope(a, ctxt.cur_transl_unit()->get_global_scope());
14987 	    ctxt.associate_die_to_type(die, a, where_offset);
14988 	    maybe_canonicalize_type(die, ctxt);
14989 	  }
14990 	break;
14991       }
14992     case DW_TAG_subrange_type:
14993       {
14994 	// If we got here, this means the subrange type is a "free
14995 	// form" defined in the global namespace of the current
14996 	// translation unit, like what is found in Ada.
14997 	array_type_def::subrange_sptr s =
14998 	  build_subrange_type(ctxt, die, where_offset);
14999 	if (s)
15000 	  {
15001 	    result =
15002 	      add_decl_to_scope(s, ctxt.cur_transl_unit()->get_global_scope());
15003 	    ctxt.associate_die_to_type(die, s, where_offset);
15004 	    maybe_canonicalize_type(die, ctxt);
15005 	  }
15006       }
15007       break;
15008     case DW_TAG_packed_type:
15009       break;
15010     case DW_TAG_set_type:
15011       break;
15012     case DW_TAG_file_type:
15013       break;
15014     case DW_TAG_ptr_to_member_type:
15015       break;
15016     case DW_TAG_thrown_type:
15017       break;
15018     case DW_TAG_interface_type:
15019       break;
15020     case DW_TAG_unspecified_type:
15021       break;
15022     case DW_TAG_shared_type:
15023       break;
15024 
15025     case DW_TAG_compile_unit:
15026       // We shouldn't reach this point b/c this should be handled by
15027       // build_translation_unit.
15028       ABG_ASSERT_NOT_REACHED;
15029 
15030     case DW_TAG_namespace:
15031     case DW_TAG_module:
15032       result = build_namespace_decl_and_add_to_ir(ctxt, die, where_offset);
15033       break;
15034 
15035     case DW_TAG_variable:
15036     case DW_TAG_member:
15037       {
15038 	Dwarf_Die spec_die;
15039 	bool var_is_cloned = false;
15040 
15041 	if (tag == DW_TAG_member)
15042 	  ABG_ASSERT(!is_c_language(ctxt.cur_transl_unit()->get_language()));
15043 
15044 	if (die_die_attribute(die, DW_AT_specification, spec_die, false)
15045 	    || (var_is_cloned = die_die_attribute(die, DW_AT_abstract_origin,
15046 						  spec_die, false)))
15047 	  {
15048 	    scope_decl_sptr spec_scope = get_scope_for_die(ctxt, &spec_die,
15049 							   called_from_public_decl,
15050 							   where_offset);
15051 	    if (spec_scope)
15052 	      {
15053 		decl_base_sptr d =
15054 		  is_decl(build_ir_node_from_die(ctxt, &spec_die,
15055 						 spec_scope.get(),
15056 						 called_from_public_decl,
15057 						 where_offset,
15058 						 is_declaration_only,
15059 						 /*is_required_decl_spec=*/true));
15060 		if (d)
15061 		  {
15062 		    var_decl_sptr m =
15063 		      dynamic_pointer_cast<var_decl>(d);
15064 		    if (var_is_cloned)
15065 		      m = m->clone();
15066 		    m = build_var_decl(ctxt, die, where_offset, m);
15067 		    if (is_data_member(m))
15068 		      {
15069 			set_member_is_static(m, true);
15070 			ctxt.associate_die_to_decl(die, m, where_offset,
15071 						   /*associate_by_repr=*/false);
15072 		      }
15073 		    else
15074 		      {
15075 			ABG_ASSERT(has_scope(m));
15076 			ctxt.var_decls_to_re_add_to_tree().push_back(m);
15077 		      }
15078 		    ABG_ASSERT(m->get_scope());
15079 		    ctxt.maybe_add_var_to_exported_decls(m.get());
15080 		    return m;
15081 		  }
15082 	      }
15083 	  }
15084 	else if (var_decl_sptr v =
15085 		 build_or_get_var_decl_if_not_suppressed(ctxt, scope, die,
15086 							 where_offset,
15087 							 /*result=*/var_decl_sptr(),
15088 							 is_required_decl_spec))
15089 	  {
15090 	    result = add_decl_to_scope(v, scope);
15091 	    ABG_ASSERT(is_decl(result)->get_scope());
15092 	    v = dynamic_pointer_cast<var_decl>(result);
15093 	    ABG_ASSERT(v);
15094 	    ABG_ASSERT(v->get_scope());
15095 	    ctxt.var_decls_to_re_add_to_tree().push_back(v);
15096 	    ctxt.maybe_add_var_to_exported_decls(v.get());
15097 	  }
15098       }
15099       break;
15100 
15101     case DW_TAG_subprogram:
15102       {
15103 	Dwarf_Die spec_die;
15104 	Dwarf_Die abstract_origin_die;
15105 	Dwarf_Die *interface_die = 0, *origin_die = 0;
15106 	scope_decl_sptr interface_scope;
15107 	if (die_is_artificial(die))
15108 	  break;
15109 
15110 	function_decl_sptr fn;
15111 	bool has_spec = die_die_attribute(die, DW_AT_specification,
15112 					  spec_die, true);
15113 	bool has_abstract_origin =
15114 	  die_die_attribute(die, DW_AT_abstract_origin,
15115 			    abstract_origin_die, true);
15116 	if (has_spec || has_abstract_origin)
15117 	  {
15118 	    interface_die =
15119 	      has_spec
15120 	      ? &spec_die
15121 	      : &abstract_origin_die;
15122 	    origin_die =
15123 	      has_abstract_origin
15124 	      ? &abstract_origin_die
15125 	      : &spec_die;
15126 
15127 	    string linkage_name = die_linkage_name(die);
15128 	    string spec_linkage_name = die_linkage_name(interface_die);
15129 
15130 	    interface_scope = get_scope_for_die(ctxt, interface_die,
15131 						called_from_public_decl,
15132 						where_offset);
15133 	    if (interface_scope)
15134 	      {
15135 		decl_base_sptr d;
15136 		class_decl_sptr c = is_class_type(interface_scope);
15137 		if (c && !linkage_name.empty())
15138 		  d = c->find_member_function_sptr(linkage_name);
15139 
15140 		if (!d)
15141 		  d = is_decl(build_ir_node_from_die(ctxt,
15142 						     origin_die,
15143 						     interface_scope.get(),
15144 						     called_from_public_decl,
15145 						     where_offset,
15146 						     is_declaration_only,
15147 						     /*is_required_decl_spec=*/false));
15148 		if (d)
15149 		  {
15150 		    fn = dynamic_pointer_cast<function_decl>(d);
15151 		    if (has_abstract_origin
15152 			&& (linkage_name != spec_linkage_name))
15153 		      // The current DIE has 'd' as abstract orign,
15154 		      // and has a linkage name that is different
15155 		      // from from the linkage name of 'd'.  That
15156 		      // means, the current DIE represents a clone
15157 		      // of 'd'.
15158 		      fn = fn->clone();
15159 		  }
15160 	      }
15161 	  }
15162 	ctxt.scope_stack().push(scope);
15163 
15164 	scope_decl* logical_scope =
15165 	  interface_scope
15166 	  ? interface_scope.get()
15167 	  : scope;
15168 
15169 	result = build_or_get_fn_decl_if_not_suppressed(ctxt, logical_scope,
15170 							die, where_offset,
15171 							is_declaration_only,
15172 							fn);
15173 
15174 	if (result && !fn)
15175 	  {
15176 	    if (potential_member_fn_should_be_dropped(is_function_decl(result),
15177 						      die))
15178 	      {
15179 		result.reset();
15180 		break;
15181 	      }
15182 	    result = add_decl_to_scope(is_decl(result), logical_scope);
15183 	  }
15184 
15185 	fn = is_function_decl(result);
15186 	if (fn && is_member_function(fn))
15187 	  {
15188 	    class_decl_sptr klass(static_cast<class_decl*>(logical_scope),
15189 				  sptr_utils::noop_deleter());
15190 	    ABG_ASSERT(klass);
15191 	    finish_member_function_reading(die, fn, klass, ctxt);
15192 	  }
15193 
15194 	if (fn)
15195 	  {
15196 	    ctxt.maybe_add_fn_to_exported_decls(fn.get());
15197 	    ctxt.associate_die_to_decl(die, fn, where_offset,
15198 				       /*associate_by_repr=*/false);
15199 	    maybe_canonicalize_type(die, ctxt);
15200 	  }
15201 
15202 	ctxt.scope_stack().pop();
15203       }
15204       break;
15205 
15206     case DW_TAG_formal_parameter:
15207       // We should not read this case as it should have been dealt
15208       // with by build_function_decl above.
15209       ABG_ASSERT_NOT_REACHED;
15210 
15211     case DW_TAG_constant:
15212       break;
15213     case DW_TAG_enumerator:
15214       break;
15215 
15216     case DW_TAG_partial_unit:
15217     case DW_TAG_imported_unit:
15218       // For now, the DIEs under these are read lazily when they are
15219       // referenced by a public decl DIE that is under a
15220       // DW_TAG_compile_unit, so we shouldn't get here.
15221       ABG_ASSERT_NOT_REACHED;
15222 
15223       // Other declaration we don't really intend to support yet.
15224     case DW_TAG_dwarf_procedure:
15225     case DW_TAG_imported_declaration:
15226     case DW_TAG_entry_point:
15227     case DW_TAG_label:
15228     case DW_TAG_lexical_block:
15229     case DW_TAG_unspecified_parameters:
15230     case DW_TAG_variant:
15231     case DW_TAG_common_block:
15232     case DW_TAG_common_inclusion:
15233     case DW_TAG_inheritance:
15234     case DW_TAG_inlined_subroutine:
15235     case DW_TAG_with_stmt:
15236     case DW_TAG_access_declaration:
15237     case DW_TAG_catch_block:
15238     case DW_TAG_friend:
15239     case DW_TAG_namelist:
15240     case DW_TAG_namelist_item:
15241     case DW_TAG_template_type_parameter:
15242     case DW_TAG_template_value_parameter:
15243     case DW_TAG_try_block:
15244     case DW_TAG_variant_part:
15245     case DW_TAG_imported_module:
15246     case DW_TAG_condition:
15247     case DW_TAG_type_unit:
15248     case DW_TAG_template_alias:
15249     case DW_TAG_lo_user:
15250     case DW_TAG_MIPS_loop:
15251     case DW_TAG_format_label:
15252     case DW_TAG_function_template:
15253     case DW_TAG_class_template:
15254     case DW_TAG_GNU_BINCL:
15255     case DW_TAG_GNU_EINCL:
15256     case DW_TAG_GNU_template_template_param:
15257     case DW_TAG_GNU_template_parameter_pack:
15258     case DW_TAG_GNU_formal_parameter_pack:
15259     case DW_TAG_GNU_call_site:
15260     case DW_TAG_GNU_call_site_parameter:
15261     case DW_TAG_hi_user:
15262     default:
15263       break;
15264     }
15265 
15266   if (result && tag != DW_TAG_subroutine_type)
15267     ctxt.associate_die_to_decl(die, is_decl(result), where_offset,
15268 			       /*associate_by_repr=*/false);
15269 
15270   if (result)
15271     if (ctxt.load_all_types())
15272       if (called_from_public_decl)
15273 	if (type_base_sptr t = is_type(result))
15274 	  if (corpus *abi_corpus = scope->get_corpus())
15275 	    abi_corpus->record_type_as_reachable_from_public_interfaces(*t);
15276 
15277   return result;
15278 }
15279 
15280 ///  Build the IR node for a void type.
15281 ///
15282 ///  @param ctxt the read context to use.
15283 ///
15284 ///  @return the void type node.
15285 static decl_base_sptr
build_ir_node_for_void_type(read_context & ctxt)15286 build_ir_node_for_void_type(read_context& ctxt)
15287 {
15288   ir::environment* env = ctxt.env();
15289   ABG_ASSERT(env);
15290   type_base_sptr t = env->get_void_type();
15291   decl_base_sptr type_declaration = get_type_declaration(t);
15292   if (!has_scope(type_declaration))
15293     add_decl_to_scope(type_declaration,
15294 		      ctxt.cur_transl_unit()->get_global_scope());
15295   canonicalize(t);
15296   return type_declaration;
15297 }
15298 
15299 /// Build the IR node for a variadic parameter type.
15300 ///
15301 /// @param ctxt the read context to use.
15302 ///
15303 /// @return the variadic parameter type.
15304 static decl_base_sptr
build_ir_node_for_variadic_parameter_type(read_context & ctxt)15305 build_ir_node_for_variadic_parameter_type(read_context &ctxt)
15306 {
15307 
15308   ir::environment* env = ctxt.env();
15309   ABG_ASSERT(env);
15310   type_base_sptr t = env->get_variadic_parameter_type();
15311   decl_base_sptr type_declaration = get_type_declaration(t);
15312   if (!has_scope(type_declaration))
15313     add_decl_to_scope(type_declaration,
15314 		      ctxt.cur_transl_unit()->get_global_scope());
15315   canonicalize(t);
15316   return type_declaration;
15317 }
15318 
15319 /// Build an IR node from a given DIE and add the node to the current
15320 /// IR being build and held in the read_context.  Doing that is called
15321 /// "emitting an IR node for the DIE".
15322 ///
15323 /// @param ctxt the read context.
15324 ///
15325 /// @param die the DIE to consider.
15326 ///
15327 /// @param called_from_public_decl set to yes if this function is
15328 /// called from the functions used to build a public decl (functions
15329 /// and variables).  In that case, this function accepts building IR
15330 /// nodes representing types.  Otherwise, this function only creates
15331 /// IR nodes representing public decls (functions and variables).
15332 /// This is done to avoid emitting IR nodes for types that are not
15333 /// referenced by public functions or variables.
15334 ///
15335 /// @param where_offset the offset of the DIE where we are "logically"
15336 /// positionned at, in the DIE tree.  This is useful when @p die is
15337 /// e.g, DW_TAG_partial_unit that can be included in several places in
15338 /// the DIE tree.
15339 ///
15340 /// @return the resulting IR node.
15341 static type_or_decl_base_sptr
build_ir_node_from_die(read_context & ctxt,Dwarf_Die * die,bool called_from_public_decl,size_t where_offset)15342 build_ir_node_from_die(read_context&	ctxt,
15343 		       Dwarf_Die*	die,
15344 		       bool		called_from_public_decl,
15345 		       size_t		where_offset)
15346 {
15347   if (!die)
15348     return decl_base_sptr();
15349 
15350   if (is_c_language(ctxt.cur_transl_unit()->get_language()))
15351     {
15352       const scope_decl_sptr& scop = ctxt.global_scope();
15353       return build_ir_node_from_die(ctxt, die, scop.get(),
15354 				    called_from_public_decl,
15355 				    where_offset,
15356                                     true);
15357     }
15358 
15359   scope_decl_sptr scope = get_scope_for_die(ctxt, die,
15360 					    called_from_public_decl,
15361 					    where_offset);
15362   return build_ir_node_from_die(ctxt, die, scope.get(),
15363 				called_from_public_decl,
15364 				where_offset,
15365                                 true);
15366 }
15367 
15368 status
operator |(status l,status r)15369 operator|(status l, status r)
15370 {
15371   return static_cast<status>(static_cast<unsigned>(l)
15372 			     | static_cast<unsigned>(r));
15373 }
15374 
15375 status
operator &(status l,status r)15376 operator&(status l, status r)
15377 {
15378   return static_cast<status>(static_cast<unsigned>(l)
15379 			     & static_cast<unsigned>(r));
15380 }
15381 
15382 status&
operator |=(status & l,status r)15383 operator|=(status& l, status r)
15384 {
15385   l = l | r;
15386   return l;
15387 }
15388 
15389 status&
operator &=(status & l,status r)15390 operator&=(status& l, status r)
15391 {
15392   l = l & r;
15393   return l;
15394 }
15395 
15396 /// Emit a diagnostic status with english sentences to describe the
15397 /// problems encoded in a given abigail::dwarf_reader::status, if
15398 /// there is an error.
15399 ///
15400 /// @param status the status to diagnose
15401 ///
15402 /// @return a string containing sentences that describe the possible
15403 /// errors encoded in @p s.  If there is no error to encode, then the
15404 /// empty string is returned.
15405 string
status_to_diagnostic_string(status s)15406 status_to_diagnostic_string(status s)
15407 {
15408   string str;
15409 
15410   if (s & STATUS_DEBUG_INFO_NOT_FOUND)
15411     str += "could not find debug info\n";
15412 
15413   if (s & STATUS_ALT_DEBUG_INFO_NOT_FOUND)
15414     str += "could not find alternate debug info\n";
15415 
15416   if (s & STATUS_NO_SYMBOLS_FOUND)
15417     str += "could not load ELF symbols\n";
15418 
15419   return str;
15420 }
15421 
15422 /// Create a dwarf_reader::read_context.
15423 ///
15424 /// @param elf_path the path to the elf file the context is to be used for.
15425 ///
15426 /// @param debug_info_root_paths a pointer to the path to the root
15427 /// directory under which the debug info is to be found for @p
15428 /// elf_path.  Leave this to NULL if the debug info is not in a split
15429 /// file.
15430 ///
15431 /// @param environment the environment used by the current context.
15432 /// This environment contains resources needed by the reader and by
15433 /// the types and declarations that are to be created later.  Note
15434 /// that ABI artifacts that are to be compared all need to be created
15435 /// within the same environment.
15436 ///
15437 /// Please also note that the life time of this environment object
15438 /// must be greater than the life time of the resulting @ref
15439 /// read_context the context uses resources that are allocated in the
15440 /// environment.
15441 ///
15442 /// @param load_all_types if set to false only the types that are
15443 /// reachable from publicly exported declarations (of functions and
15444 /// variables) are read.  If set to true then all types found in the
15445 /// debug information are loaded.
15446 ///
15447 /// @param linux_kernel_mode if set to true, then consider the special
15448 /// linux kernel symbol tables when determining if a symbol is
15449 /// exported or not.
15450 ///
15451 /// @return a smart pointer to the resulting dwarf_reader::read_context.
15452 read_context_sptr
create_read_context(const std::string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,bool linux_kernel_mode)15453 create_read_context(const std::string&		elf_path,
15454 		    const vector<char**>&	debug_info_root_paths,
15455 		    ir::environment*		environment,
15456 		    bool			load_all_types,
15457 		    bool			linux_kernel_mode)
15458 {
15459   // Create a DWARF Front End Library handle to be used by functions
15460   // of that library.
15461   read_context_sptr result(new read_context(elf_path, debug_info_root_paths,
15462 					    environment, load_all_types,
15463 					    linux_kernel_mode));
15464   return result;
15465 }
15466 
15467 /// Getter for the path to the binary this @ref read_context is for.
15468 ///
15469 /// @return the path to the binary the @ref read_context is for.
15470 const string&
read_context_get_path(const read_context & ctxt)15471 read_context_get_path(const read_context& ctxt)
15472 {return ctxt.elf_path();}
15473 
15474 /// Re-initialize a read_context so that it can re-used to read
15475 /// another binary.
15476 ///
15477 /// @param ctxt the context to re-initialize.
15478 ///
15479 /// @param elf_path the path to the elf file the context is to be used
15480 /// for.
15481 ///
15482 /// @param debug_info_root_path a pointer to the path to the root
15483 /// directory under which the debug info is to be found for @p
15484 /// elf_path.  Leave this to NULL if the debug info is not in a split
15485 /// file.
15486 ///
15487 /// @param environment the environment used by the current context.
15488 /// This environment contains resources needed by the reader and by
15489 /// the types and declarations that are to be created later.  Note
15490 /// that ABI artifacts that are to be compared all need to be created
15491 /// within the same environment.
15492 ///
15493 /// Please also note that the life time of this environment object
15494 /// must be greater than the life time of the resulting @ref
15495 /// read_context the context uses resources that are allocated in the
15496 /// environment.
15497 ///
15498 /// @param load_all_types if set to false only the types that are
15499 /// reachable from publicly exported declarations (of functions and
15500 /// variables) are read.  If set to true then all types found in the
15501 /// debug information are loaded.
15502 ///
15503 /// @param linux_kernel_mode if set to true, then consider the special
15504 /// linux kernel symbol tables when determining if a symbol is
15505 /// exported or not.
15506 ///
15507 /// @return a smart pointer to the resulting dwarf_reader::read_context.
15508 void
reset_read_context(read_context_sptr & ctxt,const std::string & elf_path,const vector<char ** > & debug_info_root_path,ir::environment * environment,bool read_all_types,bool linux_kernel_mode)15509 reset_read_context(read_context_sptr	&ctxt,
15510 		   const std::string&	 elf_path,
15511 		   const vector<char**>& debug_info_root_path,
15512 		   ir::environment*	 environment,
15513 		   bool		 read_all_types,
15514 		   bool		 linux_kernel_mode)
15515 {
15516   if (ctxt)
15517     ctxt->initialize(elf_path, debug_info_root_path, environment,
15518 		     read_all_types, linux_kernel_mode);
15519 }
15520 
15521 /// Add suppressions specifications to the set of suppressions to be
15522 /// used during the construction of the ABI internal representation
15523 /// (the ABI corpus) from ELF and DWARF.
15524 ///
15525 /// During the construction of the ABI corpus, ABI artifacts that
15526 /// match the a given suppression specification are dropped on the
15527 /// floor; that is, they are discarded and won't be part of the final
15528 /// ABI corpus.  This is a way to reduce the amount of data held by
15529 /// the final ABI corpus.
15530 ///
15531 /// Note that the suppression specifications provided to this function
15532 /// are only considered during the construction of the ABI corpus.
15533 /// For instance, they are not taken into account during e.g
15534 /// comparisons of two ABI corpora that might happen later.  If you
15535 /// want to apply suppression specificatins to the comparison (or
15536 /// reporting) of ABI corpora please refer to the documentation of the
15537 /// @ref diff_context type to learn how to set suppressions that are
15538 /// to be used in that context.
15539 ///
15540 /// @param ctxt the context that is going to be used by functions that
15541 /// read ELF and DWARF information to construct and ABI corpus.
15542 ///
15543 /// @param supprs the suppression specifications to be applied during
15544 /// the construction of the ABI corpus.
15545 void
add_read_context_suppressions(read_context & ctxt,const suppr::suppressions_type & supprs)15546 add_read_context_suppressions(read_context& ctxt,
15547 			      const suppr::suppressions_type& supprs)
15548 {
15549   for (suppr::suppressions_type::const_iterator i = supprs.begin();
15550        i != supprs.end();
15551        ++i)
15552     if ((*i)->get_drops_artifact_from_ir())
15553       ctxt.get_suppressions().push_back(*i);
15554 }
15555 
15556 /// Set the @ref corpus_group being created to the current read context.
15557 ///
15558 /// @param ctxt the read_context to consider.
15559 ///
15560 /// @param group the @ref corpus_group to set.
15561 void
set_read_context_corpus_group(read_context & ctxt,corpus_group_sptr & group)15562 set_read_context_corpus_group(read_context& ctxt,
15563 			      corpus_group_sptr& group)
15564 {
15565   ctxt.cur_corpus_group_ = group;
15566 }
15567 
15568 /// Read all @ref abigail::translation_unit possible from the debug info
15569 /// accessible from an elf file, stuff them into a libabigail ABI
15570 /// Corpus and return it.
15571 ///
15572 /// @param ctxt the context to use for reading the elf file.
15573 ///
15574 /// @param resulting_corp a pointer to the resulting abigail::corpus.
15575 ///
15576 /// @return the resulting status.
15577 corpus_sptr
read_corpus_from_elf(read_context & ctxt,status & status)15578 read_corpus_from_elf(read_context& ctxt, status& status)
15579 {
15580   status = STATUS_UNKNOWN;
15581 
15582   // Load debug info from the elf path.
15583   if (!ctxt.load_debug_info())
15584     status |= STATUS_DEBUG_INFO_NOT_FOUND;
15585 
15586   {
15587     string alt_di_path;
15588     if (refers_to_alt_debug_info(ctxt, alt_di_path) && !ctxt.alt_dwarf())
15589       status |= STATUS_ALT_DEBUG_INFO_NOT_FOUND;
15590   }
15591 
15592   ctxt.load_elf_properties();  // DT_SONAME, DT_NEEDED, architecture
15593 
15594   if (!ctxt.symtab() || !ctxt.symtab()->has_symbols())
15595     status |= STATUS_NO_SYMBOLS_FOUND;
15596 
15597   if (// If no elf symbol was found ...
15598       status & STATUS_NO_SYMBOLS_FOUND
15599       // ... or if debug info was found but not the required alternate
15600       // debug info ...
15601       || ((status & STATUS_ALT_DEBUG_INFO_NOT_FOUND)
15602 	  && !(status & STATUS_DEBUG_INFO_NOT_FOUND)))
15603     // ... then we cannot handle the binary.
15604     return corpus_sptr();
15605 
15606   // Read the variable and function descriptions from the debug info
15607   // we have, through the dwfl handle.
15608   corpus_sptr corp = read_debug_info_into_corpus(ctxt);
15609 
15610   status |= STATUS_OK;
15611 
15612   return corp;
15613 }
15614 
15615 /// Read a corpus and add it to a given @ref corpus_group.
15616 ///
15617 /// @param ctxt the reading context to consider.
15618 ///
15619 /// @param group the @ref corpus_group to add the new corpus to.
15620 ///
15621 /// @param status output parameter. The status of the read.  It is set
15622 /// by this function upon its completion.
15623 corpus_sptr
read_and_add_corpus_to_group_from_elf(read_context & ctxt,corpus_group & group,status & status)15624 read_and_add_corpus_to_group_from_elf(read_context& ctxt,
15625 				      corpus_group& group,
15626 				      status& status)
15627 {
15628   corpus_sptr result;
15629   corpus_sptr corp = read_corpus_from_elf(ctxt, status);
15630   if (status & STATUS_OK)
15631     {
15632       if (!corp->get_group())
15633 	group.add_corpus(corp);
15634       result = corp;
15635     }
15636 
15637   return result;
15638 }
15639 
15640 /// Read all @ref abigail::translation_unit possible from the debug info
15641 /// accessible from an elf file, stuff them into a libabigail ABI
15642 /// Corpus and return it.
15643 ///
15644 /// @param elf_path the path to the elf file.
15645 ///
15646 /// @param debug_info_root_paths a vector of pointers to root paths
15647 /// under which to look for the debug info of the elf files that are
15648 /// later handled by the Dwfl.  This for cases where the debug info is
15649 /// split into a different file from the binary we want to inspect.
15650 /// On Red Hat compatible systems, this root path is usually
15651 /// /usr/lib/debug by default.  If this argument is set to NULL, then
15652 /// "./debug" and /usr/lib/debug will be searched for sub-directories
15653 /// containing the debug info file.
15654 ///
15655 /// @param environment the environment used by the current context.
15656 /// This environment contains resources needed by the reader and by
15657 /// the types and declarations that are to be created later.  Note
15658 /// that ABI artifacts that are to be compared all need to be created
15659 /// within the same environment.  Also, the lifetime of the
15660 /// environment must be greater than the lifetime of the resulting
15661 /// corpus because the corpus uses resources that are allocated in the
15662 /// environment.
15663 ///
15664 /// @param load_all_types if set to false only the types that are
15665 /// reachable from publicly exported declarations (of functions and
15666 /// variables) are read.  If set to true then all types found in the
15667 /// debug information are loaded.
15668 ///
15669 /// @param resulting_corp a pointer to the resulting abigail::corpus.
15670 ///
15671 /// @return the resulting status.
15672 corpus_sptr
read_corpus_from_elf(const std::string & elf_path,const vector<char ** > & debug_info_root_paths,ir::environment * environment,bool load_all_types,status & status)15673 read_corpus_from_elf(const std::string& elf_path,
15674 		     const vector<char**>& debug_info_root_paths,
15675 		     ir::environment*	environment,
15676 		     bool		load_all_types,
15677 		     status&		status)
15678 {
15679   read_context_sptr c = create_read_context(elf_path,
15680 					    debug_info_root_paths,
15681 					    environment,
15682 					    load_all_types);
15683   read_context& ctxt = *c;
15684   return read_corpus_from_elf(ctxt, status);
15685 }
15686 
15687 /// Look into the symbol tables of a given elf file and see if we find
15688 /// a given symbol.
15689 ///
15690 /// @param env the environment we are operating from.
15691 ///
15692 /// @param elf_path the path to the elf file to consider.
15693 ///
15694 /// @param symbol_name the name of the symbol to look for.
15695 ///
15696 /// @param demangle if true, try to demangle the symbol name found in
15697 /// the symbol table.
15698 ///
15699 /// @param syms the vector of symbols found with the name @p symbol_name.
15700 ///
15701 /// @return true iff the symbol was found among the publicly exported
15702 /// symbols of the ELF file.
15703 bool
lookup_symbol_from_elf(const environment * env,const string & elf_path,const string & symbol_name,bool demangle,vector<elf_symbol_sptr> & syms)15704 lookup_symbol_from_elf(const environment*		env,
15705 		       const string&			elf_path,
15706 		       const string&			symbol_name,
15707 		       bool				demangle,
15708 		       vector<elf_symbol_sptr>&	syms)
15709 
15710 {
15711   if (elf_version(EV_CURRENT) == EV_NONE)
15712     return false;
15713 
15714   int fd = open(elf_path.c_str(), O_RDONLY);
15715   if (fd < 0)
15716     return false;
15717 
15718   struct stat s;
15719   if (fstat(fd, &s))
15720     return false;
15721 
15722   Elf* elf = elf_begin(fd, ELF_C_READ, 0);
15723   if (elf == 0)
15724     return false;
15725 
15726   bool value = lookup_symbol_from_elf(env, elf, symbol_name,
15727 				      demangle, syms);
15728   elf_end(elf);
15729   close(fd);
15730 
15731   return value;
15732 }
15733 
15734 /// Look into the symbol tables of an elf file to see if a public
15735 /// function of a given name is found.
15736 ///
15737 /// @param env the environment we are operating from.
15738 ///
15739 /// @param elf_path the path to the elf file to consider.
15740 ///
15741 /// @param symbol_name the name of the function to look for.
15742 ///
15743 /// @param syms the vector of public function symbols found with the
15744 /// name @p symname.
15745 ///
15746 /// @return true iff a function with symbol name @p symbol_name is
15747 /// found.
15748 bool
lookup_public_function_symbol_from_elf(const environment * env,const string & path,const string & symname,vector<elf_symbol_sptr> & syms)15749 lookup_public_function_symbol_from_elf(const environment*		env,
15750 				       const string&			path,
15751 				       const string&			symname,
15752 				       vector<elf_symbol_sptr>&	syms)
15753 {
15754   if (elf_version(EV_CURRENT) == EV_NONE)
15755     return false;
15756 
15757   int fd = open(path.c_str(), O_RDONLY);
15758   if (fd < 0)
15759     return false;
15760 
15761   struct stat s;
15762   if (fstat(fd, &s))
15763     return false;
15764 
15765   Elf* elf = elf_begin(fd, ELF_C_READ, 0);
15766   if (elf == 0)
15767     return false;
15768 
15769   bool value = lookup_public_function_symbol_from_elf(env, elf, symname, syms);
15770   elf_end(elf);
15771   close(fd);
15772 
15773   return value;
15774 }
15775 
15776 /// Check if the underlying elf file refers to an alternate debug info
15777 /// file associated to it.
15778 ///
15779 /// Note that "alternate debug info sections" is a GNU extension as
15780 /// of DWARF4 and is described at
15781 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
15782 ///
15783 /// @param ctxt the context used to read the elf file.
15784 ///
15785 /// @param alt_di the path to the alternate debug info file.  This is
15786 /// set iff the function returns true.
15787 ///
15788 /// @return true if the ELF file refers to an alternate debug info
15789 /// file.
15790 bool
refers_to_alt_debug_info(const read_context & ctxt,string & alt_di_path)15791 refers_to_alt_debug_info(const read_context&	ctxt,
15792 			 string&		alt_di_path)
15793 {
15794   if (!ctxt.alt_debug_info_path().empty())
15795     {
15796       alt_di_path = ctxt.alt_debug_info_path();
15797       return true;
15798     }
15799   return false;
15800 }
15801 
15802 /// Check if the underlying elf file has an alternate debug info file
15803 /// associated to it.
15804 ///
15805 /// Note that "alternate debug info sections" is a GNU extension as
15806 /// of DWARF4 and is described at
15807 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
15808 ///
15809 /// @param ctxt the read_context to use to handle the underlying elf file.
15810 ///
15811 /// @param has_alt_di out parameter.  This is set to true upon
15812 /// succesful completion of the function iff an alternate debug info
15813 /// file was found, false otherwise.  Note thas this parameter is set
15814 /// only if the function returns STATUS_OK.
15815 ///
15816 /// @param alt_debug_info_path if the function returned STATUS_OK and
15817 /// if @p has been set to true, then this parameter contains the path
15818 /// to the alternate debug info file found.
15819 ///
15820 /// return STATUS_OK upon successful completion, false otherwise.
15821 status
has_alt_debug_info(read_context & ctxt,bool & has_alt_di,string & alt_debug_info_path)15822 has_alt_debug_info(read_context&	ctxt,
15823 		   bool&		has_alt_di,
15824 		   string&		alt_debug_info_path)
15825 {
15826   // Load debug info from the elf path.
15827   if (!ctxt.load_debug_info())
15828     return STATUS_DEBUG_INFO_NOT_FOUND;
15829 
15830   if (ctxt.alt_dwarf())
15831     {
15832       has_alt_di = true;
15833       alt_debug_info_path = ctxt.alt_debug_info_path();
15834     }
15835   else
15836     has_alt_di = false;
15837 
15838   return STATUS_OK;
15839 }
15840 
15841 /// Check if a given elf file has an alternate debug info file
15842 /// associated to it.
15843 ///
15844 /// Note that "alternate debug info sections" is a GNU extension as
15845 /// of DWARF4 and is described at
15846 /// http://www.dwarfstd.org/ShowIssue.php?issue=120604.1.
15847 ///
15848 /// @param elf_path the path to the elf file to consider.
15849 ///
15850 /// @param a pointer to the root directory under which the split debug info
15851 /// file associated to elf_path is to be found.  This has to be NULL
15852 /// if the debug info file is not in a split file.
15853 ///
15854 /// @param has_alt_di out parameter.  This is set to true upon
15855 /// succesful completion of the function iff an alternate debug info
15856 /// file was found, false otherwise.  Note thas this parameter is set
15857 /// only if the function returns STATUS_OK.
15858 ///
15859 /// @param alt_debug_info_path if the function returned STATUS_OK and
15860 /// if @p has been set to true, then this parameter contains the path
15861 /// to the alternate debug info file found.
15862 ///
15863 /// return STATUS_OK upon successful completion, false otherwise.
15864 status
has_alt_debug_info(const string & elf_path,char ** debug_info_root_path,bool & has_alt_di,string & alt_debug_info_path)15865 has_alt_debug_info(const string&	elf_path,
15866 		   char**		debug_info_root_path,
15867 		   bool&		has_alt_di,
15868 		   string&		alt_debug_info_path)
15869 {
15870   vector<char**> di_roots;
15871   di_roots.push_back(debug_info_root_path);
15872   read_context_sptr c = create_read_context(elf_path, di_roots, 0);
15873   read_context& ctxt = *c;
15874 
15875   // Load debug info from the elf path.
15876   if (!ctxt.load_debug_info())
15877     return STATUS_DEBUG_INFO_NOT_FOUND;
15878 
15879   if (ctxt.alt_dwarf())
15880     {
15881       has_alt_di = true;
15882       alt_debug_info_path = ctxt.alt_debug_info_path();
15883     }
15884   else
15885     has_alt_di = false;
15886 
15887   return STATUS_OK;
15888 }
15889 
15890 /// Fetch the SONAME ELF property from an ELF binary file.
15891 ///
15892 /// @param path The path to the elf file to consider.
15893 ///
15894 /// @param soname out parameter. Set to the SONAME property of the
15895 /// binary file, if it present in the ELF file.
15896 ///
15897 /// return false if an error occured while looking for the SONAME
15898 /// property in the binary, true otherwise.
15899 bool
get_soname_of_elf_file(const string & path,string & soname)15900 get_soname_of_elf_file(const string& path, string &soname)
15901 {
15902 
15903   int fd = open(path.c_str(), O_RDONLY);
15904   if (fd == -1)
15905     return false;
15906 
15907   elf_version (EV_CURRENT);
15908   Elf* elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
15909 
15910   GElf_Ehdr ehdr_mem;
15911   GElf_Ehdr* ehdr = gelf_getehdr (elf, &ehdr_mem);
15912   if (ehdr == NULL)
15913     return false;
15914 
15915   for (int i = 0; i < ehdr->e_phnum; ++i)
15916     {
15917       GElf_Phdr phdr_mem;
15918       GElf_Phdr* phdr = gelf_getphdr (elf, i, &phdr_mem);
15919 
15920       if (phdr != NULL && phdr->p_type == PT_DYNAMIC)
15921         {
15922           Elf_Scn* scn = gelf_offscn (elf, phdr->p_offset);
15923           GElf_Shdr shdr_mem;
15924           GElf_Shdr* shdr = gelf_getshdr (scn, &shdr_mem);
15925           int maxcnt = (shdr != NULL
15926                         ? shdr->sh_size / shdr->sh_entsize : INT_MAX);
15927           ABG_ASSERT (shdr == NULL || shdr->sh_type == SHT_DYNAMIC);
15928           Elf_Data* data = elf_getdata (scn, NULL);
15929           if (data == NULL)
15930             break;
15931 
15932           for (int cnt = 0; cnt < maxcnt; ++cnt)
15933             {
15934               GElf_Dyn dynmem;
15935               GElf_Dyn* dyn = gelf_getdyn (data, cnt, &dynmem);
15936               if (dyn == NULL)
15937                 continue;
15938 
15939               if (dyn->d_tag == DT_NULL)
15940                 break;
15941 
15942               if (dyn->d_tag != DT_SONAME)
15943                 continue;
15944 
15945               soname = elf_strptr (elf, shdr->sh_link, dyn->d_un.d_val);
15946               break;
15947             }
15948           break;
15949         }
15950     }
15951 
15952   elf_end(elf);
15953   close(fd);
15954 
15955   return true;
15956 }
15957 
15958 /// Get the type of a given elf type.
15959 ///
15960 /// @param path the absolute path to the ELF file to analyzed.
15961 ///
15962 /// @param type the kind of the ELF file designated by @p path.
15963 ///
15964 /// @param out parameter.  Is set to the type of ELF file of @p path.
15965 /// This parameter is set iff the function returns true.
15966 ///
15967 /// @return true iff the file could be opened and analyzed.
15968 bool
get_type_of_elf_file(const string & path,elf_type & type)15969 get_type_of_elf_file(const string& path, elf_type& type)
15970 {
15971   int fd = open(path.c_str(), O_RDONLY);
15972   if (fd == -1)
15973     return false;
15974 
15975   elf_version (EV_CURRENT);
15976   Elf *elf = elf_begin (fd, ELF_C_READ_MMAP, NULL);
15977   type = elf_file_type(elf);
15978   elf_end(elf);
15979   close(fd);
15980 
15981   return true;
15982 }
15983 
15984 }// end namespace dwarf_reader
15985 
15986 }// end namespace abigail
15987