1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2013-2020 Red Hat, Inc.
5 // Copyright (C) 2020 Google, Inc.
6 //
7 // Author: Matthias Maennich
8 
9 /// @file
10 ///
11 /// This contains the definition of the symtab reader
12 
13 #include <algorithm>
14 #include <iostream>
15 #include <unordered_map>
16 #include <unordered_set>
17 
18 #include "abg-elf-helpers.h"
19 #include "abg-fwd.h"
20 #include "abg-internal.h"
21 #include "abg-tools-utils.h"
22 
23 // Though this is an internal header, we need to export the symbols to be able
24 // to test this code.  TODO: find a way to export symbols just for unit tests.
25 ABG_BEGIN_EXPORT_DECLARATIONS
26 #include "abg-symtab-reader.h"
27 ABG_END_EXPORT_DECLARATIONS
28 
29 namespace abigail
30 {
31 
32 namespace symtab_reader
33 {
34 
35 /// symtab_filter implementations
36 
37 /// Determine whether a symbol is matching the filter criteria of this filter
38 /// object. In terms of a filter functionality, you would _not_ filter out
39 /// this symbol if it passes this (i.e. returns true).
40 ///
41 /// @param symbol The Elf symbol under test.
42 ///
43 /// @return whether the symbol matches all relevant / required criteria
44 bool
matches(const elf_symbol & symbol) const45 symtab_filter::matches(const elf_symbol& symbol) const
46 {
47   if (functions_ && *functions_ != symbol.is_function())
48     return false;
49   if (variables_ && *variables_ != symbol.is_variable())
50     return false;
51   if (public_symbols_ && *public_symbols_ != symbol.is_public())
52     return false;
53   if (undefined_symbols_ && *undefined_symbols_ == symbol.is_defined())
54     return false;
55   if (kernel_symbols_ && *kernel_symbols_ != symbol.is_in_ksymtab())
56     return false;
57 
58   return true;
59 }
60 
61 /// symtab implementations
62 
63 /// Obtain a suitable default filter for iterating this symtab object.
64 ///
65 /// The symtab_filter obtained is populated with some sensible default
66 /// settings, such as public_symbols(true) and kernel_symbols(true) if the
67 /// binary has been identified as Linux Kernel binary.
68 ///
69 /// @return a symtab_filter with sensible populated defaults
70 symtab_filter
make_filter() const71 symtab::make_filter() const
72 {
73   symtab_filter filter;
74   filter.set_public_symbols();
75   if (is_kernel_binary_)
76     filter.set_kernel_symbols();
77   return filter;
78 }
79 
80 /// Get a vector of symbols that are associated with a certain name
81 ///
82 /// @param name the name the symbols need to match
83 ///
84 /// @return a vector of symbols, empty if no matching symbols have been found
85 const elf_symbols&
lookup_symbol(const std::string & name) const86 symtab::lookup_symbol(const std::string& name) const
87 {
88   static const elf_symbols empty_result;
89   const auto it = name_symbol_map_.find(name);
90   if (it != name_symbol_map_.end())
91       return it->second;
92   return empty_result;
93 }
94 
95 /// Lookup a symbol by its address
96 ///
97 /// @param symbol_addr the starting address of the symbol
98 ///
99 /// @return a symbol if found, else an empty sptr
100 const elf_symbol_sptr&
lookup_symbol(GElf_Addr symbol_addr) const101 symtab::lookup_symbol(GElf_Addr symbol_addr) const
102 {
103   static const elf_symbol_sptr empty_result;
104   const auto addr_it = addr_symbol_map_.find(symbol_addr);
105   if (addr_it != addr_symbol_map_.end())
106     return addr_it->second;
107   else
108     {
109       // check for a potential entry address mapping instead,
110       // relevant for ppc ELFv1 binaries
111       const auto entry_it = entry_addr_symbol_map_.find(symbol_addr);
112       if (entry_it != entry_addr_symbol_map_.end())
113 	return entry_it->second;
114     }
115   return empty_result;
116 }
117 
118 /// A symbol sorting functor.
119 static struct
120 {
121   bool
operator ()abigail::symtab_reader::__anon7e0669120108122   operator()(const elf_symbol_sptr& left, const elf_symbol_sptr& right)
123   {return left->get_id_string() < right->get_id_string();}
124 } symbol_sort;
125 
126 /// Construct a symtab object and instantiate it from an ELF
127 /// handle. Also pass in the ir::environment we are living in. If
128 /// specified, the symbol_predicate will be respected when creating
129 /// the full vector of symbols.
130 ///
131 /// @param elf_handle the elf handle to load the symbol table from
132 ///
133 /// @param env the environment we are operating in
134 ///
135 /// @param is_suppressed a predicate function to determine if a symbol should
136 /// be suppressed
137 ///
138 /// @return a smart pointer handle to symtab, set to nullptr if the load was
139 /// not completed
140 symtab_ptr
load(Elf * elf_handle,ir::environment * env,symbol_predicate is_suppressed)141 symtab::load(Elf*	      elf_handle,
142 	     ir::environment* env,
143 	     symbol_predicate is_suppressed)
144 {
145   ABG_ASSERT(elf_handle);
146   ABG_ASSERT(env);
147 
148   symtab_ptr result(new symtab);
149   if (!result->load_(elf_handle, env, is_suppressed))
150     return {};
151 
152   return result;
153 }
154 
155 /// Construct a symtab object from existing name->symbol lookup maps.
156 /// They were possibly read from a different representation (XML maybe).
157 ///
158 /// @param function_symbol_map a map from ELF function name to elf_symbol
159 ///
160 /// @param variable_symbol_map a map from ELF variable name to elf_symbol
161 ///
162 /// @return a smart pointer handle to symtab, set to nullptr if the load was
163 /// not completed
164 symtab_ptr
load(string_elf_symbols_map_sptr function_symbol_map,string_elf_symbols_map_sptr variables_symbol_map)165 symtab::load(string_elf_symbols_map_sptr function_symbol_map,
166 	     string_elf_symbols_map_sptr variables_symbol_map)
167 {
168   symtab_ptr result(new symtab);
169   if (!result->load_(function_symbol_map, variables_symbol_map))
170     return {};
171 
172   return result;
173 }
174 
175 /// Default constructor of the @ref symtab type.
symtab()176 symtab::symtab()
177   : is_kernel_binary_(false), has_ksymtab_entries_(false)
178 {}
179 
180 /// Load the symtab representation from an Elf binary presented to us by an
181 /// Elf* handle.
182 ///
183 /// This method iterates over the entries of .symtab and collects all
184 /// interesting symbols (functions and variables).
185 ///
186 /// In case of a Linux Kernel binary, it also collects information about the
187 /// symbols exported via EXPORT_SYMBOL in the Kernel that would then end up
188 /// having a corresponding __ksymtab entry.
189 ///
190 /// Symbols that are suppressed will be omitted from the symbols_ vector, but
191 /// still be discoverable through the name->symbol and addr->symbol lookup
192 /// maps.
193 ///
194 /// @param elf_handle the elf handle to load the symbol table from
195 ///
196 /// @param env the environment we are operating in
197 ///
198 /// @param is_suppressed a predicate function to determine if a symbol should
199 /// be suppressed
200 ///
201 /// @return true if the load succeeded
202 bool
load_(Elf * elf_handle,ir::environment * env,symbol_predicate is_suppressed)203 symtab::load_(Elf*	       elf_handle,
204 	      ir::environment* env,
205 	      symbol_predicate is_suppressed)
206 {
207 
208   Elf_Scn* symtab_section = elf_helpers::find_symbol_table_section(elf_handle);
209   if (!symtab_section)
210     {
211       std::cerr << "No symbol table found: Skipping symtab load.\n";
212       return false;
213     }
214 
215   GElf_Shdr symtab_sheader;
216   gelf_getshdr(symtab_section, &symtab_sheader);
217 
218   // check for bogus section header
219   if (symtab_sheader.sh_entsize == 0)
220     {
221       std::cerr << "Invalid symtab header found: Skipping symtab load.\n";
222       return false;
223     }
224 
225   const size_t number_syms =
226       symtab_sheader.sh_size / symtab_sheader.sh_entsize;
227 
228   Elf_Data* symtab = elf_getdata(symtab_section, 0);
229   if (!symtab)
230     {
231       std::cerr << "Could not load elf symtab: Skipping symtab load.\n";
232       return false;
233     }
234 
235   const bool is_kernel = elf_helpers::is_linux_kernel(elf_handle);
236   std::unordered_set<std::string> exported_kernel_symbols;
237   std::unordered_map<std::string, uint64_t> crc_values;
238 
239   const bool is_arm32 = elf_helpers::architecture_is_arm32(elf_handle);
240   const bool is_ppc64 = elf_helpers::architecture_is_ppc64(elf_handle);
241 
242   for (size_t i = 0; i < number_syms; ++i)
243     {
244       GElf_Sym *sym, sym_mem;
245       sym = gelf_getsym(symtab, i, &sym_mem);
246       if (!sym)
247 	{
248 	  std::cerr << "Could not load symbol with index " << i
249 		    << ": Skipping symtab load.\n";
250 	  return false;
251 	}
252 
253       const char* const name_str =
254 	  elf_strptr(elf_handle, symtab_sheader.sh_link, sym->st_name);
255 
256       // no name, no game
257       if (!name_str)
258 	continue;
259 
260       const std::string name = name_str;
261       if (name.empty())
262 	continue;
263 
264       // Handle ksymtab entries. Every symbol entry that starts with __ksymtab_
265       // indicates that the symbol in question is exported through ksymtab. We
266       // do not know whether this is ksymtab_gpl or ksymtab, but that is good
267       // enough for now.
268       //
269       // We could follow up with this entry:
270       //
271       // symbol_value -> ksymtab_entry in either ksymtab_gpl or ksymtab
272       //              -> addr/name/namespace (in case of PREL32: offset)
273       //
274       // That way we could also detect ksymtab<>ksymtab_gpl changes or changes
275       // of the symbol namespace.
276       //
277       // As of now this lookup is fragile, as occasionally ksymtabs are empty
278       // (seen so far for kernel modules and LTO builds). Hence we stick to the
279       // fairly safe assumption that ksymtab exported entries are having an
280       // appearence as __ksymtab_<symbol> in the symtab.
281       if (is_kernel && name.rfind("__ksymtab_", 0) == 0)
282 	{
283 	  ABG_ASSERT(exported_kernel_symbols.insert(name.substr(10)).second);
284 	  continue;
285 	}
286       if (is_kernel && name.rfind("__crc_", 0) == 0)
287 	{
288 	  ABG_ASSERT(crc_values.emplace(name.substr(6), sym->st_value).second);
289 	  continue;
290 	}
291 
292       // filter out uninteresting entries and only keep functions/variables for
293       // now. The rest might be interesting in the future though.
294       const int sym_type = GELF_ST_TYPE(sym->st_info);
295       if (!(sym_type == STT_FUNC
296 	    || sym_type == STT_GNU_IFUNC
297 	    // If the symbol is for an OBJECT, the index of the
298 	    // section it refers to cannot be absolute.
299 	    // Otherwise that OBJECT is not a variable.
300 	    || (sym_type == STT_OBJECT && sym->st_shndx != SHN_ABS)
301 	    || sym_type == STT_TLS))
302 	continue;
303 
304       const bool sym_is_defined = sym->st_shndx != SHN_UNDEF;
305       // this occurs in relocatable files.
306       const bool sym_is_common = sym->st_shndx == SHN_COMMON;
307 
308       elf_symbol::version ver;
309       elf_helpers::get_version_for_symbol(elf_handle, i, sym_is_defined, ver);
310 
311       const elf_symbol_sptr& symbol_sptr =
312 	elf_symbol::create
313 	(env, i, sym->st_size, name,
314 	 elf_helpers::stt_to_elf_symbol_type(GELF_ST_TYPE(sym->st_info)),
315 	 elf_helpers::stb_to_elf_symbol_binding(GELF_ST_BIND(sym->st_info)),
316 	 sym_is_defined, sym_is_common, ver,
317 	 elf_helpers::stv_to_elf_symbol_visibility
318 	 (GELF_ST_VISIBILITY(sym->st_other)),
319 	 /*is_linux_strings_cstr=*/false); // TODO: remove
320 					   // is_linux_strings_cstr
321 					   // as it is obsolete
322 
323       // We do not take suppressed symbols into our symbol vector to avoid
324       // accidental leakage. But we ensure supressed symbols are otherwise set
325       // up for lookup.
326       if (!(is_suppressed && is_suppressed(symbol_sptr)))
327 	// add to the symbol vector
328 	symbols_.push_back(symbol_sptr);
329       else
330 	symbol_sptr->set_is_suppressed(true);
331 
332       // add to the name->symbol lookup
333       name_symbol_map_[name].push_back(symbol_sptr);
334 
335       // add to the addr->symbol lookup
336       if (symbol_sptr->is_common_symbol())
337 	{
338 	  const auto it = name_symbol_map_.find(name);
339 	  ABG_ASSERT(it != name_symbol_map_.end());
340 	  const elf_symbols& common_sym_instances = it->second;
341 	  ABG_ASSERT(!common_sym_instances.empty());
342 	  if (common_sym_instances.size() > 1)
343 	    {
344 	      elf_symbol_sptr main_common_sym = common_sym_instances[0];
345 	      ABG_ASSERT(main_common_sym->get_name() == name);
346 	      ABG_ASSERT(main_common_sym->is_common_symbol());
347 	      ABG_ASSERT(symbol_sptr.get() != main_common_sym.get());
348 	      main_common_sym->add_common_instance(symbol_sptr);
349 	    }
350 	}
351       else if (symbol_sptr->is_defined())
352 	{
353 	  GElf_Addr symbol_value =
354 	      elf_helpers::maybe_adjust_et_rel_sym_addr_to_abs_addr(elf_handle,
355 								    sym);
356 
357 	  if (symbol_sptr->is_function())
358 	    {
359 	      if (is_arm32)
360 		// Clear bit zero of ARM32 addresses as per "ELF for the Arm
361 		// Architecture" section 5.5.3.
362 		// https://static.docs.arm.com/ihi0044/g/aaelf32.pdf
363 		symbol_value &= ~1;
364 	      else if (is_ppc64)
365 		update_function_entry_address_symbol_map(elf_handle, sym,
366 							 symbol_sptr);
367 	    }
368 
369 	  const auto result =
370 	    addr_symbol_map_.emplace(symbol_value, symbol_sptr);
371 	  if (!result.second)
372 	    // A symbol with the same address already exists.  This
373 	    // means this symbol is an alias of the main symbol with
374 	    // that address.  So let's register this new alias as such.
375 	    result.first->second->get_main_symbol()->add_alias(symbol_sptr);
376 	}
377     }
378 
379   is_kernel_binary_ = elf_helpers::is_linux_kernel(elf_handle);
380 
381   // Now apply the ksymtab_exported attribute to the symbols we collected.
382   for (const auto& symbol : exported_kernel_symbols)
383     {
384       const auto r = name_symbol_map_.find(symbol);
385       if (r == name_symbol_map_.end())
386 	continue;
387 
388       for (const auto& elf_symbol : r->second)
389 	  if (elf_symbol->is_public())
390 	    elf_symbol->set_is_in_ksymtab(true);
391       has_ksymtab_entries_ = true;
392     }
393 
394   // Now add the CRC values
395   for (const auto& crc_entry : crc_values)
396     {
397       const auto r = name_symbol_map_.find(crc_entry.first);
398       if (r == name_symbol_map_.end())
399 	continue;
400 
401       for (const auto& symbol : r->second)
402 	symbol->set_crc(crc_entry.second);
403     }
404 
405   // sort the symbols for deterministic output
406   std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
407 
408   return true;
409 }
410 
411 /// Load the symtab representation from a function/variable lookup map pair.
412 ///
413 /// This method assumes the lookup maps are correct and sets up the data
414 /// vector as well as the name->symbol lookup map. The addr->symbol lookup
415 /// map cannot be set up in this case.
416 ///
417 /// @param function_symbol_map a map from ELF function name to elf_symbol
418 ///
419 /// @param variable_symbol_map a map from ELF variable name to elf_symbol
420 ///
421 /// @return true if the load succeeded
422 bool
load_(string_elf_symbols_map_sptr function_symbol_map,string_elf_symbols_map_sptr variables_symbol_map)423 symtab::load_(string_elf_symbols_map_sptr function_symbol_map,
424 	     string_elf_symbols_map_sptr variables_symbol_map)
425 
426 {
427   if (function_symbol_map)
428     for (const auto& symbol_map_entry : *function_symbol_map)
429       {
430 	for (const auto& symbol : symbol_map_entry.second)
431 	  {
432 	    if (!symbol->is_suppressed())
433 	      symbols_.push_back(symbol);
434 	  }
435 	ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
436       }
437 
438   if (variables_symbol_map)
439     for (const auto& symbol_map_entry : *variables_symbol_map)
440       {
441 	for (const auto& symbol : symbol_map_entry.second)
442 	  {
443 	    if (!symbol->is_suppressed())
444 	      symbols_.push_back(symbol);
445 	  }
446 	ABG_ASSERT(name_symbol_map_.insert(symbol_map_entry).second);
447       }
448 
449   // sort the symbols for deterministic output
450   std::sort(symbols_.begin(), symbols_.end(), symbol_sort);
451 
452   return true;
453 }
454 
455 /// Notify the symtab about the name of the main symbol at a given address.
456 ///
457 /// From just alone the symtab we can't guess the main symbol of a bunch of
458 /// aliased symbols that all point to the same address. During processing of
459 /// additional information (such as DWARF), this information becomes apparent
460 /// and we can adjust the addr->symbol lookup map as well as the alias
461 /// reference of the symbol objects.
462 ///
463 /// @param addr the addr that we are updating the main symbol for
464 /// @param name the name of the main symbol
465 void
update_main_symbol(GElf_Addr addr,const std::string & name)466 symtab::update_main_symbol(GElf_Addr addr, const std::string& name)
467 {
468   // get one symbol (i.e. the current main symbol)
469   elf_symbol_sptr symbol = lookup_symbol(addr);
470 
471   // The caller might not know whether the addr is associated to an ELF symbol
472   // that we care about. E.g. the addr could be associated to an ELF symbol,
473   // but not one in .dynsym when looking at a DSO. Hence, early exit if the
474   // lookup failed.
475   if (!symbol)
476     return;
477 
478   // determine the new main symbol by attempting an update
479   elf_symbol_sptr new_main = symbol->update_main_symbol(name);
480 
481   // also update the default symbol we return when looked up by address
482   if (new_main)
483     addr_symbol_map_[addr] = new_main;
484 }
485 
486 /// Update the function entry symbol map to later allow lookups of this symbol
487 /// by entry address as well. This is relevant for ppc64 ELFv1 binaries.
488 ///
489 /// For ppc64 ELFv1 binaries, we need to build a function entry point address
490 /// -> function symbol map. This is in addition to the function pointer ->
491 /// symbol map.  This is because on ppc64 ELFv1, a function pointer is
492 /// different from a function entry point address.
493 ///
494 /// On ppc64 ELFv1, the DWARF DIE of a function references the address of the
495 /// entry point of the function symbol; whereas the value of the function
496 /// symbol is the function pointer. As these addresses are different, if I we
497 /// want to get to the symbol of a function from its entry point address (as
498 /// referenced by DWARF function DIEs) we must have the two maps I mentionned
499 /// right above.
500 ///
501 /// In other words, we need a map that associates a function entry point
502 /// address with the symbol of that function, to be able to get the function
503 /// symbol that corresponds to a given function DIE, on ppc64.
504 ///
505 /// The value of the function pointer (the value of the symbol) usually refers
506 /// to the offset of a table in the .opd section.  But sometimes, for a symbol
507 /// named "foo", the corresponding symbol named ".foo" (note the dot before
508 /// foo) which value is the entry point address of the function; that entry
509 /// point address refers to a region in the .text section.
510 ///
511 /// So we are only interested in values of the symbol that are in the .opd
512 /// section.
513 ///
514 /// @param elf_handle the ELF handle to operate on
515 ///
516 /// @param native_symbol the native Elf symbol to update the entry for
517 ///
518 /// @param symbol_sptr the internal symbol to associte the entry address with
519 void
update_function_entry_address_symbol_map(Elf * elf_handle,GElf_Sym * native_symbol,const elf_symbol_sptr & symbol_sptr)520 symtab::update_function_entry_address_symbol_map(
521   Elf* elf_handle, GElf_Sym* native_symbol, const elf_symbol_sptr& symbol_sptr)
522 {
523   const GElf_Addr fn_desc_addr = native_symbol->st_value;
524   const GElf_Addr fn_entry_point_addr =
525     elf_helpers::lookup_ppc64_elf_fn_entry_point_address(elf_handle,
526 							 fn_desc_addr);
527 
528   const std::pair<addr_symbol_map_type::const_iterator, bool>& result =
529     entry_addr_symbol_map_.emplace(fn_entry_point_addr, symbol_sptr);
530 
531   const addr_symbol_map_type::const_iterator it = result.first;
532   const bool was_inserted = result.second;
533   if (!was_inserted
534       && elf_helpers::address_is_in_opd_section(elf_handle, fn_desc_addr))
535     {
536       // Either
537       //
538       // 'symbol' must have been registered as an alias for
539       // it->second->get_main_symbol()
540       //
541       // Or
542       //
543       // if the name of 'symbol' is foo, then the name of it2->second is
544       // ".foo". That is, foo is the name of the symbol when it refers to the
545       // function descriptor in the .opd section and ".foo" is an internal name
546       // for the address of the entry point of foo.
547       //
548       // In the latter case, we just want to keep a reference to "foo" as .foo
549       // is an internal name.
550 
551       const bool two_symbols_alias =
552 	it->second->get_main_symbol()->does_alias(*symbol_sptr);
553       const bool symbol_is_foo_and_prev_symbol_is_dot_foo =
554 	(it->second->get_name() == std::string(".") + symbol_sptr->get_name());
555 
556       ABG_ASSERT(two_symbols_alias
557 		 || symbol_is_foo_and_prev_symbol_is_dot_foo);
558 
559       if (symbol_is_foo_and_prev_symbol_is_dot_foo)
560 	// Let's just keep a reference of the symbol that the user sees in the
561 	// source code (the one named foo). The symbol which name is prefixed
562 	// with a "dot" is an artificial one.
563 	entry_addr_symbol_map_[fn_entry_point_addr] = symbol_sptr;
564     }
565 }
566 
567 } // end namespace symtab_reader
568 } // end namespace abigail
569