1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- Mode: C++ -*-
3 //
4 // Copyright (C) 2020 Google, Inc.
5 
6 /// @file
7 ///
8 /// This contains the definitions of the ELF utilities for the dwarf reader.
9 
10 #include "abg-elf-helpers.h"
11 
12 #include <elf.h>
13 
14 #include "abg-tools-utils.h"
15 
16 namespace abigail
17 {
18 
19 namespace elf_helpers
20 {
21 
22 /// Convert an elf symbol type (given by the ELF{32,64}_ST_TYPE
23 /// macros) into an elf_symbol::type value.
24 ///
25 /// Note that this function aborts when given an unexpected value.
26 ///
27 /// @param the symbol type value to convert.
28 ///
29 /// @return the converted value.
30 elf_symbol::type
stt_to_elf_symbol_type(unsigned char stt)31 stt_to_elf_symbol_type(unsigned char stt)
32 {
33   switch (stt)
34     {
35     case STT_NOTYPE:
36       return elf_symbol::NOTYPE_TYPE;
37     case STT_OBJECT:
38       return elf_symbol::OBJECT_TYPE;
39     case STT_FUNC:
40       return elf_symbol::FUNC_TYPE;
41     case STT_SECTION:
42       return elf_symbol::SECTION_TYPE;
43     case STT_FILE:
44       return elf_symbol::FILE_TYPE;
45     case STT_COMMON:
46       return elf_symbol::COMMON_TYPE;
47     case STT_TLS:
48       return elf_symbol::TLS_TYPE;
49     case STT_GNU_IFUNC:
50       return elf_symbol::GNU_IFUNC_TYPE;
51     default:
52       // An unknown value that probably ought to be supported?  Let's
53       // abort right here rather than yielding garbage.
54       ABG_ASSERT_NOT_REACHED;
55     }
56 }
57 
58 /// Convert an elf symbol binding (given by the ELF{32,64}_ST_BIND
59 /// macros) into an elf_symbol::binding value.
60 ///
61 /// Note that this function aborts when given an unexpected value.
62 ///
63 /// @param the symbol binding value to convert.
64 ///
65 /// @return the converted value.
66 elf_symbol::binding
stb_to_elf_symbol_binding(unsigned char stb)67 stb_to_elf_symbol_binding(unsigned char stb)
68 {
69   switch (stb)
70     {
71     case STB_LOCAL:
72       return elf_symbol::LOCAL_BINDING;
73     case STB_GLOBAL:
74       return elf_symbol::GLOBAL_BINDING;
75     case STB_WEAK:
76       return elf_symbol::WEAK_BINDING;
77     case STB_GNU_UNIQUE:
78       return elf_symbol::GNU_UNIQUE_BINDING;
79     default:
80       ABG_ASSERT_NOT_REACHED;
81     }
82 }
83 
84 /// Convert an ELF symbol visiblity given by the symbols ->st_other
85 /// data member as returned by the GELF_ST_VISIBILITY macro into a
86 /// elf_symbol::visiblity value.
87 ///
88 /// @param stv the value of the ->st_other data member of the ELF
89 /// symbol.
90 ///
91 /// @return the converted elf_symbol::visiblity value.
92 elf_symbol::visibility
stv_to_elf_symbol_visibility(unsigned char stv)93 stv_to_elf_symbol_visibility(unsigned char stv)
94 {
95   switch (stv)
96     {
97     case STV_DEFAULT:
98       return elf_symbol::DEFAULT_VISIBILITY;
99     case STV_INTERNAL:
100       return elf_symbol::INTERNAL_VISIBILITY;
101     case STV_HIDDEN:
102       return elf_symbol::HIDDEN_VISIBILITY;
103     case STV_PROTECTED:
104       return elf_symbol::PROTECTED_VISIBILITY;
105     default:
106       ABG_ASSERT_NOT_REACHED;
107     }
108 }
109 
110 /// Convert the value of the e_machine field of GElf_Ehdr into a
111 /// string.  This is to get a string representing the architecture of
112 /// the elf file at hand.
113 ///
114 /// @param e_machine the value of GElf_Ehdr::e_machine.
115 ///
116 /// @return the string representation of GElf_Ehdr::e_machine.
117 std::string
e_machine_to_string(GElf_Half e_machine)118 e_machine_to_string(GElf_Half e_machine)
119 {
120   switch (e_machine)
121     {
122     case EM_NONE:
123       return "elf-no-arch";
124     case EM_M32:
125       return "elf-att-we-32100";
126     case EM_SPARC:
127       return "elf-sun-sparc";
128     case EM_386:
129       return "elf-intel-80386";
130     case EM_68K:
131       return "elf-motorola-68k";
132     case EM_88K:
133       return "elf-motorola-88k";
134     case EM_860:
135       return "elf-intel-80860";
136     case EM_MIPS:
137       return "elf-mips-r3000-be";
138     case EM_S370:
139       return "elf-ibm-s370";
140     case EM_MIPS_RS3_LE:
141       return "elf-mips-r3000-le";
142     case EM_PARISC:
143       return "elf-hp-parisc";
144     case EM_VPP500:
145       return "elf-fujitsu-vpp500";
146     case EM_SPARC32PLUS:
147       return "elf-sun-sparc-v8plus";
148     case EM_960:
149       return "elf-intel-80960";
150     case EM_PPC:
151       return "elf-powerpc";
152     case EM_PPC64:
153       return "elf-powerpc-64";
154     case EM_S390:
155       return "elf-ibm-s390";
156     case EM_V800:
157       return "elf-nec-v800";
158     case EM_FR20:
159       return "elf-fujitsu-fr20";
160     case EM_RH32:
161       return "elf-trw-rh32";
162     case EM_RCE:
163       return "elf-motorola-rce";
164     case EM_ARM:
165       return "elf-arm";
166     case EM_FAKE_ALPHA:
167       return "elf-digital-alpha";
168     case EM_SH:
169       return "elf-hitachi-sh";
170     case EM_SPARCV9:
171       return "elf-sun-sparc-v9-64";
172     case EM_TRICORE:
173       return "elf-siemens-tricore";
174     case EM_ARC:
175       return "elf-argonaut-risc-core";
176     case EM_H8_300:
177       return "elf-hitachi-h8-300";
178     case EM_H8_300H:
179       return "elf-hitachi-h8-300h";
180     case EM_H8S:
181       return "elf-hitachi-h8s";
182     case EM_H8_500:
183       return "elf-hitachi-h8-500";
184     case EM_IA_64:
185       return "elf-intel-ia-64";
186     case EM_MIPS_X:
187       return "elf-stanford-mips-x";
188     case EM_COLDFIRE:
189       return "elf-motorola-coldfire";
190     case EM_68HC12:
191       return "elf-motorola-68hc12";
192     case EM_MMA:
193       return "elf-fujitsu-mma";
194     case EM_PCP:
195       return "elf-siemens-pcp";
196     case EM_NCPU:
197       return "elf-sony-ncpu";
198     case EM_NDR1:
199       return "elf-denso-ndr1";
200     case EM_STARCORE:
201       return "elf-motorola-starcore";
202     case EM_ME16:
203       return "elf-toyota-me16";
204     case EM_ST100:
205       return "elf-stm-st100";
206     case EM_TINYJ:
207       return "elf-alc-tinyj";
208     case EM_X86_64:
209       return "elf-amd-x86_64";
210     case EM_PDSP:
211       return "elf-sony-pdsp";
212     case EM_FX66:
213       return "elf-siemens-fx66";
214     case EM_ST9PLUS:
215       return "elf-stm-st9+";
216     case EM_ST7:
217       return "elf-stm-st7";
218     case EM_68HC16:
219       return "elf-motorola-68hc16";
220     case EM_68HC11:
221       return "elf-motorola-68hc11";
222     case EM_68HC08:
223       return "elf-motorola-68hc08";
224     case EM_68HC05:
225       return "elf-motorola-68hc05";
226     case EM_SVX:
227       return "elf-sg-svx";
228     case EM_ST19:
229       return "elf-stm-st19";
230     case EM_VAX:
231       return "elf-digital-vax";
232     case EM_CRIS:
233       return "elf-axis-cris";
234     case EM_JAVELIN:
235       return "elf-infineon-javelin";
236     case EM_FIREPATH:
237       return "elf-firepath";
238     case EM_ZSP:
239       return "elf-lsi-zsp";
240     case EM_MMIX:
241       return "elf-don-knuth-mmix";
242     case EM_HUANY:
243       return "elf-harvard-huany";
244     case EM_PRISM:
245       return "elf-sitera-prism";
246     case EM_AVR:
247       return "elf-atmel-avr";
248     case EM_FR30:
249       return "elf-fujistu-fr30";
250     case EM_D10V:
251       return "elf-mitsubishi-d10v";
252     case EM_D30V:
253       return "elf-mitsubishi-d30v";
254     case EM_V850:
255       return "elf-nec-v850";
256     case EM_M32R:
257       return "elf-mitsubishi-m32r";
258     case EM_MN10300:
259       return "elf-matsushita-mn10300";
260     case EM_MN10200:
261       return "elf-matsushita-mn10200";
262     case EM_PJ:
263       return "elf-picojava";
264     case EM_OPENRISC:
265       return "elf-openrisc-32";
266     case EM_ARC_A5:
267       return "elf-arc-a5";
268     case EM_XTENSA:
269       return "elf-tensilica-xtensa";
270 
271 #ifdef HAVE_EM_AARCH64_MACRO
272     case EM_AARCH64:
273       return "elf-arm-aarch64";
274 #endif
275 
276 #ifdef HAVE_EM_TILEPRO_MACRO
277     case EM_TILEPRO:
278       return "elf-tilera-tilepro";
279 #endif
280 
281 #ifdef HAVE_EM_TILEGX_MACRO
282     case EM_TILEGX:
283       return "elf-tilera-tilegx";
284 #endif
285 
286     case EM_NUM:
287       return "elf-last-arch-number";
288     case EM_ALPHA:
289       return "elf-non-official-alpha";
290     default:
291       {
292 	std::ostringstream o;
293 	o << "elf-unknown-arch-value-" << e_machine;
294 	return o.str();
295       }
296     }
297 }
298 
299 /// Find and return a section by its name and its type.
300 ///
301 /// @param elf_handle the elf handle to use.
302 ///
303 /// @param name the name of the section.
304 ///
305 /// @param section_type the type of the section.  This is the
306 /// Elf32_Shdr::sh_type (or Elf64_Shdr::sh_type) data member.
307 /// Examples of values of this parameter are SHT_PROGBITS or SHT_NOBITS.
308 ///
309 /// @return the section found, nor nil if none was found.
310 Elf_Scn*
find_section(Elf * elf_handle,const std::string & name,Elf64_Word section_type)311 find_section(Elf* elf_handle, const std::string& name, Elf64_Word section_type)
312 {
313   size_t section_header_string_index = 0;
314   if (elf_getshdrstrndx (elf_handle, &section_header_string_index) < 0)
315     return 0;
316 
317   Elf_Scn* section = 0;
318   GElf_Shdr header_mem, *header;
319   while ((section = elf_nextscn(elf_handle, section)) != 0)
320     {
321       header = gelf_getshdr(section, &header_mem);
322       if (header == NULL || header->sh_type != section_type)
323       continue;
324 
325       const char* section_name =
326 	elf_strptr(elf_handle, section_header_string_index, header->sh_name);
327       if (section_name && name == section_name)
328 	return section;
329     }
330 
331   return 0;
332 }
333 
334 /// Find the symbol table.
335 ///
336 /// If we are looking at a relocatable or executable file, this
337 /// function will return the .symtab symbol table (of type
338 /// SHT_SYMTAB).  But if we are looking at a DSO it returns the
339 /// .dynsym symbol table (of type SHT_DYNSYM).
340 ///
341 /// @param elf_handle the elf handle to consider.
342 ///
343 /// @param symtab the symbol table found.
344 ///
345 /// @return the symbol table section
346 Elf_Scn*
find_symbol_table_section(Elf * elf_handle)347 find_symbol_table_section(Elf* elf_handle)
348 {
349   Elf_Scn* section = 0, *dynsym = 0, *sym_tab = 0;
350   while ((section = elf_nextscn(elf_handle, section)) != 0)
351     {
352       GElf_Shdr header_mem, *header;
353       header = gelf_getshdr(section, &header_mem);
354       if (header->sh_type == SHT_DYNSYM)
355 	dynsym = section;
356       else if (header->sh_type == SHT_SYMTAB)
357 	sym_tab = section;
358     }
359 
360   if (dynsym || sym_tab)
361     {
362       GElf_Ehdr eh_mem;
363       GElf_Ehdr* elf_header = gelf_getehdr(elf_handle, &eh_mem);
364       if (elf_header->e_type == ET_REL
365 	  || elf_header->e_type == ET_EXEC)
366 	return sym_tab ? sym_tab : dynsym;
367       else
368 	return dynsym ? dynsym : sym_tab;
369     }
370   return NULL;
371 }
372 
373 /// Find the index (in the section headers table) of the symbol table
374 /// section.
375 ///
376 /// If we are looking at a relocatable or executable file, this
377 /// function will return the index for the .symtab symbol table (of
378 /// type SHT_SYMTAB).  But if we are looking at a DSO it returns the
379 /// index for the .dynsym symbol table (of type SHT_DYNSYM).
380 ///
381 /// @param elf_handle the elf handle to use.
382 ///
383 /// @param symtab_index the index of the symbol_table, that was found.
384 ///
385 /// @return true iff the symbol table section index was found.
386 bool
find_symbol_table_section_index(Elf * elf_handle,size_t & symtab_index)387 find_symbol_table_section_index(Elf* elf_handle, size_t& symtab_index)
388 {
389   Elf_Scn* section = find_symbol_table_section(elf_handle);
390 
391   if (!section)
392     return false;
393 
394   symtab_index = elf_ndxscn(section);
395   return true;
396 }
397 
398 /// Get the offset offset of the hash table section.
399 ///
400 /// @param elf_handle the elf handle to use.
401 ///
402 /// @param ht_section_offset this is set to the resulting offset
403 /// of the hash table section.  This is set iff the function returns true.
404 ///
405 /// @param symtab_section_offset the offset of the section of the
406 /// symbol table the hash table refers to.
407 hash_table_kind
find_hash_table_section_index(Elf * elf_handle,size_t & ht_section_index,size_t & symtab_section_index)408 find_hash_table_section_index(Elf*	elf_handle,
409 			      size_t&	ht_section_index,
410 			      size_t&	symtab_section_index)
411 {
412   if (!elf_handle)
413     return NO_HASH_TABLE_KIND;
414 
415   GElf_Shdr header_mem, *section_header;
416   bool found_sysv_ht = false, found_gnu_ht = false;
417   for (Elf_Scn* section = elf_nextscn(elf_handle, 0);
418        section != 0;
419        section = elf_nextscn(elf_handle, section))
420     {
421       section_header= gelf_getshdr(section, &header_mem);
422       if (section_header->sh_type != SHT_HASH
423 	  && section_header->sh_type != SHT_GNU_HASH)
424 	continue;
425 
426       ht_section_index = elf_ndxscn(section);
427       symtab_section_index = section_header->sh_link;
428 
429       if (section_header->sh_type == SHT_HASH)
430 	found_sysv_ht = true;
431       else if (section_header->sh_type == SHT_GNU_HASH)
432 	found_gnu_ht = true;
433     }
434 
435   if (found_gnu_ht)
436     return GNU_HASH_TABLE_KIND;
437   else if (found_sysv_ht)
438     return SYSV_HASH_TABLE_KIND;
439   else
440     return NO_HASH_TABLE_KIND;
441 }
442 
443 /// Find and return the .text section.
444 ///
445 /// @param elf_handle the elf handle to use.
446 ///
447 /// @return the .text section found.
448 Elf_Scn*
find_text_section(Elf * elf_handle)449 find_text_section(Elf* elf_handle)
450 {return find_section(elf_handle, ".text", SHT_PROGBITS);}
451 
452 /// Find and return the .bss section.
453 ///
454 /// @param elf_handle.
455 ///
456 /// @return the .bss section found.
457 Elf_Scn*
find_bss_section(Elf * elf_handle)458 find_bss_section(Elf* elf_handle)
459 {return find_section(elf_handle, ".bss", SHT_NOBITS);}
460 
461 /// Find and return the .rodata section.
462 ///
463 /// @param elf_handle.
464 ///
465 /// @return the .rodata section found.
466 Elf_Scn*
find_rodata_section(Elf * elf_handle)467 find_rodata_section(Elf* elf_handle)
468 {return find_section(elf_handle, ".rodata", SHT_PROGBITS);}
469 
470 /// Find and return the .data section.
471 ///
472 /// @param elf_handle the elf handle to use.
473 ///
474 /// @return the .data section found.
475 Elf_Scn*
find_data_section(Elf * elf_handle)476 find_data_section(Elf* elf_handle)
477 {return find_section(elf_handle, ".data", SHT_PROGBITS);}
478 
479 /// Find and return the .data1 section.
480 ///
481 /// @param elf_handle the elf handle to use.
482 ///
483 /// @return the .data1 section found.
484 Elf_Scn*
find_data1_section(Elf * elf_handle)485 find_data1_section(Elf* elf_handle)
486 {return find_section(elf_handle, ".data1", SHT_PROGBITS);}
487 
488 /// Return the "Official Procedure descriptors section."  This
489 /// section is named .opd, and is usually present only on PPC64
490 /// ELFv1 binaries.
491 ///
492 /// @param elf_handle the elf handle to consider.
493 ///
494 /// @return the .opd section, if found.  Return nil otherwise.
495 Elf_Scn*
find_opd_section(Elf * elf_handle)496 find_opd_section(Elf* elf_handle)
497 {return find_section(elf_handle, ".opd", SHT_PROGBITS);}
498 
499 /// Return the SHT_GNU_versym, SHT_GNU_verdef and SHT_GNU_verneed
500 /// sections that are involved in symbol versionning.
501 ///
502 /// @param elf_handle the elf handle to use.
503 ///
504 /// @param versym_section the SHT_GNU_versym section found.  If the
505 /// section wasn't found, this is set to nil.
506 ///
507 /// @param verdef_section the SHT_GNU_verdef section found.  If the
508 /// section wasn't found, this is set to nil.
509 ///
510 /// @param verneed_section the SHT_GNU_verneed section found.  If the
511 /// section wasn't found, this is set to nil.
512 ///
513 /// @return true iff at least one of the sections where found.
514 bool
get_symbol_versionning_sections(Elf * elf_handle,Elf_Scn * & versym_section,Elf_Scn * & verdef_section,Elf_Scn * & verneed_section)515 get_symbol_versionning_sections(Elf*		elf_handle,
516 				Elf_Scn*&	versym_section,
517 				Elf_Scn*&	verdef_section,
518 				Elf_Scn*&	verneed_section)
519 {
520   Elf_Scn* section = NULL;
521   GElf_Shdr mem;
522   Elf_Scn* versym = NULL, *verdef = NULL, *verneed = NULL;
523 
524   while ((section = elf_nextscn(elf_handle, section)) != NULL)
525     {
526       GElf_Shdr* h = gelf_getshdr(section, &mem);
527       if (h->sh_type == SHT_GNU_versym)
528 	versym = section;
529       else if (h->sh_type == SHT_GNU_verdef)
530 	verdef = section;
531       else if (h->sh_type == SHT_GNU_verneed)
532 	verneed = section;
533     }
534 
535   if (versym || verdef || verneed)
536     {
537       // At least one the versionning sections was found.  Return it.
538       versym_section = versym;
539       verdef_section = verdef;
540       verneed_section = verneed;
541       return true;
542     }
543 
544   return false;
545 }
546 
547 /// Return the __ksymtab section of a linux kernel ELF file (either
548 /// a vmlinux binary or a kernel module).
549 ///
550 /// @param elf_handle the elf handle to consider.
551 ///
552 /// @return the __ksymtab section if found, nil otherwise.
553 Elf_Scn*
find_ksymtab_section(Elf * elf_handle)554 find_ksymtab_section(Elf* elf_handle)
555 {return find_section(elf_handle, "__ksymtab", SHT_PROGBITS);}
556 
557 /// Return the __ksymtab_gpl section of a linux kernel ELF file (either
558 /// a vmlinux binary or a kernel module).
559 ///
560 /// @param elf_handle the elf handle to consider.
561 ///
562 /// @return the __ksymtab section if found, nil otherwise.
563 Elf_Scn*
find_ksymtab_gpl_section(Elf * elf_handle)564 find_ksymtab_gpl_section(Elf* elf_handle)
565 {return find_section(elf_handle, "__ksymtab_gpl", SHT_PROGBITS);}
566 
567 /// Find the __ksymtab_strings section of a Linux kernel binary.
568 ///
569 /// @param elf_handle the elf handle to use.
570 ///
571 /// @return the find_ksymtab_strings_section of the linux kernel
572 /// binary denoted by @p elf_handle, or nil if such a section could
573 /// not be found.
574 Elf_Scn*
find_ksymtab_strings_section(Elf * elf_handle)575 find_ksymtab_strings_section(Elf *elf_handle)
576 {
577   if (is_linux_kernel(elf_handle))
578     return find_section(elf_handle, "__ksymtab_strings", SHT_PROGBITS);
579   return 0;
580 }
581 
582 /// Return the .rel{a,} section corresponding to a given section.
583 ///
584 /// @param elf_handle the elf handle to consider.
585 ///
586 /// @param target_section the section to search the relocation section for
587 ///
588 /// @return the .rel{a,} section if found, null otherwise.
589 Elf_Scn*
find_relocation_section(Elf * elf_handle,Elf_Scn * target_section)590 find_relocation_section(Elf* elf_handle, Elf_Scn* target_section)
591 {
592   if (target_section)
593     {
594       // the relo section we are searching for has this index as sh_info
595       size_t target_index = elf_ndxscn(target_section);
596 
597       // now iterate over all the sections, look for relocation sections and
598       // find the one that points to the section we are searching for
599       Elf_Scn*	section = 0;
600       GElf_Shdr header_mem, *header;
601       while ((section = elf_nextscn(elf_handle, section)) != 0)
602 	{
603 	  header = gelf_getshdr(section, &header_mem);
604 	  if (header == NULL
605 	      || (header->sh_type != SHT_RELA && header->sh_type != SHT_REL))
606 	    continue;
607 
608 	  if (header->sh_info == target_index)
609 	    return section;
610 	}
611     }
612   return NULL;
613 }
614 
615 /// Get the version definition (from the SHT_GNU_verdef section) of a
616 /// given symbol represented by a pointer to GElf_Versym.
617 ///
618 /// @param elf_hande the elf handle to use.
619 ///
620 /// @param versym the symbol to get the version definition for.
621 ///
622 /// @param verdef_section the SHT_GNU_verdef section.
623 ///
624 /// @param version the resulting version definition.  This is set iff
625 /// the function returns true.
626 ///
627 /// @return true upon successful completion, false otherwise.
628 bool
get_version_definition_for_versym(Elf * elf_handle,GElf_Versym * versym,Elf_Scn * verdef_section,elf_symbol::version & version)629 get_version_definition_for_versym(Elf*			 elf_handle,
630 				  GElf_Versym*		 versym,
631 				  Elf_Scn*		 verdef_section,
632 				  elf_symbol::version&	 version)
633 {
634   Elf_Data* verdef_data = elf_getdata(verdef_section, NULL);
635   GElf_Verdef verdef_mem;
636   GElf_Verdef* verdef = gelf_getverdef(verdef_data, 0, &verdef_mem);
637   size_t vd_offset = 0;
638 
639   for (;; vd_offset += verdef->vd_next)
640     {
641       for (;verdef != 0;)
642 	{
643 	  if (verdef->vd_ndx == (*versym & 0x7fff))
644 	    // Found the version of the symbol.
645 	    break;
646 	  vd_offset += verdef->vd_next;
647 	  verdef = (verdef->vd_next == 0
648 		    ? 0
649 		    : gelf_getverdef(verdef_data, vd_offset, &verdef_mem));
650 	}
651 
652       if (verdef != 0)
653 	{
654 	  GElf_Verdaux verdaux_mem;
655 	  GElf_Verdaux *verdaux = gelf_getverdaux(verdef_data,
656 						  vd_offset + verdef->vd_aux,
657 						  &verdaux_mem);
658 	  GElf_Shdr header_mem;
659 	  GElf_Shdr* verdef_section_header = gelf_getshdr(verdef_section,
660 							  &header_mem);
661 	  size_t verdef_stridx = verdef_section_header->sh_link;
662 	  version.str(elf_strptr(elf_handle, verdef_stridx, verdaux->vda_name));
663 	  if (*versym & 0x8000)
664 	    version.is_default(false);
665 	  else
666 	    version.is_default(true);
667 	  return true;
668 	}
669       if (!verdef || verdef->vd_next == 0)
670 	break;
671     }
672   return false;
673 }
674 
675 /// Get the version needed (from the SHT_GNU_verneed section) to
676 /// resolve an undefined symbol represented by a pointer to
677 /// GElf_Versym.
678 ///
679 /// @param elf_hande the elf handle to use.
680 ///
681 /// @param versym the symbol to get the version definition for.
682 ///
683 /// @param verneed_section the SHT_GNU_verneed section.
684 ///
685 /// @param version the resulting version definition.  This is set iff
686 /// the function returns true.
687 ///
688 /// @return true upon successful completion, false otherwise.
689 bool
get_version_needed_for_versym(Elf * elf_handle,GElf_Versym * versym,Elf_Scn * verneed_section,elf_symbol::version & version)690 get_version_needed_for_versym(Elf*			elf_handle,
691 			      GElf_Versym*		versym,
692 			      Elf_Scn*			verneed_section,
693 			      elf_symbol::version&	version)
694 {
695   if (versym == 0 || elf_handle == 0 || verneed_section == 0)
696     return false;
697 
698   size_t vn_offset = 0;
699   Elf_Data* verneed_data = elf_getdata(verneed_section, NULL);
700   GElf_Verneed verneed_mem;
701   GElf_Verneed* verneed = gelf_getverneed(verneed_data, 0, &verneed_mem);
702 
703   for (;verneed; vn_offset += verneed->vn_next)
704     {
705       size_t vna_offset = vn_offset;
706       GElf_Vernaux vernaux_mem;
707       GElf_Vernaux *vernaux = gelf_getvernaux(verneed_data,
708 					      vn_offset + verneed->vn_aux,
709 					      &vernaux_mem);
710       for (;vernaux != 0 && verneed;)
711 	{
712 	  if (vernaux->vna_other == *versym)
713 	    // Found the version of the symbol.
714 	    break;
715 	  vna_offset += verneed->vn_next;
716 	  verneed = (verneed->vn_next == 0
717 		     ? 0
718 		     : gelf_getverneed(verneed_data, vna_offset, &verneed_mem));
719 	}
720 
721       if (verneed != 0 && vernaux != 0 && vernaux->vna_other == *versym)
722 	{
723 	  GElf_Shdr header_mem;
724 	  GElf_Shdr* verneed_section_header = gelf_getshdr(verneed_section,
725 							   &header_mem);
726 	  size_t verneed_stridx = verneed_section_header->sh_link;
727 	  version.str(elf_strptr(elf_handle,
728 				 verneed_stridx,
729 				 vernaux->vna_name));
730 	  if (*versym & 0x8000)
731 	    version.is_default(false);
732 	  else
733 	    version.is_default(true);
734 	  return true;
735 	}
736 
737       if (!verneed || verneed->vn_next == 0)
738 	break;
739     }
740   return false;
741 }
742 
743 /// Return the version for a symbol that is at a given index in its
744 /// SHT_SYMTAB section.
745 ///
746 /// @param elf_handle the elf handle to use.
747 ///
748 /// @param symbol_index the index of the symbol to consider.
749 ///
750 /// @param get_def_version if this is true, it means that that we want
751 /// the version for a defined symbol; in that case, the version is
752 /// looked for in a section of type SHT_GNU_verdef.  Otherwise, if
753 /// this parameter is false, this means that we want the version for
754 /// an undefined symbol; in that case, the version is the needed one
755 /// for the symbol to be resolved; so the version is looked fo in a
756 /// section of type SHT_GNU_verneed.
757 ///
758 /// @param version the version found for symbol at @p symbol_index.
759 ///
760 /// @return true iff a version was found for symbol at index @p
761 /// symbol_index.
762 bool
get_version_for_symbol(Elf * elf_handle,size_t symbol_index,bool get_def_version,elf_symbol::version & version)763 get_version_for_symbol(Elf*			elf_handle,
764 		       size_t			symbol_index,
765 		       bool			get_def_version,
766 		       elf_symbol::version&	version)
767 {
768   Elf_Scn *versym_section = NULL,
769     *verdef_section = NULL,
770     *verneed_section = NULL;
771 
772   if (!get_symbol_versionning_sections(elf_handle,
773 				       versym_section,
774 				       verdef_section,
775 				       verneed_section))
776     return false;
777 
778   GElf_Versym versym_mem;
779   Elf_Data* versym_data = (versym_section)
780     ? elf_getdata(versym_section, NULL)
781     : NULL;
782   GElf_Versym* versym = (versym_data)
783     ? gelf_getversym(versym_data, symbol_index, &versym_mem)
784     : NULL;
785 
786   if (versym == 0 || *versym <= 1)
787     // I got these value from the code of readelf.c in elfutils.
788     // Apparently, if the symbol version entry has these values, the
789     // symbol must be discarded. This is not documented in the
790     // official specification.
791     return false;
792 
793   if (get_def_version)
794     {
795       if (*versym == 0x8001)
796 	// I got this value from the code of readelf.c in elfutils
797 	// too.  It's not really documented in the official
798 	// specification.
799 	return false;
800 
801       if (verdef_section
802 	  && get_version_definition_for_versym(elf_handle, versym,
803 					       verdef_section, version))
804 	return true;
805     }
806   else
807     {
808       if (verneed_section
809 	  && get_version_needed_for_versym(elf_handle, versym,
810 					   verneed_section, version))
811 	return true;
812     }
813 
814   return false;
815 }
816 
817 /// Test if the architecture of the current binary is ppc64.
818 ///
819 /// @param elf_handle the ELF handle to consider.
820 ///
821 /// @return true iff the architecture of the current binary is ppc64.
822 bool
architecture_is_ppc64(Elf * elf_handle)823 architecture_is_ppc64(Elf* elf_handle)
824 {
825   GElf_Ehdr  eh_mem;
826   GElf_Ehdr* elf_header = gelf_getehdr(elf_handle, &eh_mem);
827   return (elf_header && elf_header->e_machine == EM_PPC64);
828 }
829 
830 /// Test if the architecture of the current binary is arm32.
831 ///
832 /// @param elf_handle the ELF handle to consider.
833 ///
834 /// @return true iff the architecture of the current binary is arm32.
835 bool
architecture_is_arm32(Elf * elf_handle)836 architecture_is_arm32(Elf* elf_handle)
837 {
838   GElf_Ehdr  eh_mem;
839   GElf_Ehdr* elf_header = gelf_getehdr(elf_handle, &eh_mem);
840   return (elf_header && elf_header->e_machine == EM_ARM);
841 }
842 
843 /// Test if the endianness of the current binary is Big Endian.
844 ///
845 /// https://en.wikipedia.org/wiki/Endianness.
846 ///
847 /// @param elf_handle the ELF handle to consider.
848 ///
849 /// @return true iff the current binary is Big Endian.
850 bool
architecture_is_big_endian(Elf * elf_handle)851 architecture_is_big_endian(Elf* elf_handle)
852 {
853   GElf_Ehdr  elf_header;
854   gelf_getehdr(elf_handle, &elf_header);
855 
856   bool is_big_endian = (elf_header.e_ident[EI_DATA] == ELFDATA2MSB);
857 
858   if (!is_big_endian)
859     ABG_ASSERT(elf_header.e_ident[EI_DATA] == ELFDATA2LSB);
860 
861   return is_big_endian;
862 }
863 
/// Read N bytes and convert their value into an integer type T.
///
/// Note that N cannot be bigger than 8 for now, and that the type T
/// needs to be at least N bytes wide; this function aborts (through
/// ABG_ASSERT) otherwise.
///
/// The value is assembled in an unsigned 64 bits accumulator before
/// being converted to T, so that no shift is ever performed on a
/// narrow or signed type.
///
/// @param bytes the array of bytes to read from.  Note that this
/// array must be at least @p number_of_bytes bytes long.
///
/// @param number_of_bytes the number of bytes to read.  This number
/// cannot be bigger than 8.  If it is zero, no byte is read and the
/// resulting integer is zero.
///
/// @param is_big_endian if true, read the bytes in Big Endian mode,
/// otherwise, read them in Little Endian.
///
/// @param result where to store the resulting integer that was
/// read.  This is set iff the function returns true.
///
/// @return true if the bytes could be read, false otherwise
/// (i.e. if @p bytes is nil).
template <typename T>
bool
read_int_from_array_of_bytes(const uint8_t* bytes,
                             unsigned char  number_of_bytes,
                             bool           is_big_endian,
                             T&             result)
{
  if (!bytes)
    return false;

  ABG_ASSERT(number_of_bytes <= 8);
  ABG_ASSERT(number_of_bytes <= sizeof(T));

  uint64_t value = 0;

  if (is_big_endian)
    {
      // In Big Endian, the most significant byte is at the lowest
      // address, so each new byte is less significant than the ones
      // read so far.
      for (unsigned i = 0; i < number_of_bytes; ++i)
        value = (value << 8) | bytes[i];
    }
  else
    {
      // In Little Endian, the least significant byte is at the
      // lowest address, so the byte at index i weighs 2^(8 * i).
      for (unsigned i = 0; i < number_of_bytes; ++i)
        value |= static_cast<uint64_t>(bytes[i]) << (i * 8);
    }

  result = static_cast<T>(value);
  return true;
}
923 
924 /// Read 8 bytes and convert their value into an uint64_t.
925 ///
926 /// @param bytes the array of bytes to read the next 8 bytes from.
927 /// Note that this array must be at least 8 bytes long.
928 ///
929 /// @param result where to store the resuting uint64_t that was read.
930 ///
931 /// @param is_big_endian if true, read the 8 bytes in Big Endian
932 /// mode, otherwise, read them in Little Endian.
933 ///
934 /// @param true if the 8 bytes could be read, false otherwise.
935 bool
read_uint64_from_array_of_bytes(const uint8_t * bytes,bool is_big_endian,uint64_t & result)936 read_uint64_from_array_of_bytes(const uint8_t* bytes,
937 				bool	       is_big_endian,
938 				uint64_t&      result)
939 {
940   return read_int_from_array_of_bytes(bytes, 8, is_big_endian, result);
941 }
942 
943 
944 /// Lookup the address of the function entry point that corresponds
945 /// to the address of a given function descriptor.
946 ///
947 /// On PPC64, a function pointer is the address of a function
948 /// descriptor.  Function descriptors are located in the .opd
949 /// section.  Each function descriptor is a triplet of three
950 /// addresses, each one on 64 bits.  Among those three address only
951 /// the first one is of any interest to us: the address of the entry
952 /// point of the function.
953 ///
954 /// This function returns the address of the entry point of the
955 /// function whose descriptor's address is given.
956 ///
957 /// http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi.html#FUNC-DES
958 ///
959 /// https://www.ibm.com/developerworks/community/blogs/5894415f-be62-4bc0-81c5-3956e82276f3/entry/deeply_understand_64_bit_powerpc_elf_abi_function_descriptors?lang=en
960 ///
961 /// @param fn_desc_address the address of the function descriptor to
962 /// consider.
963 ///
964 /// @return the address of the entry point of the function whose
965 /// descriptor has the address @p fn_desc_address.  If there is no
966 /// .opd section (e.g because we are not on ppc64) or more generally
967 /// if the function descriptor could not be found then this function
968 /// just returns the address of the fuction descriptor.
969 GElf_Addr
lookup_ppc64_elf_fn_entry_point_address(Elf * elf_handle,GElf_Addr fn_desc_address)970 lookup_ppc64_elf_fn_entry_point_address(Elf* elf_handle, GElf_Addr fn_desc_address)
971 {
972   if (!elf_handle)
973     return fn_desc_address;
974 
975   if (!architecture_is_ppc64(elf_handle))
976     return fn_desc_address;
977 
978   bool is_big_endian = architecture_is_big_endian(elf_handle);
979 
980   Elf_Scn* opd_section = find_opd_section(elf_handle);
981   if (!opd_section)
982     return fn_desc_address;
983 
984   GElf_Shdr header_mem;
985   // The section header of the .opd section.
986   GElf_Shdr* opd_sheader = gelf_getshdr(opd_section, &header_mem);
987 
988   // The offset of the function descriptor entry, in the .opd
989   // section.
990   size_t    fn_desc_offset = fn_desc_address - opd_sheader->sh_addr;
991   Elf_Data* elf_data = elf_rawdata(opd_section, 0);
992 
993   // Ensure that the opd_section has at least 8 bytes, starting from
994   // the offset we want read the data from.
995   if (elf_data->d_size <= fn_desc_offset + 8)
996     return fn_desc_address;
997 
998   // A pointer to the data of the .opd section, that we can actually
999   // do something with.
1000   uint8_t* bytes = (uint8_t*)elf_data->d_buf;
1001 
1002   // The resulting address we are looking for is going to be formed
1003   // in this variable.
1004   GElf_Addr result = 0;
1005   ABG_ASSERT(read_uint64_from_array_of_bytes(bytes + fn_desc_offset,
1006 					     is_big_endian, result));
1007 
1008   return result;
1009 }
1010 
1011 /// Test if the ELF binary denoted by a given ELF handle is a Linux
1012 /// Kernel Module.
1013 ///
1014 /// @param elf_handle the ELF handle to consider.
1015 ///
1016 /// @return true iff the binary denoted by @p elf_handle is a Linux
1017 /// kernel module.
1018 bool
is_linux_kernel_module(Elf * elf_handle)1019 is_linux_kernel_module(Elf *elf_handle)
1020 {
1021   return (find_section(elf_handle, ".modinfo", SHT_PROGBITS)
1022 	  && find_section(elf_handle,
1023 			  ".gnu.linkonce.this_module",
1024 			  SHT_PROGBITS));
1025 }
1026 
1027 /// Test if the ELF binary denoted by a given ELF handle is a Linux
1028 /// Kernel binary (either vmlinux or a kernel module).
1029 ///
1030 /// @param elf_handle the ELF handle to consider.
1031 ///
1032 /// @return true iff the binary denoted by @p elf_handle is a Linux
1033 /// kernel binary
1034 bool
is_linux_kernel(Elf * elf_handle)1035 is_linux_kernel(Elf *elf_handle)
1036 {
1037   return (find_section(elf_handle,
1038 		       "__ksymtab_strings",
1039 		       SHT_PROGBITS)
1040 	  || is_linux_kernel_module(elf_handle));
1041 }
1042 
1043 /// Get the address at which a given binary is loaded in memory.
1044 ///
1045 /// @param elf_handle the elf handle for the binary to consider.
1046 ///
1047 /// @param load_address the address where the binary is loaded.  This
1048 /// is set by the function iff it returns true.
1049 ///
1050 /// @return true if the function could get the binary load address
1051 /// and assign @p load_address to it.
1052 bool
get_binary_load_address(Elf * elf_handle,GElf_Addr & load_address)1053 get_binary_load_address(Elf* elf_handle, GElf_Addr& load_address)
1054 {
1055   GElf_Ehdr elf_header;
1056   gelf_getehdr(elf_handle, &elf_header);
1057   size_t num_segments = elf_header.e_phnum;
1058   GElf_Phdr *program_header = NULL;
1059   GElf_Addr result;
1060   bool found_loaded_segment = false;
1061   GElf_Phdr ph_mem;
1062 
1063   for (unsigned i = 0; i < num_segments; ++i)
1064     {
1065       program_header = gelf_getphdr(elf_handle, i, &ph_mem);
1066       if (program_header && program_header->p_type == PT_LOAD)
1067 	{
1068 	  if (!found_loaded_segment)
1069 	    {
1070 	      result = program_header->p_vaddr;
1071 	      found_loaded_segment = true;
1072 	    }
1073 
1074 	  if (program_header->p_vaddr < result)
1075 	    // The resulting load address we want is the lowest
1076 	    // load address of all the loaded segments.
1077 	    result = program_header->p_vaddr;
1078 	}
1079     }
1080 
1081   if (found_loaded_segment)
1082     {
1083       load_address = result;
1084       return true;
1085     }
1086   return false;
1087 }
1088 
1089 /// Return the size of a word for the current architecture.
1090 ///
1091 /// @param elf_handle the ELF handle to consider.
1092 ///
1093 /// @return the size of a word.
1094 unsigned char
get_architecture_word_size(Elf * elf_handle)1095 get_architecture_word_size(Elf* elf_handle)
1096 {
1097   unsigned char word_size = 0;
1098   GElf_Ehdr	elf_header;
1099   gelf_getehdr(elf_handle, &elf_header);
1100   if (elf_header.e_ident[EI_CLASS] == ELFCLASS32)
1101     word_size = 4;
1102   else if (elf_header.e_ident[EI_CLASS] == ELFCLASS64)
1103     word_size = 8;
1104   else
1105     ABG_ASSERT_NOT_REACHED;
1106   return word_size;
1107 }
1108 
1109 /// Test if the elf file being read is an executable.
1110 ///
1111 /// @param elf_handle the ELF handle to consider.
1112 ///
1113 /// @return true iff the elf file being read is an / executable.
1114 bool
is_executable(Elf * elf_handle)1115 is_executable(Elf* elf_handle)
1116 {
1117   GElf_Ehdr  elf_header;
1118   gelf_getehdr(elf_handle, &elf_header);
1119   return elf_header.e_type == ET_EXEC;
1120 }
1121 
1122 /// Test if the elf file being read is a dynamic shared / object.
1123 ///
1124 /// @param elf_handle the ELF handle to consider.
1125 ///
1126 /// @return true iff the elf file being read is a / dynamic shared object.
1127 bool
is_dso(Elf * elf_handle)1128 is_dso(Elf* elf_handle)
1129 {
1130   GElf_Ehdr  elf_header;
1131   gelf_getehdr(elf_handle, &elf_header);
1132   return elf_header.e_type == ET_DYN;
1133 }
1134 
1135 /// Translate a section-relative symbol address (i.e, symbol value)
1136 /// into an absolute symbol address by adding the address of the
1137 /// section the symbol belongs to, to the address value.
1138 ///
1139 /// This is useful when looking at symbol values coming from
1140 /// relocatable files (of ET_REL kind).  If the binary is not
1141 /// ET_REL, then the function does nothing and returns the input
1142 /// address unchanged.
1143 ///
1144 /// @param elf_handle the elf handle for the binary to consider.
1145 ///
1146 /// @param sym the symbol whose address to possibly needs to be
1147 /// translated.
1148 ///
1149 /// @return the section-relative address, translated into an
1150 /// absolute address, if @p sym is from an ET_REL binary.
1151 /// Otherwise, return the address of @p sym, unchanged.
1152 GElf_Addr
maybe_adjust_et_rel_sym_addr_to_abs_addr(Elf * elf_handle,GElf_Sym * sym)1153 maybe_adjust_et_rel_sym_addr_to_abs_addr(Elf* elf_handle, GElf_Sym* sym)
1154 {
1155   Elf_Scn*  symbol_section = elf_getscn(elf_handle, sym->st_shndx);
1156   GElf_Addr addr = sym->st_value;
1157 
1158   if (!symbol_section)
1159     return addr;
1160 
1161   GElf_Ehdr elf_header;
1162   if (!gelf_getehdr(elf_handle, &elf_header))
1163     return addr;
1164 
1165   if (elf_header.e_type != ET_REL)
1166     return addr;
1167 
1168   GElf_Shdr section_header;
1169   if (!gelf_getshdr(symbol_section, &section_header))
1170     return addr;
1171 
1172   return addr + section_header.sh_addr;
1173 }
1174 
1175 /// Test if a given address is in a given section.
1176 ///
1177 /// @param addr the address to consider.
1178 ///
1179 /// @param section the section to consider.
1180 ///
1181 /// @return true iff @p addr is in section @p section.
1182 bool
address_is_in_section(Dwarf_Addr addr,Elf_Scn * section)1183 address_is_in_section(Dwarf_Addr addr, Elf_Scn* section)
1184 {
1185   if (!section)
1186     return false;
1187 
1188   GElf_Shdr  sheader_mem;
1189   GElf_Shdr* sheader = gelf_getshdr(section, &sheader_mem);
1190 
1191   if (sheader->sh_addr <= addr && addr <= sheader->sh_addr + sheader->sh_size)
1192     return true;
1193 
1194   return false;
1195 }
1196 
1197 /// Return true if an address is in the ".opd" section that is
1198 /// present on the ppc64 platform.
1199 ///
1200 /// @param addr the address to consider.
1201 ///
1202 /// @return true iff @p addr designates a word that is in the ".opd"
1203 /// section.
1204 bool
address_is_in_opd_section(Elf * elf_handle,Dwarf_Addr addr)1205 address_is_in_opd_section(Elf* elf_handle, Dwarf_Addr addr)
1206 {
1207   Elf_Scn * opd_section = find_opd_section(elf_handle);
1208   if (!opd_section)
1209     return false;
1210   if (address_is_in_section(addr, opd_section))
1211     return true;
1212   return false;
1213 }
1214 
1215 
1216 } // end namespace elf_helpers
1217 } // end namespace abigail
1218