1 //===- ELFReader.cpp ------------------------------------------------------===//
2 //
3 //                     The MCLinker Project
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "mcld/LD/ELFReader.h"
10 
11 #include "mcld/IRBuilder.h"
12 #include "mcld/Fragment/FillFragment.h"
13 #include "mcld/LD/EhFrame.h"
14 #include "mcld/LD/LDContext.h"
15 #include "mcld/LD/SectionData.h"
16 #include "mcld/Object/ObjectBuilder.h"
17 #include "mcld/Support/MemoryArea.h"
18 #include "mcld/Support/MsgHandling.h"
19 #include "mcld/Target/GNUInfo.h"
20 #include "mcld/Target/GNULDBackend.h"
21 
22 #include <llvm/ADT/StringRef.h>
23 #include <llvm/ADT/Twine.h>
24 #include <llvm/Support/ELF.h>
25 #include <llvm/Support/Host.h>
26 
27 #include <iostream>
28 
29 #include <cstring>
30 
31 namespace mcld {
32 
33 //===----------------------------------------------------------------------===//
34 // ELFReader<32, true>
35 //===----------------------------------------------------------------------===//
36 /// constructor
ELFReader(GNULDBackend & pBackend)37 ELFReader<32, true>::ELFReader(GNULDBackend& pBackend) : ELFReaderIF(pBackend) {
38 }
39 
40 /// destructor
~ELFReader()41 ELFReader<32, true>::~ELFReader() {
42 }
43 
44 /// isELF - is this a ELF file
isELF(const void * pELFHeader) const45 bool ELFReader<32, true>::isELF(const void* pELFHeader) const {
46   const llvm::ELF::Elf32_Ehdr* hdr =
47       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
48   if (memcmp(llvm::ELF::ElfMagic, hdr, 4) == 0)
49     return true;
50   return false;
51 }
52 
53 /// readRegularSection - read a regular section and create fragments.
readRegularSection(Input & pInput,SectionData & pSD) const54 bool ELFReader<32, true>::readRegularSection(Input& pInput,
55                                              SectionData& pSD) const {
56   uint32_t offset = pInput.fileOffset() + pSD.getSection().offset();
57   uint32_t size = pSD.getSection().size();
58 
59   Fragment* frag = IRBuilder::CreateRegion(pInput, offset, size);
60   ObjectBuilder::AppendFragment(*frag, pSD);
61   return true;
62 }
63 
64 /// readSymbols - read ELF symbols and create LDSymbol
readSymbols(Input & pInput,IRBuilder & pBuilder,llvm::StringRef pRegion,const char * pStrTab) const65 bool ELFReader<32, true>::readSymbols(Input& pInput,
66                                       IRBuilder& pBuilder,
67                                       llvm::StringRef pRegion,
68                                       const char* pStrTab) const {
69   // get number of symbols
70   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf32_Sym);
71   const llvm::ELF::Elf32_Sym* symtab =
72       reinterpret_cast<const llvm::ELF::Elf32_Sym*>(pRegion.begin());
73 
74   uint32_t st_name = 0x0;
75   uint32_t st_value = 0x0;
76   uint32_t st_size = 0x0;
77   uint8_t st_info = 0x0;
78   uint8_t st_other = 0x0;
79   uint16_t st_shndx = 0x0;
80 
81   // skip the first NULL symbol
82   pInput.context()->addSymbol(LDSymbol::Null());
83 
84   /// recording symbols added from DynObj to analyze weak alias
85   std::vector<AliasInfo> potential_aliases;
86   bool is_dyn_obj = (pInput.type() == Input::DynObj);
87   for (size_t idx = 1; idx < entsize; ++idx) {
88     st_info = symtab[idx].st_info;
89     st_other = symtab[idx].st_other;
90 
91     if (llvm::sys::IsLittleEndianHost) {
92       st_name = symtab[idx].st_name;
93       st_value = symtab[idx].st_value;
94       st_size = symtab[idx].st_size;
95       st_shndx = symtab[idx].st_shndx;
96     } else {
97       st_name = mcld::bswap32(symtab[idx].st_name);
98       st_value = mcld::bswap32(symtab[idx].st_value);
99       st_size = mcld::bswap32(symtab[idx].st_size);
100       st_shndx = mcld::bswap16(symtab[idx].st_shndx);
101     }
102 
103     // If the section should not be included, set the st_shndx SHN_UNDEF
104     // - A section in interrelated groups are not included.
105     if (pInput.type() == Input::Object && st_shndx < llvm::ELF::SHN_LORESERVE &&
106         st_shndx != llvm::ELF::SHN_UNDEF) {
107       if (pInput.context()->getSection(st_shndx) == NULL)
108         st_shndx = llvm::ELF::SHN_UNDEF;
109     }
110 
111     // get ld_type
112     ResolveInfo::Type ld_type = getSymType(st_info, st_shndx);
113 
114     // get ld_desc
115     ResolveInfo::Desc ld_desc = getSymDesc(st_shndx, pInput);
116 
117     // get ld_binding
118     ResolveInfo::Binding ld_binding =
119         getSymBinding((st_info >> 4), st_shndx, st_other);
120 
121     // get ld_value - ld_value must be section relative.
122     uint64_t ld_value = getSymValue(st_value, st_shndx, pInput);
123 
124     // get ld_vis
125     ResolveInfo::Visibility ld_vis = getSymVisibility(st_other);
126 
127     // get section
128     LDSection* section = NULL;
129     if (st_shndx < llvm::ELF::SHN_LORESERVE)  // including ABS and COMMON
130       section = pInput.context()->getSection(st_shndx);
131 
132     // get ld_name
133     std::string ld_name;
134     if (ResolveInfo::Section == ld_type) {
135       // Section symbol's st_name is the section index.
136       assert(section != NULL && "get a invalid section");
137       ld_name = section->name();
138     } else {
139       ld_name = std::string(pStrTab + st_name);
140     }
141 
142     LDSymbol* psym = pBuilder.AddSymbol(pInput,
143                                         ld_name,
144                                         ld_type,
145                                         ld_desc,
146                                         ld_binding,
147                                         st_size,
148                                         ld_value,
149                                         section,
150                                         ld_vis);
151 
152     if (is_dyn_obj && psym != NULL && ResolveInfo::Undefined != ld_desc &&
153         (ResolveInfo::Global == ld_binding ||
154          ResolveInfo::Weak == ld_binding) &&
155         ResolveInfo::Object == ld_type) {
156       AliasInfo p;
157       p.pt_alias = psym;
158       p.ld_binding = ld_binding;
159       p.ld_value = ld_value;
160       potential_aliases.push_back(p);
161     }
162   }  // end of for loop
163 
164   // analyze weak alias
165   // FIXME: it is better to let IRBuilder handle alias anlysis.
166   //        1. eliminate code duplication
167   //        2. easy to know if a symbol is from .so
168   //           (so that it may be a potential alias)
169   if (is_dyn_obj) {
170     // sort symbols by symbol value and then weak before strong
171     std::sort(potential_aliases.begin(), potential_aliases.end(), less);
172 
173     // for each weak symbol, find out all its aliases, and
174     // then link them as a circular list in Module
175     std::vector<AliasInfo>::iterator sym_it, sym_e;
176     sym_e = potential_aliases.end();
177     for (sym_it = potential_aliases.begin(); sym_it != sym_e; ++sym_it) {
178       if (ResolveInfo::Weak != sym_it->ld_binding)
179         continue;
180 
181       Module& pModule = pBuilder.getModule();
182       std::vector<AliasInfo>::iterator alias_it = sym_it + 1;
183       while (alias_it != sym_e) {
184         if (sym_it->ld_value != alias_it->ld_value)
185           break;
186 
187         if (sym_it + 1 == alias_it)
188           pModule.CreateAliasList(*sym_it->pt_alias->resolveInfo());
189         pModule.addAlias(*alias_it->pt_alias->resolveInfo());
190         ++alias_it;
191       }
192 
193       sym_it = alias_it - 1;
194     }  // end of for loop
195   }
196 
197   return true;
198 }
199 
200 //===----------------------------------------------------------------------===//
201 // ELFReader::read relocations - read ELF rela and rel, and create Relocation
202 //===----------------------------------------------------------------------===//
203 /// ELFReader::readRela - read ELF rela and create Relocation
readRela(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const204 bool ELFReader<32, true>::readRela(Input& pInput,
205                                    LDSection& pSection,
206                                    llvm::StringRef pRegion) const {
207   // get the number of rela
208   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf32_Rela);
209   const llvm::ELF::Elf32_Rela* relaTab =
210       reinterpret_cast<const llvm::ELF::Elf32_Rela*>(pRegion.begin());
211 
212   for (size_t idx = 0; idx < entsize; ++idx) {
213     Relocation::Type r_type = 0x0;
214     uint32_t r_sym = 0x0;
215     uint32_t r_offset = 0x0;
216     int32_t r_addend = 0;
217     if (!target()
218              .readRelocation(relaTab[idx], r_type, r_sym, r_offset, r_addend)) {
219       return false;
220     }
221 
222     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
223     if (symbol == NULL) {
224       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
225     }
226 
227     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset, r_addend);
228   }  // end of for
229   return true;
230 }
231 
232 /// readRel - read ELF rel and create Relocation
readRel(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const233 bool ELFReader<32, true>::readRel(Input& pInput,
234                                   LDSection& pSection,
235                                   llvm::StringRef pRegion) const {
236   // get the number of rel
237   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf32_Rel);
238   const llvm::ELF::Elf32_Rel* relTab =
239       reinterpret_cast<const llvm::ELF::Elf32_Rel*>(pRegion.begin());
240 
241   for (size_t idx = 0; idx < entsize; ++idx) {
242     Relocation::Type r_type = 0x0;
243     uint32_t r_sym = 0x0;
244     uint32_t r_offset = 0x0;
245 
246     if (!target().readRelocation(relTab[idx], r_type, r_sym, r_offset))
247       return false;
248 
249     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
250     if (symbol == NULL) {
251       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
252     }
253 
254     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset);
255   }  // end of for
256   return true;
257 }
258 
259 /// isMyEndian - is this ELF file in the same endian to me?
isMyEndian(const void * pELFHeader) const260 bool ELFReader<32, true>::isMyEndian(const void* pELFHeader) const {
261   const llvm::ELF::Elf32_Ehdr* hdr =
262       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
263 
264   return (hdr->e_ident[llvm::ELF::EI_DATA] == llvm::ELF::ELFDATA2LSB);
265 }
266 
267 /// isMyMachine - is this ELF file generated for the same machine.
isMyMachine(const void * pELFHeader) const268 bool ELFReader<32, true>::isMyMachine(const void* pELFHeader) const {
269   const llvm::ELF::Elf32_Ehdr* hdr =
270       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
271 
272   if (llvm::sys::IsLittleEndianHost)
273     return (hdr->e_machine == target().getInfo().machine());
274   return (mcld::bswap16(hdr->e_machine) == target().getInfo().machine());
275 }
276 
277 /// fileType - return the file type
fileType(const void * pELFHeader) const278 Input::Type ELFReader<32, true>::fileType(const void* pELFHeader) const {
279   const llvm::ELF::Elf32_Ehdr* hdr =
280       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
281   uint32_t type = 0x0;
282   if (llvm::sys::IsLittleEndianHost)
283     type = hdr->e_type;
284   else
285     type = mcld::bswap16(hdr->e_type);
286 
287   switch (type) {
288     case llvm::ELF::ET_REL:
289       return Input::Object;
290     case llvm::ELF::ET_EXEC:
291       return Input::Exec;
292     case llvm::ELF::ET_DYN:
293       return Input::DynObj;
294     case llvm::ELF::ET_CORE:
295       return Input::CoreFile;
296     case llvm::ELF::ET_NONE:
297     default:
298       return Input::Unknown;
299   }
300 }
301 
302 /// readSectionHeaders - read ELF section header table and create LDSections
readSectionHeaders(Input & pInput,const void * pELFHeader) const303 bool ELFReader<32, true>::readSectionHeaders(Input& pInput,
304                                              const void* pELFHeader) const {
305   const llvm::ELF::Elf32_Ehdr* ehdr =
306       reinterpret_cast<const llvm::ELF::Elf32_Ehdr*>(pELFHeader);
307 
308   uint32_t shoff = 0x0;
309   uint16_t shentsize = 0x0;
310   uint32_t shnum = 0x0;
311   uint32_t shstrtab = 0x0;
312 
313   if (llvm::sys::IsLittleEndianHost) {
314     shoff = ehdr->e_shoff;
315     shentsize = ehdr->e_shentsize;
316     shnum = ehdr->e_shnum;
317     shstrtab = ehdr->e_shstrndx;
318   } else {
319     shoff = mcld::bswap32(ehdr->e_shoff);
320     shentsize = mcld::bswap16(ehdr->e_shentsize);
321     shnum = mcld::bswap16(ehdr->e_shnum);
322     shstrtab = mcld::bswap16(ehdr->e_shstrndx);
323   }
324 
325   // If the file has no section header table, e_shoff holds zero.
326   if (shoff == 0x0)
327     return true;
328 
329   const llvm::ELF::Elf32_Shdr* shdr = NULL;
330   llvm::StringRef shdr_region;
331   uint32_t sh_name = 0x0;
332   uint32_t sh_type = 0x0;
333   uint32_t sh_flags = 0x0;
334   uint32_t sh_offset = 0x0;
335   uint32_t sh_size = 0x0;
336   uint32_t sh_link = 0x0;
337   uint32_t sh_info = 0x0;
338   uint32_t sh_addralign = 0x0;
339 
340   // if shnum and shstrtab overflow, the actual values are in the 1st shdr
341   if (shnum == llvm::ELF::SHN_UNDEF || shstrtab == llvm::ELF::SHN_XINDEX) {
342     shdr_region =
343         pInput.memArea()->request(pInput.fileOffset() + shoff, shentsize);
344     shdr = reinterpret_cast<const llvm::ELF::Elf32_Shdr*>(shdr_region.begin());
345 
346     if (llvm::sys::IsLittleEndianHost) {
347       sh_size = shdr->sh_size;
348       sh_link = shdr->sh_link;
349     } else {
350       sh_size = mcld::bswap32(shdr->sh_size);
351       sh_link = mcld::bswap32(shdr->sh_link);
352     }
353 
354     if (shnum == llvm::ELF::SHN_UNDEF)
355       shnum = sh_size;
356     if (shstrtab == llvm::ELF::SHN_XINDEX)
357       shstrtab = sh_link;
358 
359     shoff += shentsize;
360   }
361 
362   shdr_region =
363       pInput.memArea()->request(pInput.fileOffset() + shoff, shnum * shentsize);
364   const llvm::ELF::Elf32_Shdr* shdrTab =
365       reinterpret_cast<const llvm::ELF::Elf32_Shdr*>(shdr_region.begin());
366 
367   // get .shstrtab first
368   shdr = &shdrTab[shstrtab];
369   if (llvm::sys::IsLittleEndianHost) {
370     sh_offset = shdr->sh_offset;
371     sh_size = shdr->sh_size;
372   } else {
373     sh_offset = mcld::bswap32(shdr->sh_offset);
374     sh_size = mcld::bswap32(shdr->sh_size);
375   }
376 
377   llvm::StringRef sect_name_region =
378       pInput.memArea()->request(pInput.fileOffset() + sh_offset, sh_size);
379   const char* sect_name = sect_name_region.begin();
380 
381   LinkInfoList link_info_list;
382 
383   // create all LDSections, including first NULL section.
384   for (size_t idx = 0; idx < shnum; ++idx) {
385     if (llvm::sys::IsLittleEndianHost) {
386       sh_name = shdrTab[idx].sh_name;
387       sh_type = shdrTab[idx].sh_type;
388       sh_flags = shdrTab[idx].sh_flags;
389       sh_offset = shdrTab[idx].sh_offset;
390       sh_size = shdrTab[idx].sh_size;
391       sh_link = shdrTab[idx].sh_link;
392       sh_info = shdrTab[idx].sh_info;
393       sh_addralign = shdrTab[idx].sh_addralign;
394     } else {
395       sh_name = mcld::bswap32(shdrTab[idx].sh_name);
396       sh_type = mcld::bswap32(shdrTab[idx].sh_type);
397       sh_flags = mcld::bswap32(shdrTab[idx].sh_flags);
398       sh_offset = mcld::bswap32(shdrTab[idx].sh_offset);
399       sh_size = mcld::bswap32(shdrTab[idx].sh_size);
400       sh_link = mcld::bswap32(shdrTab[idx].sh_link);
401       sh_info = mcld::bswap32(shdrTab[idx].sh_info);
402       sh_addralign = mcld::bswap32(shdrTab[idx].sh_addralign);
403     }
404 
405     LDSection* section = IRBuilder::CreateELFHeader(
406         pInput, sect_name + sh_name, sh_type, sh_flags, sh_addralign);
407     section->setSize(sh_size);
408     section->setOffset(sh_offset);
409     section->setInfo(sh_info);
410 
411     if (sh_link != 0x0 || sh_info != 0x0) {
412       LinkInfo link_info = {section, sh_link, sh_info};
413       link_info_list.push_back(link_info);
414     }
415   }  // end of for
416 
417   // set up InfoLink
418   LinkInfoList::iterator info, infoEnd = link_info_list.end();
419   for (info = link_info_list.begin(); info != infoEnd; ++info) {
420     if (LDFileFormat::Relocation == info->section->kind())
421       info->section->setLink(pInput.context()->getSection(info->sh_info));
422     else
423       info->section->setLink(pInput.context()->getSection(info->sh_link));
424   }
425 
426   return true;
427 }
428 
429 /// readSignature - read a symbol from the given Input and index in symtab
430 /// This is used to get the signature of a group section.
readSignature(Input & pInput,LDSection & pSymTab,uint32_t pSymIdx) const431 ResolveInfo* ELFReader<32, true>::readSignature(Input& pInput,
432                                                 LDSection& pSymTab,
433                                                 uint32_t pSymIdx) const {
434   LDSection* symtab = &pSymTab;
435   LDSection* strtab = symtab->getLink();
436   assert(symtab != NULL && strtab != NULL);
437 
438   uint32_t offset = pInput.fileOffset() + symtab->offset() +
439                     sizeof(llvm::ELF::Elf32_Sym) * pSymIdx;
440   llvm::StringRef symbol_region =
441       pInput.memArea()->request(offset, sizeof(llvm::ELF::Elf32_Sym));
442   const llvm::ELF::Elf32_Sym* entry =
443       reinterpret_cast<const llvm::ELF::Elf32_Sym*>(symbol_region.begin());
444 
445   uint32_t st_name = 0x0;
446   uint8_t st_info = 0x0;
447   uint8_t st_other = 0x0;
448   uint16_t st_shndx = 0x0;
449   st_info = entry->st_info;
450   st_other = entry->st_other;
451   if (llvm::sys::IsLittleEndianHost) {
452     st_name = entry->st_name;
453     st_shndx = entry->st_shndx;
454   } else {
455     st_name = mcld::bswap32(entry->st_name);
456     st_shndx = mcld::bswap16(entry->st_shndx);
457   }
458 
459   llvm::StringRef strtab_region = pInput.memArea()->request(
460       pInput.fileOffset() + strtab->offset(), strtab->size());
461 
462   // get ld_name
463   llvm::StringRef ld_name(strtab_region.begin() + st_name);
464 
465   ResolveInfo* result = ResolveInfo::Create(ld_name);
466   result->setSource(pInput.type() == Input::DynObj);
467   result->setType(static_cast<ResolveInfo::Type>(st_info & 0xF));
468   result->setDesc(getSymDesc(st_shndx, pInput));
469   result->setBinding(getSymBinding((st_info >> 4), st_shndx, st_other));
470   result->setVisibility(getSymVisibility(st_other));
471 
472   return result;
473 }
474 
475 /// readDynamic - read ELF .dynamic in input dynobj
readDynamic(Input & pInput) const476 bool ELFReader<32, true>::readDynamic(Input& pInput) const {
477   assert(pInput.type() == Input::DynObj);
478   const LDSection* dynamic_sect = pInput.context()->getSection(".dynamic");
479   if (dynamic_sect == NULL) {
480     fatal(diag::err_cannot_read_section) << ".dynamic";
481   }
482   const LDSection* dynstr_sect = dynamic_sect->getLink();
483   if (dynstr_sect == NULL) {
484     fatal(diag::err_cannot_read_section) << ".dynstr";
485   }
486 
487   llvm::StringRef dynamic_region = pInput.memArea()->request(
488       pInput.fileOffset() + dynamic_sect->offset(), dynamic_sect->size());
489 
490   llvm::StringRef dynstr_region = pInput.memArea()->request(
491       pInput.fileOffset() + dynstr_sect->offset(), dynstr_sect->size());
492 
493   const llvm::ELF::Elf32_Dyn* dynamic =
494       reinterpret_cast<const llvm::ELF::Elf32_Dyn*>(dynamic_region.begin());
495   const char* dynstr = dynstr_region.begin();
496   bool hasSOName = false;
497   size_t numOfEntries = dynamic_sect->size() / sizeof(llvm::ELF::Elf32_Dyn);
498 
499   for (size_t idx = 0; idx < numOfEntries; ++idx) {
500     llvm::ELF::Elf32_Sword d_tag = 0x0;
501     llvm::ELF::Elf32_Word d_val = 0x0;
502 
503     if (llvm::sys::IsLittleEndianHost) {
504       d_tag = dynamic[idx].d_tag;
505       d_val = dynamic[idx].d_un.d_val;
506     } else {
507       d_tag = mcld::bswap32(dynamic[idx].d_tag);
508       d_val = mcld::bswap32(dynamic[idx].d_un.d_val);
509     }
510 
511     switch (d_tag) {
512       case llvm::ELF::DT_SONAME:
513         assert(d_val < dynstr_sect->size());
514         pInput.setName(sys::fs::Path(dynstr + d_val).filename().native());
515         hasSOName = true;
516         break;
517       case llvm::ELF::DT_NEEDED:
518         // TODO:
519         break;
520       case llvm::ELF::DT_NULL:
521       default:
522         break;
523     }
524   }
525 
526   // if there is no SONAME in .dynamic, then set it from input path
527   if (!hasSOName)
528     pInput.setName(pInput.path().filename().native());
529 
530   return true;
531 }
532 
533 //===----------------------------------------------------------------------===//
534 // ELFReader<64, true>
535 //===----------------------------------------------------------------------===//
536 /// constructor
ELFReader(GNULDBackend & pBackend)537 ELFReader<64, true>::ELFReader(GNULDBackend& pBackend) : ELFReaderIF(pBackend) {
538 }
539 
540 /// destructor
~ELFReader()541 ELFReader<64, true>::~ELFReader() {
542 }
543 
544 /// isELF - is this a ELF file
isELF(const void * pELFHeader) const545 bool ELFReader<64, true>::isELF(const void* pELFHeader) const {
546   const llvm::ELF::Elf64_Ehdr* hdr =
547       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
548   if (memcmp(llvm::ELF::ElfMagic, hdr, 4) == 0)
549     return true;
550   return false;
551 }
552 
553 /// readRegularSection - read a regular section and create fragments.
readRegularSection(Input & pInput,SectionData & pSD) const554 bool ELFReader<64, true>::readRegularSection(Input& pInput,
555                                              SectionData& pSD) const {
556   uint64_t offset = pInput.fileOffset() + pSD.getSection().offset();
557   uint64_t size = pSD.getSection().size();
558 
559   Fragment* frag = IRBuilder::CreateRegion(pInput, offset, size);
560   ObjectBuilder::AppendFragment(*frag, pSD);
561   return true;
562 }
563 
564 /// readSymbols - read ELF symbols and create LDSymbol
readSymbols(Input & pInput,IRBuilder & pBuilder,llvm::StringRef pRegion,const char * pStrTab) const565 bool ELFReader<64, true>::readSymbols(Input& pInput,
566                                       IRBuilder& pBuilder,
567                                       llvm::StringRef pRegion,
568                                       const char* pStrTab) const {
569   // get number of symbols
570   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Sym);
571   const llvm::ELF::Elf64_Sym* symtab =
572       reinterpret_cast<const llvm::ELF::Elf64_Sym*>(pRegion.begin());
573 
574   uint32_t st_name = 0x0;
575   uint64_t st_value = 0x0;
576   uint64_t st_size = 0x0;
577   uint8_t st_info = 0x0;
578   uint8_t st_other = 0x0;
579   uint16_t st_shndx = 0x0;
580 
581   // skip the first NULL symbol
582   pInput.context()->addSymbol(LDSymbol::Null());
583 
584   /// recording symbols added from DynObj to analyze weak alias
585   std::vector<AliasInfo> potential_aliases;
586   bool is_dyn_obj = (pInput.type() == Input::DynObj);
587   for (size_t idx = 1; idx < entsize; ++idx) {
588     st_info = symtab[idx].st_info;
589     st_other = symtab[idx].st_other;
590 
591     if (llvm::sys::IsLittleEndianHost) {
592       st_name = symtab[idx].st_name;
593       st_value = symtab[idx].st_value;
594       st_size = symtab[idx].st_size;
595       st_shndx = symtab[idx].st_shndx;
596     } else {
597       st_name = mcld::bswap32(symtab[idx].st_name);
598       st_value = mcld::bswap64(symtab[idx].st_value);
599       st_size = mcld::bswap64(symtab[idx].st_size);
600       st_shndx = mcld::bswap16(symtab[idx].st_shndx);
601     }
602 
603     // If the section should not be included, set the st_shndx SHN_UNDEF
604     // - A section in interrelated groups are not included.
605     if (pInput.type() == Input::Object && st_shndx < llvm::ELF::SHN_LORESERVE &&
606         st_shndx != llvm::ELF::SHN_UNDEF) {
607       if (pInput.context()->getSection(st_shndx) == NULL)
608         st_shndx = llvm::ELF::SHN_UNDEF;
609     }
610 
611     // get ld_type
612     ResolveInfo::Type ld_type = getSymType(st_info, st_shndx);
613 
614     // get ld_desc
615     ResolveInfo::Desc ld_desc = getSymDesc(st_shndx, pInput);
616 
617     // get ld_binding
618     ResolveInfo::Binding ld_binding =
619         getSymBinding((st_info >> 4), st_shndx, st_other);
620 
621     // get ld_value - ld_value must be section relative.
622     uint64_t ld_value = getSymValue(st_value, st_shndx, pInput);
623 
624     // get ld_vis
625     ResolveInfo::Visibility ld_vis = getSymVisibility(st_other);
626 
627     // get section
628     LDSection* section = NULL;
629     if (st_shndx < llvm::ELF::SHN_LORESERVE)  // including ABS and COMMON
630       section = pInput.context()->getSection(st_shndx);
631 
632     // get ld_name
633     std::string ld_name;
634     if (ResolveInfo::Section == ld_type) {
635       // Section symbol's st_name is the section index.
636       assert(section != NULL && "get a invalid section");
637       ld_name = section->name();
638     } else {
639       ld_name = std::string(pStrTab + st_name);
640     }
641 
642     LDSymbol* psym = pBuilder.AddSymbol(pInput,
643                                         ld_name,
644                                         ld_type,
645                                         ld_desc,
646                                         ld_binding,
647                                         st_size,
648                                         ld_value,
649                                         section,
650                                         ld_vis);
651 
652     if (is_dyn_obj && psym != NULL && ResolveInfo::Undefined != ld_desc &&
653         (ResolveInfo::Global == ld_binding ||
654          ResolveInfo::Weak == ld_binding) &&
655         ResolveInfo::Object == ld_type) {
656       AliasInfo p;
657       p.pt_alias = psym;
658       p.ld_binding = ld_binding;
659       p.ld_value = ld_value;
660       potential_aliases.push_back(p);
661     }
662   }  // end of for loop
663 
664   // analyze weak alias here
665   if (is_dyn_obj) {
666     // sort symbols by symbol value and then weak before strong
667     std::sort(potential_aliases.begin(), potential_aliases.end(), less);
668 
669     // for each weak symbol, find out all its aliases, and
670     // then link them as a circular list in Module
671     std::vector<AliasInfo>::iterator sym_it, sym_e;
672     sym_e = potential_aliases.end();
673     for (sym_it = potential_aliases.begin(); sym_it != sym_e; ++sym_it) {
674       if (ResolveInfo::Weak != sym_it->ld_binding)
675         continue;
676 
677       Module& pModule = pBuilder.getModule();
678       std::vector<AliasInfo>::iterator alias_it = sym_it + 1;
679       while (alias_it != sym_e) {
680         if (sym_it->ld_value != alias_it->ld_value)
681           break;
682 
683         if (sym_it + 1 == alias_it)
684           pModule.CreateAliasList(*sym_it->pt_alias->resolveInfo());
685         pModule.addAlias(*alias_it->pt_alias->resolveInfo());
686         ++alias_it;
687       }
688 
689       sym_it = alias_it - 1;
690     }  // end of for loop
691   }
692   return true;
693 }
694 
695 //===----------------------------------------------------------------------===//
696 // ELFReader::read relocations - read ELF rela and rel, and create Relocation
697 //===----------------------------------------------------------------------===//
698 /// ELFReader::readRela - read ELF rela and create Relocation
readRela(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const699 bool ELFReader<64, true>::readRela(Input& pInput,
700                                    LDSection& pSection,
701                                    llvm::StringRef pRegion) const {
702   // get the number of rela
703   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Rela);
704   const llvm::ELF::Elf64_Rela* relaTab =
705       reinterpret_cast<const llvm::ELF::Elf64_Rela*>(pRegion.begin());
706 
707   for (size_t idx = 0; idx < entsize; ++idx) {
708     Relocation::Type r_type = 0x0;
709     uint32_t r_sym = 0x0;
710     uint64_t r_offset = 0x0;
711     int64_t r_addend = 0;
712     if (!target()
713              .readRelocation(relaTab[idx], r_type, r_sym, r_offset, r_addend)) {
714       return false;
715     }
716 
717     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
718     if (symbol == NULL) {
719       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
720     }
721 
722     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset, r_addend);
723   }  // end of for
724   return true;
725 }
726 
727 /// readRel - read ELF rel and create Relocation
readRel(Input & pInput,LDSection & pSection,llvm::StringRef pRegion) const728 bool ELFReader<64, true>::readRel(Input& pInput,
729                                   LDSection& pSection,
730                                   llvm::StringRef pRegion) const {
731   // get the number of rel
732   size_t entsize = pRegion.size() / sizeof(llvm::ELF::Elf64_Rel);
733   const llvm::ELF::Elf64_Rel* relTab =
734       reinterpret_cast<const llvm::ELF::Elf64_Rel*>(pRegion.begin());
735 
736   for (size_t idx = 0; idx < entsize; ++idx) {
737     Relocation::Type r_type = 0x0;
738     uint32_t r_sym = 0x0;
739     uint64_t r_offset = 0x0;
740     if (!target().readRelocation(relTab[idx], r_type, r_sym, r_offset))
741       return false;
742 
743     LDSymbol* symbol = pInput.context()->getSymbol(r_sym);
744     if (symbol == NULL) {
745       fatal(diag::err_cannot_read_symbol) << r_sym << pInput.path();
746     }
747 
748     IRBuilder::AddRelocation(pSection, r_type, *symbol, r_offset);
749   }  // end of for
750   return true;
751 }
752 
753 /// isMyEndian - is this ELF file in the same endian to me?
isMyEndian(const void * pELFHeader) const754 bool ELFReader<64, true>::isMyEndian(const void* pELFHeader) const {
755   const llvm::ELF::Elf64_Ehdr* hdr =
756       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
757 
758   return (hdr->e_ident[llvm::ELF::EI_DATA] == llvm::ELF::ELFDATA2LSB);
759 }
760 
761 /// isMyMachine - is this ELF file generated for the same machine.
isMyMachine(const void * pELFHeader) const762 bool ELFReader<64, true>::isMyMachine(const void* pELFHeader) const {
763   const llvm::ELF::Elf64_Ehdr* hdr =
764       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
765 
766   if (llvm::sys::IsLittleEndianHost)
767     return (hdr->e_machine == target().getInfo().machine());
768   return (mcld::bswap16(hdr->e_machine) == target().getInfo().machine());
769 }
770 
771 /// fileType - return the file type
fileType(const void * pELFHeader) const772 Input::Type ELFReader<64, true>::fileType(const void* pELFHeader) const {
773   const llvm::ELF::Elf64_Ehdr* hdr =
774       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
775   uint32_t type = 0x0;
776   if (llvm::sys::IsLittleEndianHost)
777     type = hdr->e_type;
778   else
779     type = mcld::bswap16(hdr->e_type);
780 
781   switch (type) {
782     case llvm::ELF::ET_REL:
783       return Input::Object;
784     case llvm::ELF::ET_EXEC:
785       return Input::Exec;
786     case llvm::ELF::ET_DYN:
787       return Input::DynObj;
788     case llvm::ELF::ET_CORE:
789       return Input::CoreFile;
790     case llvm::ELF::ET_NONE:
791     default:
792       return Input::Unknown;
793   }
794 }
795 
796 /// readSectionHeaders - read ELF section header table and create LDSections
readSectionHeaders(Input & pInput,const void * pELFHeader) const797 bool ELFReader<64, true>::readSectionHeaders(Input& pInput,
798                                              const void* pELFHeader) const {
799   const llvm::ELF::Elf64_Ehdr* ehdr =
800       reinterpret_cast<const llvm::ELF::Elf64_Ehdr*>(pELFHeader);
801 
802   uint64_t shoff = 0x0;
803   uint16_t shentsize = 0x0;
804   uint32_t shnum = 0x0;
805   uint32_t shstrtab = 0x0;
806 
807   if (llvm::sys::IsLittleEndianHost) {
808     shoff = ehdr->e_shoff;
809     shentsize = ehdr->e_shentsize;
810     shnum = ehdr->e_shnum;
811     shstrtab = ehdr->e_shstrndx;
812   } else {
813     shoff = mcld::bswap64(ehdr->e_shoff);
814     shentsize = mcld::bswap16(ehdr->e_shentsize);
815     shnum = mcld::bswap16(ehdr->e_shnum);
816     shstrtab = mcld::bswap16(ehdr->e_shstrndx);
817   }
818 
819   // If the file has no section header table, e_shoff holds zero.
820   if (shoff == 0x0)
821     return true;
822 
823   const llvm::ELF::Elf64_Shdr* shdr = NULL;
824   llvm::StringRef shdr_region;
825   uint32_t sh_name = 0x0;
826   uint32_t sh_type = 0x0;
827   uint64_t sh_flags = 0x0;
828   uint64_t sh_offset = 0x0;
829   uint64_t sh_size = 0x0;
830   uint32_t sh_link = 0x0;
831   uint32_t sh_info = 0x0;
832   uint64_t sh_addralign = 0x0;
833 
834   // if shnum and shstrtab overflow, the actual values are in the 1st shdr
835   if (shnum == llvm::ELF::SHN_UNDEF || shstrtab == llvm::ELF::SHN_XINDEX) {
836     shdr_region =
837         pInput.memArea()->request(pInput.fileOffset() + shoff, shentsize);
838     shdr = reinterpret_cast<const llvm::ELF::Elf64_Shdr*>(shdr_region.begin());
839 
840     if (llvm::sys::IsLittleEndianHost) {
841       sh_size = shdr->sh_size;
842       sh_link = shdr->sh_link;
843     } else {
844       sh_size = mcld::bswap64(shdr->sh_size);
845       sh_link = mcld::bswap32(shdr->sh_link);
846     }
847 
848     if (shnum == llvm::ELF::SHN_UNDEF)
849       shnum = sh_size;
850     if (shstrtab == llvm::ELF::SHN_XINDEX)
851       shstrtab = sh_link;
852 
853     shoff += shentsize;
854   }
855 
856   shdr_region =
857       pInput.memArea()->request(pInput.fileOffset() + shoff, shnum * shentsize);
858   const llvm::ELF::Elf64_Shdr* shdrTab =
859       reinterpret_cast<const llvm::ELF::Elf64_Shdr*>(shdr_region.begin());
860 
861   // get .shstrtab first
862   shdr = &shdrTab[shstrtab];
863   if (llvm::sys::IsLittleEndianHost) {
864     sh_offset = shdr->sh_offset;
865     sh_size = shdr->sh_size;
866   } else {
867     sh_offset = mcld::bswap64(shdr->sh_offset);
868     sh_size = mcld::bswap64(shdr->sh_size);
869   }
870 
871   llvm::StringRef sect_name_region =
872       pInput.memArea()->request(pInput.fileOffset() + sh_offset, sh_size);
873   const char* sect_name = sect_name_region.begin();
874 
875   LinkInfoList link_info_list;
876 
877   // create all LDSections, including first NULL section.
878   for (size_t idx = 0; idx < shnum; ++idx) {
879     if (llvm::sys::IsLittleEndianHost) {
880       sh_name = shdrTab[idx].sh_name;
881       sh_type = shdrTab[idx].sh_type;
882       sh_flags = shdrTab[idx].sh_flags;
883       sh_offset = shdrTab[idx].sh_offset;
884       sh_size = shdrTab[idx].sh_size;
885       sh_link = shdrTab[idx].sh_link;
886       sh_info = shdrTab[idx].sh_info;
887       sh_addralign = shdrTab[idx].sh_addralign;
888     } else {
889       sh_name = mcld::bswap32(shdrTab[idx].sh_name);
890       sh_type = mcld::bswap32(shdrTab[idx].sh_type);
891       sh_flags = mcld::bswap64(shdrTab[idx].sh_flags);
892       sh_offset = mcld::bswap64(shdrTab[idx].sh_offset);
893       sh_size = mcld::bswap64(shdrTab[idx].sh_size);
894       sh_link = mcld::bswap32(shdrTab[idx].sh_link);
895       sh_info = mcld::bswap32(shdrTab[idx].sh_info);
896       sh_addralign = mcld::bswap64(shdrTab[idx].sh_addralign);
897     }
898 
899     LDSection* section = IRBuilder::CreateELFHeader(
900         pInput, sect_name + sh_name, sh_type, sh_flags, sh_addralign);
901     section->setSize(sh_size);
902     section->setOffset(sh_offset);
903     section->setInfo(sh_info);
904 
905     if (sh_link != 0x0 || sh_info != 0x0) {
906       LinkInfo link_info = {section, sh_link, sh_info};
907       link_info_list.push_back(link_info);
908     }
909   }  // end of for
910 
911   // set up InfoLink
912   LinkInfoList::iterator info, infoEnd = link_info_list.end();
913   for (info = link_info_list.begin(); info != infoEnd; ++info) {
914     if (LDFileFormat::Relocation == info->section->kind())
915       info->section->setLink(pInput.context()->getSection(info->sh_info));
916     else
917       info->section->setLink(pInput.context()->getSection(info->sh_link));
918   }
919 
920   return true;
921 }
922 
923 /// readSignature - read a symbol from the given Input and index in symtab
924 /// This is used to get the signature of a group section.
readSignature(Input & pInput,LDSection & pSymTab,uint32_t pSymIdx) const925 ResolveInfo* ELFReader<64, true>::readSignature(Input& pInput,
926                                                 LDSection& pSymTab,
927                                                 uint32_t pSymIdx) const {
928   LDSection* symtab = &pSymTab;
929   LDSection* strtab = symtab->getLink();
930   assert(symtab != NULL && strtab != NULL);
931 
932   uint64_t offset = pInput.fileOffset() + symtab->offset() +
933                     sizeof(llvm::ELF::Elf64_Sym) * pSymIdx;
934   llvm::StringRef symbol_region =
935       pInput.memArea()->request(offset, sizeof(llvm::ELF::Elf64_Sym));
936   const llvm::ELF::Elf64_Sym* entry =
937       reinterpret_cast<const llvm::ELF::Elf64_Sym*>(symbol_region.begin());
938 
939   uint32_t st_name = 0x0;
940   uint8_t st_info = 0x0;
941   uint8_t st_other = 0x0;
942   uint16_t st_shndx = 0x0;
943   st_info = entry->st_info;
944   st_other = entry->st_other;
945   if (llvm::sys::IsLittleEndianHost) {
946     st_name = entry->st_name;
947     st_shndx = entry->st_shndx;
948   } else {
949     st_name = mcld::bswap32(entry->st_name);
950     st_shndx = mcld::bswap16(entry->st_shndx);
951   }
952 
953   llvm::StringRef strtab_region = pInput.memArea()->request(
954       pInput.fileOffset() + strtab->offset(), strtab->size());
955 
956   // get ld_name
957   llvm::StringRef ld_name(strtab_region.begin() + st_name);
958 
959   ResolveInfo* result = ResolveInfo::Create(ld_name);
960   result->setSource(pInput.type() == Input::DynObj);
961   result->setType(static_cast<ResolveInfo::Type>(st_info & 0xF));
962   result->setDesc(getSymDesc(st_shndx, pInput));
963   result->setBinding(getSymBinding((st_info >> 4), st_shndx, st_other));
964   result->setVisibility(getSymVisibility(st_other));
965 
966   return result;
967 }
968 
969 /// readDynamic - read ELF .dynamic in input dynobj
readDynamic(Input & pInput) const970 bool ELFReader<64, true>::readDynamic(Input& pInput) const {
971   assert(pInput.type() == Input::DynObj);
972   const LDSection* dynamic_sect = pInput.context()->getSection(".dynamic");
973   if (dynamic_sect == NULL) {
974     fatal(diag::err_cannot_read_section) << ".dynamic";
975   }
976   const LDSection* dynstr_sect = dynamic_sect->getLink();
977   if (dynstr_sect == NULL) {
978     fatal(diag::err_cannot_read_section) << ".dynstr";
979   }
980 
981   llvm::StringRef dynamic_region = pInput.memArea()->request(
982       pInput.fileOffset() + dynamic_sect->offset(), dynamic_sect->size());
983 
984   llvm::StringRef dynstr_region = pInput.memArea()->request(
985       pInput.fileOffset() + dynstr_sect->offset(), dynstr_sect->size());
986 
987   const llvm::ELF::Elf64_Dyn* dynamic =
988       reinterpret_cast<const llvm::ELF::Elf64_Dyn*>(dynamic_region.begin());
989   const char* dynstr = dynstr_region.begin();
990   bool hasSOName = false;
991   size_t numOfEntries = dynamic_sect->size() / sizeof(llvm::ELF::Elf64_Dyn);
992 
993   for (size_t idx = 0; idx < numOfEntries; ++idx) {
994     llvm::ELF::Elf64_Sxword d_tag = 0x0;
995     llvm::ELF::Elf64_Xword d_val = 0x0;
996 
997     if (llvm::sys::IsLittleEndianHost) {
998       d_tag = dynamic[idx].d_tag;
999       d_val = dynamic[idx].d_un.d_val;
1000     } else {
1001       d_tag = mcld::bswap64(dynamic[idx].d_tag);
1002       d_val = mcld::bswap64(dynamic[idx].d_un.d_val);
1003     }
1004 
1005     switch (d_tag) {
1006       case llvm::ELF::DT_SONAME:
1007         assert(d_val < dynstr_sect->size());
1008         pInput.setName(sys::fs::Path(dynstr + d_val).filename().native());
1009         hasSOName = true;
1010         break;
1011       case llvm::ELF::DT_NEEDED:
1012         // TODO:
1013         break;
1014       case llvm::ELF::DT_NULL:
1015       default:
1016         break;
1017     }
1018   }
1019 
1020   // if there is no SONAME in .dynamic, then set it from input path
1021   if (!hasSOName)
1022     pInput.setName(pInput.path().filename().native());
1023 
1024   return true;
1025 }
1026 
1027 }  // namespace mcld
1028